Unverified commit de4310e6, authored by huangjiyi, committed by GitHub

[PHI decoupling] simplify "convert_utils.h" in fluid (#48168)

* rm dependence to "convert_utils.h" in some files

* fix bugs

* replace DataType2String with DataTypeToString

* replace framework::DataTypeSize with phi::SizeOf

* mv convert_function from fluid to phi and rm old map

* recommit with pre-commit

* replace ProtoVarType with ProtoDataType and update comment.

* fix error about include "dnnl.hpp"

* revert add dep mkldnn to convert_utils in phi

* add mkldnn deps in convert_utils.h in phi

* move deps to convert_utils.h in phi
Parent df23c7c3
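For readers skimming the hunks below, the net effect on call sites is a small API substitution in phi. A minimal usage sketch follows (the helper names BufferBytes, DTypeName and FromProto are hypothetical, not part of the PR; the phi functions are the ones introduced or re-exported by this change):

#include <cstddef>
#include <cstdint>
#include <string>

#include "paddle/phi/common/data_type.h"      // phi::DataType, phi::SizeOf, phi::DataTypeToString
#include "paddle/phi/core/utils/data_type.h"  // phi::TransToPhiDataType, phi::TransToProtoVarType

// Previously framework::DataTypeSize(dtype): bytes needed for `numel` elements.
inline size_t BufferBytes(int64_t numel, phi::DataType dtype) {
  return static_cast<size_t>(numel) * phi::SizeOf(dtype);
}

// Previously framework::DataType2String(dtype): readable dtype name for logs and errors.
inline std::string DTypeName(phi::DataType dtype) {
  return phi::DataTypeToString(dtype);
}

// Previously looked up through the removed var_type_map: proto enum value -> phi::DataType.
inline phi::DataType FromProto(int proto_dtype) {
  return phi::TransToPhiDataType(proto_dtype);
}

The point of the substitution is that all three helpers live in phi, so fluid files no longer need "convert_utils.h" just to compute element sizes or print dtype names.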
@@ -111,14 +111,13 @@ void SerializeLodTensor(framework::Variable* var,
   }
   // IO Buffer
   if (platform::is_cpu_place(tensor->place())) {
-    auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
+    auto data_len = tensor->numel() * phi::SizeOf(tensor->dtype());
     iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
     iobuf->append(reinterpret_cast<const char*>(tensor->data()), data_len);
   } else {
 #ifdef PADDLE_WITH_CUDA
     char* temp_ptr =
-        new char[tensor->numel() *
-                 framework::DataTypeSize(tensor->dtype())];  // NOLINT
+        new char[tensor->numel() * phi::SizeOf(tensor->dtype())];  // NOLINT
     auto stream = reinterpret_cast<const phi::GPUContext&>(ctx).stream();
     memory::Copy(
         platform::CPUPlace(),
@@ -128,7 +127,7 @@ void SerializeLodTensor(framework::Variable* var,
         tensor->numel() * framework::SizeOfType(
                               framework::TransToProtoVarType(tensor->dtype())),
         stream);
-    auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
+    auto data_len = tensor->numel() * phi::SizeOf(tensor->dtype());
     iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
     iobuf->append(reinterpret_cast<const char*>(temp_ptr), data_len);
     delete[] temp_ptr;
@@ -159,14 +158,13 @@ void SerializeSelectedRows(framework::Variable* var,
   }
   // IO Buffer
   if (platform::is_cpu_place(tensor->place())) {
-    auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
+    auto data_len = tensor->numel() * phi::SizeOf(tensor->dtype());
     iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
     iobuf->append(reinterpret_cast<const char*>(tensor->data()), data_len);
   } else {
 #ifdef PADDLE_WITH_CUDA
     char* temp_ptr =
-        new char[tensor->numel() *
-                 framework::DataTypeSize(tensor->dtype())];  // NOLINT
+        new char[tensor->numel() * phi::SizeOf(tensor->dtype())];  // NOLINT
     auto stream = reinterpret_cast<const phi::GPUContext&>(ctx).stream();
     memory::Copy(
         platform::CPUPlace(),
@@ -176,7 +174,7 @@ void SerializeSelectedRows(framework::Variable* var,
         tensor->numel() * framework::SizeOfType(
                               framework::TransToProtoVarType(tensor->dtype())),
         stream);
-    auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
+    auto data_len = tensor->numel() * phi::SizeOf(tensor->dtype());
     iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
     iobuf->append(reinterpret_cast<const char*>(temp_ptr), data_len);
     delete[] temp_ptr;
@@ -259,16 +257,15 @@ void DeserializeLodTensor(framework::Variable* var,
 #ifdef PADDLE_WITH_CUDA
     unsigned long data_len;  // NOLINT
     char* temp_ptr =
-        new char[tensor->numel() *
-                 framework::DataTypeSize(tensor->dtype())];     // NOLINT
-    io_buffer_itr.copy_and_forward((void*)(&data_len), 8);      // NOLINT
-    io_buffer_itr.copy_and_forward((void*)temp_ptr, data_len);  // NOLINT
+        new char[tensor->numel() * phi::SizeOf(tensor->dtype())];  // NOLINT
+    io_buffer_itr.copy_and_forward((void*)(&data_len), 8);         // NOLINT
+    io_buffer_itr.copy_and_forward((void*)temp_ptr, data_len);     // NOLINT
     auto stream = reinterpret_cast<const phi::GPUContext&>(ctx).stream();
     memory::Copy(place,
                  tensor_data,
                  platform::CPUPlace(),
                  (void*)temp_ptr,  // NOLINT
-                 tensor->numel() * framework::DataTypeSize(tensor->dtype()),
+                 tensor->numel() * phi::SizeOf(tensor->dtype()),
                  stream);
     delete[] temp_ptr;
 #endif
@@ -303,17 +300,16 @@ void DeserializeSelectedRows(
   } else if (platform::is_gpu_place(place)) {
 #ifdef PADDLE_WITH_CUDA
     char* temp_ptr =
-        new char[tensor->numel() *
-                 framework::DataTypeSize(tensor->dtype())];  // NOLINT
-    unsigned long data_len;                                  // NOLINT
-    io_buffer_itr.copy_and_forward((void*)(&data_len), 8);   // NOLINT
+        new char[tensor->numel() * phi::SizeOf(tensor->dtype())];  // NOLINT
+    unsigned long data_len;                                        // NOLINT
+    io_buffer_itr.copy_and_forward((void*)(&data_len), 8);         // NOLINT
     io_buffer_itr.copy_and_forward(temp_ptr, data_len);
     auto stream = reinterpret_cast<const phi::GPUContext&>(ctx).stream();
     memory::Copy(place,
                  tensor_data,
                  platform::CPUPlace(),
                  temp_ptr,
-                 tensor->numel() * framework::DataTypeSize(tensor->dtype()),
+                 tensor->numel() * phi::SizeOf(tensor->dtype()),
                  stream);
     delete[] temp_ptr;
 #endif
...
@@ -41,14 +41,14 @@ int GetMicroId(const platform::DeviceContext& ctx,
   } else {
 #ifdef PADDLE_WITH_CUDA
     std::vector<char> temp;
-    temp.resize(tensor->numel() * framework::DataTypeSize(tensor->dtype()));
+    temp.resize(tensor->numel() * phi::SizeOf(tensor->dtype()));
     char* temp_ptr = temp.data();
     auto stream = reinterpret_cast<const phi::GPUContext&>(ctx).stream();
     memory::Copy(platform::CPUPlace(),
                  temp_ptr,
                  tensor->place(),
                  tensor->data(),
-                 tensor->numel() * framework::DataTypeSize(tensor->dtype()),
+                 tensor->numel() * phi::SizeOf(tensor->dtype()),
                  stream);
     float* temp_ptr_float = reinterpret_cast<float*>(temp_ptr);
     micro_id = static_cast<int>(temp_ptr_float[0]);
...
@@ -49,7 +49,7 @@ inline std::vector<paddle::experimental::Tensor> AmpAutoCasts(
     std::string op_name) {
   VLOG(6) << "AMP AmpAutoCasts:"
           << " inputs(" << inputs_name << ") dst_dtype("
-          << paddle::framework::DataType2String(dst_dtype) << ").";
+          << phi::DataTypeToString(dst_dtype) << ").";
   std::vector<paddle::experimental::Tensor> inputs_casted;
   for (auto& input : inputs) {
     if (NeedCast(input, dst_dtype)) {
@@ -72,7 +72,7 @@ inline paddle::experimental::Tensor AmpAutoCast(
     std::string op_name) {
   VLOG(6) << "AMP AmpAutoCasts:"
           << " input(" << input_name << ") dst_dtype("
-          << paddle::framework::DataType2String(dst_dtype) << ").";
+          << phi::DataTypeToString(dst_dtype) << ").";
   if (dst_dtype == paddle::experimental::DataType::FLOAT16) {
     if (op_name == "run_program") {
       return input;
...
@@ -69,7 +69,7 @@ inline std::vector<paddle::experimental::Tensor> EagerAmpAutoCasts(
     bool trace_backward = true) {
   VLOG(6) << "AMP AmpAutoCasts:"
           << " inputs(" << inputs_name << ") dst_dtype("
-          << paddle::framework::DataType2String(dst_dtype) << ").";
+          << phi::DataTypeToString(dst_dtype) << ").";
   std::vector<paddle::experimental::Tensor> inputs_casted;
   for (auto& input : inputs) {
     if (NeedCast(input, dst_dtype)) {
@@ -89,7 +89,7 @@ inline paddle::experimental::Tensor EagerAmpAutoCast(
     bool trace_backward = true) {
   VLOG(6) << "AMP AmpAutoCasts:"
           << " input(" << egr::EagerUtils::TensorStr(input) << " to dst_dtype("
-          << paddle::framework::DataType2String(dst_dtype) << ").";
+          << phi::DataTypeToString(dst_dtype) << ").";
   if (dst_dtype == paddle::experimental::DataType::FLOAT16) {
     if (op_name == "run_program") {
       return input;
...
@@ -41,16 +41,16 @@ static void CheckTensor(const paddle::experimental::Tensor& pre,
         "The tensor in before and after hook are not consistent"));
   }
   if (pre.initialized() && post.initialized()) {
-    VLOG(7) << paddle::framework::DataType2String(pre.dtype()) << " "
-            << paddle::framework::DataType2String(post.dtype());
+    VLOG(7) << phi::DataTypeToString(pre.dtype()) << " "
+            << phi::DataTypeToString(post.dtype());
     PADDLE_ENFORCE_EQ(
         pre.dtype(),
         post.dtype(),
         paddle::platform::errors::PermissionDenied(
             "The dtype of tensor before(%s) and after(%s) hook are not "
             "consistent",
-            paddle::framework::DataType2String(pre.dtype()),
-            paddle::framework::DataType2String(post.dtype())));
+            phi::DataTypeToString(pre.dtype()),
+            phi::DataTypeToString(post.dtype())));
     PADDLE_ENFORCE_EQ(pre.place(),
                       post.place(),
                       paddle::platform::errors::PermissionDenied(
...
@@ -1242,23 +1242,7 @@ cc_test(
   SRCS phi_utils_test.cc
   DEPS phi_utils)
-if(WITH_GPU OR WITH_ROCM)
-  cc_library(
-    fluid_convert_utils
-    SRCS convert_utils.cc
-    DEPS data_type place gpu_info)
-else()
-  cc_library(
-    fluid_convert_utils
-    SRCS convert_utils.cc
-    DEPS data_type place)
-endif()
-# every source file that includes "dnnl.h" must depends on mkldnn
-# or, the first one should depends on mkldnn
-if(WITH_MKLDNN)
-  add_dependencies(fluid_convert_utils mkldnn)
-endif()
+cc_library(fluid_convert_utils DEPS data_type)
 cc_test(
   convert_utils_test
...
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/convert_utils.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/phi/common/pstring.h"
namespace paddle {
namespace framework {
paddle::experimental::DataType TransToPhiDataType(
const paddle::framework::proto::VarType::Type& dtype) {
// Set the order of case branches according to the frequency with
// the data type is used
switch (dtype) {
case paddle::framework::proto::VarType::FP32:
return DataType::FLOAT32;
case paddle::framework::proto::VarType::FP64:
return DataType::FLOAT64;
case paddle::framework::proto::VarType::INT64:
return DataType::INT64;
case paddle::framework::proto::VarType::INT32:
return DataType::INT32;
case paddle::framework::proto::VarType::INT8:
return DataType::INT8;
case paddle::framework::proto::VarType::UINT8:
return DataType::UINT8;
case paddle::framework::proto::VarType::INT16:
return DataType::INT16;
case paddle::framework::proto::VarType::COMPLEX64:
return DataType::COMPLEX64;
case paddle::framework::proto::VarType::COMPLEX128:
return DataType::COMPLEX128;
case paddle::framework::proto::VarType::FP16:
return DataType::FLOAT16;
case paddle::framework::proto::VarType::BF16:
return DataType::BFLOAT16;
case paddle::framework::proto::VarType::BOOL:
return DataType::BOOL;
case paddle::framework::proto::VarType::PSTRING:
return DataType::PSTRING;
default:
return DataType::UNDEFINED;
}
}
paddle::framework::proto::VarType::Type TransToProtoVarType(
const paddle::experimental::DataType& dtype) {
// Set the order of case branches according to the frequency with
// the data type is used
switch (dtype) {
case DataType::FLOAT32:
return paddle::framework::proto::VarType::FP32;
case DataType::FLOAT64:
return paddle::framework::proto::VarType::FP64;
case DataType::INT64:
return paddle::framework::proto::VarType::INT64;
case DataType::INT32:
return paddle::framework::proto::VarType::INT32;
case DataType::INT8:
return paddle::framework::proto::VarType::INT8;
case DataType::UINT8:
return paddle::framework::proto::VarType::UINT8;
case DataType::INT16:
return paddle::framework::proto::VarType::INT16;
case DataType::COMPLEX64:
return paddle::framework::proto::VarType::COMPLEX64;
case DataType::COMPLEX128:
return paddle::framework::proto::VarType::COMPLEX128;
case DataType::FLOAT16:
return paddle::framework::proto::VarType::FP16;
case DataType::BFLOAT16:
return paddle::framework::proto::VarType::BF16;
case DataType::BOOL:
return paddle::framework::proto::VarType::BOOL;
case DataType::PSTRING:
return paddle::framework::proto::VarType::PSTRING;
default:
PADDLE_THROW(paddle::platform::errors::Unimplemented(
"Unsupported data type `%s` when casting it into "
"paddle data type.",
dtype));
}
}
size_t DataTypeSize(DataType dtype) {
switch (dtype) {
case DataType::UNDEFINED:
return 0;
case DataType::BOOL:
return sizeof(bool);
case DataType::INT8:
return sizeof(int8_t);
case DataType::UINT8:
return sizeof(uint8_t);
case DataType::INT16:
return sizeof(int16_t);
case DataType::INT32:
return sizeof(int);
case DataType::INT64:
return sizeof(int64_t);
case DataType::BFLOAT16:
return sizeof(paddle::platform::bfloat16);
case DataType::FLOAT16:
return sizeof(paddle::platform::float16);
case DataType::FLOAT32:
return sizeof(float);
case DataType::FLOAT64:
return sizeof(double);
case DataType::COMPLEX64:
return sizeof(paddle::platform::complex<float>);
case DataType::COMPLEX128:
return sizeof(paddle::platform::complex<double>);
case DataType::PSTRING:
return sizeof(paddle::platform::pstring);
default:
return 0;
}
}
DataType String2DataType(const std::string& str) {
if (str == "bool") {
return DataType::BOOL;
} else if (str == "float16") {
return DataType::FLOAT16;
} else if (str == "float32") {
return DataType::FLOAT32;
} else if (str == "float64") {
return DataType::FLOAT64;
} else if (str == "int8") {
return DataType::INT8;
} else if (str == "int16") {
return DataType::INT16;
} else if (str == "int32") {
return DataType::INT32;
} else if (str == "int64") {
return DataType::INT64;
} else if (str == "uint8") {
return DataType::UINT8;
} else if (str == "complex64") {
return DataType::COMPLEX64;
} else if (str == "complex128") {
return DataType::COMPLEX128;
} else if (str == "pstring") {
return DataType::PSTRING;
} else if (str == "bfloat16") {
return DataType::BFLOAT16;
} else {
return DataType::UNDEFINED;
}
}
} // namespace framework
} // namespace paddle
@@ -14,12 +14,8 @@ limitations under the License. */
 #pragma once
-#include "paddle/phi/common/backend.h"
-#include "paddle/phi/common/data_type.h"
-#include "paddle/phi/common/layout.h"
-#include "paddle/phi/core/tensor_meta.h"
-
 #include "paddle/fluid/framework/data_type.h"
+#include "paddle/phi/common/layout.h"
 #include "paddle/phi/core/utils/data_type.h"
 // TODO(chenweihang): this file may need to be removed
@@ -27,19 +23,16 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
-using DataType = paddle::experimental::DataType;
+using DataType = phi::DataType;
 using DataLayout = phi::DataLayout;
-DataType TransToPhiDataType(
-    const paddle::framework::proto::VarType::Type& dtype);
-
-paddle::framework::proto::VarType::Type TransToProtoVarType(
-    const DataType& dtype);
-
-size_t DataTypeSize(DataType dtype);
-DataType String2DataType(const std::string& str);
-
-using phi::DataType2String;
+using phi::DataTypeToString;
+using phi::SizeOf;
+using phi::TransToPhiDataType;
+
+inline proto::VarType::Type TransToProtoVarType(const DataType& dtype) {
+  return static_cast<proto::VarType::Type>(phi::TransToProtoVarType(dtype));
+}
 }  // namespace framework
 }  // namespace paddle
@@ -45,7 +45,7 @@ void SetMicroId(paddle::framework::Scope* scope,
   if (platform::is_gpu_place(place)) {
 #ifdef PADDLE_WITH_CUDA
     std::vector<char> temp;
-    temp.resize(tensor->numel() * framework::DataTypeSize(tensor->dtype()));
+    temp.resize(tensor->numel() * phi::SizeOf(tensor->dtype()));
     char* temp_ptr = temp.data();
     float* temp_ptr_float = reinterpret_cast<float*>(temp_ptr);
     temp_ptr_float[0] = micro_id;
...
@@ -74,9 +74,9 @@ void TensorCopyImpl(const TENSOR& src,
 #ifdef PADDLE_WITH_MKLDNN
   auto size = src.layout() == DataLayout::ONEDNN
                   ? src.memory_size()
-                  : src.numel() * framework::DataTypeSize(src.dtype());
+                  : src.numel() * phi::SizeOf(src.dtype());
 #else
-  auto size = src.numel() * framework::DataTypeSize(src.dtype());
+  auto size = src.numel() * phi::SizeOf(src.dtype());
 #endif
   if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
@@ -486,7 +486,7 @@ void TensorCopySync(const phi::DenseTensor& src,
     return;
   }
-  auto size = src.numel() * framework::DataTypeSize(src.dtype());
+  auto size = src.numel() * phi::SizeOf(src.dtype());
   if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
     memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
   }
@@ -679,7 +679,7 @@ void TensorToStream(std::ostream& os,
     os.write(out.data(), size);
   }
   {  // the 3rd field, tensor data
-    uint64_t size = tensor.numel() * framework::DataTypeSize(tensor.dtype());
+    uint64_t size = tensor.numel() * phi::SizeOf(tensor.dtype());
     auto* data_ptr = tensor.data();
     PADDLE_ENFORCE_LT(size,
...
@@ -217,7 +217,7 @@ void TensorCopyAsync(paddle::lite_api::Tensor* dst,
   const platform::Place& src_place = src.place();
   const platform::Place& dst_place = GetNativePlace(dst->target());
   const size_t bytes =
-      static_cast<size_t>(src.numel()) * framework::DataTypeSize(src.dtype());
+      static_cast<size_t>(src.numel()) * phi::SizeOf(src.dtype());
   dst->Resize(phi::vectorize(src.dims()));
   const void* src_data = src.data();
   void* dst_data{nullptr};
@@ -241,7 +241,7 @@ void TensorCopyAsync(phi::DenseTensor* dst,
   const platform::Place& src_place = GetNativePlace(src.target());
   const platform::Place& dst_place = dst->place();
   int64_t src_numel = GetLiteTensorNumel(src);
-  const size_t bytes = src_numel * framework::DataTypeSize(dst->dtype());
+  const size_t bytes = src_numel * phi::SizeOf(dst->dtype());
   const void* src_data = src.data<void>();
   // When Lite is ready, the source type needs to be modified here.
   void* dst_data = dst->mutable_data(dst_place, dst->dtype());
...
@@ -219,7 +219,7 @@ class LazyZerosNPU {
     if (!found_inf_vec[0]) {
       framework::TensorCopy(*x, place, dev_ctx, out);
     } else if (zero_ptr != dst_ptr) {
-      auto size = out->numel() * framework::DataTypeSize(out->dtype());
+      auto size = out->numel() * phi::SizeOf(out->dtype());
       memory::Copy(place, dst_ptr, place, zero_ptr, size, stream);
     }
   }
...
@@ -128,7 +128,7 @@ class CEmbeddingGradOpCPUKernel : public framework::OpKernel<T> {
     table_grad_t->mutable_data<T>(table_t->dims(), context.GetPlace());
     size_t table_t_mem_size =
-        table_t->numel() * framework::DataTypeSize(table_grad_t->dtype());
+        table_t->numel() * phi::SizeOf(table_grad_t->dtype());
     size_t table_grad_t_mem_size =
         table_grad_t->numel() *
         framework::SizeOfType(
...
@@ -127,10 +127,8 @@ void NPUGetIdsEmbedding(const framework::ExecutionContext &context) {
   auto pad_shape = phi::make_ddim({table_t->dims()[0] + 1, table_t->dims()[1]});
   phi::DenseTensor table_t_pad;
-  size_t mem_size =
-      table_t->numel() * framework::DataTypeSize(table_t->dtype());
-  size_t line_mem_size =
-      table_t->dims()[1] * framework::DataTypeSize(table_t->dtype());
+  size_t mem_size = table_t->numel() * phi::SizeOf(table_t->dtype());
+  size_t line_mem_size = table_t->dims()[1] * phi::SizeOf(table_t->dtype());
   PADDLE_ENFORCE_EQ(line_mem_size % 64,
                     0,
                     platform::errors::InvalidArgument(
@@ -227,11 +225,11 @@ void NPUUpdateEmbedding(const framework::ExecutionContext &context) {
   // copy table_t_pad to table_t
   T *dst = table_grad_t->mutable_data<T>(table_t->dims(), context.GetPlace());
   const size_t mem_size =
-      table_grad_t->numel() * framework::DataTypeSize(table_grad_t->dtype());
+      table_grad_t->numel() * phi::SizeOf(table_grad_t->dtype());
   // check align
   size_t line_mem_size =
-      table_grad_t->dims()[1] * framework::DataTypeSize(table_grad_t->dtype());
+      table_grad_t->dims()[1] * phi::SizeOf(table_grad_t->dtype());
   PADDLE_ENFORCE_EQ(line_mem_size % 64,
                     0,
                     platform::errors::InvalidArgument(
...
@@ -153,7 +153,7 @@ static void AppendProposals(phi::DenseTensor* dst,
                             const phi::DenseTensor& src) {
   auto* out_data = dst->data();
   auto* to_add_data = src.data();
-  size_t size_of_t = framework::DataTypeSize(src.dtype());
+  size_t size_of_t = phi::SizeOf(src.dtype());
   offset *= size_of_t;
   std::memcpy(
       reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(out_data) + offset),
...
@@ -46,7 +46,7 @@ class IpuRuntimeOp : public framework::OperatorBase {
     for (size_t i = 0; i < outputs.size(); ++i) {
       auto* out = outputs[i];
       if (out->dims().size() == 0) {
-        auto sizeof_dtype = framework::DataTypeSize(out->dtype());
+        auto sizeof_dtype = phi::SizeOf(out->dtype());
         int64_t dim = out->memory_size() / sizeof_dtype;
         out->Resize({dim});
         VLOG(10) << "set ipu_runtime_op output: " << output_names[i]
...
@@ -128,7 +128,7 @@ void InitTensorsOnClient(framework::Scope* scope,
                reinterpret_cast<void*>(x_ptr),
                platform::CPUPlace(),
                reinterpret_cast<void*>(x_vec_ptr),
-               x_var->numel() * framework::DataTypeSize(x_var->dtype()),
+               x_var->numel() * phi::SizeOf(x_var->dtype()),
                stream);
   // auto res_var = scope->Var("res")->GetMutable<phi::DenseTensor>();
...
@@ -191,8 +191,7 @@ void BufferedReader::ReadAsync(size_t i) {
         cuda[i].set_layout(cpu[i].layout());
         cuda_pinned_ptrs[i] =
             cuda[i].mutable_data(cuda_pinned_place, cpu[i].type());
-        auto size = cpu[i].numel() *
-                    paddle::framework::DataTypeSize(cpu[i].dtype());
+        auto size = cpu[i].numel() * phi::SizeOf(cpu[i].dtype());
         memory::Copy(cuda_pinned_place,
                      cuda_pinned_ptrs[i],
@@ -245,8 +244,7 @@ void BufferedReader::ReadAsync(size_t i) {
         auto cpu_place = cpu[i].place();
         auto cpu_ptr = cpu[i].data();
         auto gpu_ptr = gpu_ptrs[i];
-        auto size =
-            cpu[i].numel() * paddle::framework::DataTypeSize(cpu[i].dtype());
+        auto size = cpu[i].numel() * phi::SizeOf(cpu[i].dtype());
         if (platform::is_cuda_pinned_place(cpu_place)) {
           memory::Copy(
               place_, gpu_ptr, cpu_place, cpu_ptr, size, stream_.get());
@@ -312,8 +310,7 @@ void BufferedReader::ReadAsync(size_t i) {
         auto cpu_place = cpu[i].place();
         auto cpu_ptr = cpu[i].data();
         auto npu_ptr = npu_ptrs[i];
-        auto size =
-            cpu[i].numel() * paddle::framework::DataTypeSize(cpu[i].dtype());
+        auto size = cpu[i].numel() * phi::SizeOf(cpu[i].dtype());
         if ((platform::is_npu_place(cpu_place))) {
           memory::Copy(
               place_, npu_ptr, cpu_place, cpu_ptr, size, stream_.get());
@@ -364,8 +361,7 @@ void BufferedReader::ReadAsync(size_t i) {
         auto cpu_place = cpu[i].place();
         auto cpu_ptr = cpu[i].data();
         auto mlu_ptr = mlu_ptrs[i];
-        auto size =
-            cpu[i].numel() * paddle::framework::DataTypeSize(cpu[i].dtype());
+        auto size = cpu[i].numel() * phi::SizeOf(cpu[i].dtype());
         if ((platform::is_mlu_place(cpu_place))) {
           memory::Copy(
               place_, mlu_ptr, cpu_place, cpu_ptr, size, stream_.get());
@@ -417,8 +413,7 @@ void BufferedReader::ReadAsync(size_t i) {
         auto cpu_place = cpu[i].place();
         auto cpu_ptr = cpu[i].data();
         auto xpu_ptr = xpu_ptrs[i];
-        auto size =
-            cpu[i].numel() * paddle::framework::DataTypeSize(cpu[i].dtype());
+        auto size = cpu[i].numel() * phi::SizeOf(cpu[i].dtype());
         // TODO(zhanghuan) for now hardware not support xpu_memcpy_async, maybe
         // KL3
         if ((platform::is_xpu_place(cpu_place))) {
@@ -471,8 +466,7 @@ void BufferedReader::ReadAsync(size_t i) {
         auto cpu_place = cpu[i].place();
         auto cpu_ptr = cpu[i].data();
         auto custom_device_ptr = custom_device_ptrs[i];
-        auto size =
-            cpu[i].numel() * paddle::framework::DataTypeSize(cpu[i].dtype());
+        auto size = cpu[i].numel() * phi::SizeOf(cpu[i].dtype());
         if ((platform::is_custom_place(cpu_place))) {
           memory::Copy(place_, custom_device_ptr, cpu_place, cpu_ptr, size);
           custom_device_stream_->Synchronize();
...
@@ -23,9 +23,9 @@
 #include <string>
 #include <vector>
-#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h"
 #include "paddle/fluid/platform/device_context.h"
+#include "paddle/phi/core/utils/data_type.h"
 namespace phi {
 class DenseTensor;
@@ -37,7 +37,7 @@ namespace platform {
 template <typename T>
 inline cudnnDataType_t ToCudnnDataType(const T& t) {
   auto type = framework::ToDataType(t);
-  return ToCudnnDataType(type);
+  return ToCudnnDataType(phi::TransToPhiDataType(type));
 }
 template <typename T>
@@ -68,21 +68,20 @@ inline std::vector<T> TransformDimOrder(const std::vector<T>& dims) {
 }
 template <>
-inline cudnnDataType_t ToCudnnDataType(
-    const framework::proto::VarType::Type& t) {
+inline cudnnDataType_t ToCudnnDataType(const phi::DataType& t) {
   cudnnDataType_t type = CUDNN_DATA_FLOAT;
   switch (t) {
-    case framework::proto::VarType::FP16:
+    case phi::DataType::FLOAT16:
       type = CUDNN_DATA_HALF;
       break;
-    case framework::proto::VarType::FP32:
+    case phi::DataType::FLOAT32:
       type = CUDNN_DATA_FLOAT;
      break;
-    case framework::proto::VarType::FP64:
+    case phi::DataType::FLOAT64:
      type = CUDNN_DATA_DOUBLE;
      break;
 #if CUDNN_VERSION_MIN(8, 1, 0)
-    case framework::proto::VarType::BF16:
+    case phi::DataType::BFLOAT16:
       type = CUDNN_DATA_BFLOAT16;
       break;
 #endif
@@ -152,12 +151,12 @@ class TensorDescriptor {
     if (groups > 1) {
       dims_with_group[1] = dims_with_group[1] / groups;
     }
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
-        desc_.get(),
-        ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())),
-        dims_with_group.size(),
-        dims_with_group.data(),
-        strides.data()));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        dynload::cudnnSetTensorNdDescriptor(desc_.get(),
+                                            ToCudnnDataType(tensor.dtype()),
+                                            dims_with_group.size(),
+                                            dims_with_group.data(),
+                                            strides.data()));
   }
   void set(const std::vector<int>& dims,
@@ -179,8 +178,7 @@ class TensorDescriptor {
   void set(const phi::DenseTensor& tensor, const cudnnTensorFormat_t format) {
     auto dims = phi::vectorize<int>(tensor.dims());
-    auto dtype =
-        ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype()));
+    auto dtype = ToCudnnDataType(tensor.dtype());
     set(dims, format, dtype);
   }
@@ -232,8 +230,7 @@ class FilterDescriptor {
            const cudnnTensorFormat_t format,
            const int groups = 1) {
     auto dims = phi::vectorize<int>(tensor.dims());
-    auto dtype =
-        ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype()));
+    auto dtype = ToCudnnDataType(tensor.dtype());
     set(dims, format, dtype, groups);
   }
...
@@ -25,6 +25,7 @@
 #include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h"
 #include "paddle/fluid/platform/device_context.h"
+#include "paddle/phi/core/utils/data_type.h"
 namespace phi {
 class DenseTensor;
@@ -36,7 +37,7 @@ namespace platform {
 template <typename T>
 inline miopenDataType_t ToCudnnDataType(const T& t) {
   auto type = framework::ToDataType(t);
-  return ToCudnnDataType(type);
+  return ToCudnnDataType(phi::TransToPhiDataType(type));
 }
 inline std::vector<int> TransformDimOrder(const std::vector<int>& dims) {
@@ -63,14 +64,13 @@ inline std::vector<int> TransformDimOrder(const std::vector<int>& dims) {
 }
 template <>
-inline miopenDataType_t ToCudnnDataType(
-    const framework::proto::VarType::Type& t) {
+inline miopenDataType_t ToCudnnDataType(const phi::DataType& t) {
   miopenDataType_t type = miopenFloat;
   switch (t) {
-    case framework::proto::VarType::FP16:
+    case phi::DataType::FLOAT16:
       type = miopenHalf;
       break;
-    case framework::proto::VarType::FP32:
+    case phi::DataType::FLOAT32:
       type = miopenFloat;
       break;
     default:
@@ -142,7 +142,7 @@ class TensorDescriptor {
     }
     PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor(
         (miopenTensorDescriptor_t)(desc_.get()),
-        ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())),
+        ToCudnnDataType(tensor.dtype()),
         static_cast<int>(dims_with_group.size()),
         const_cast<int*>(dims_with_group.data()),
         const_cast<int*>(strides.data())));
@@ -166,7 +166,7 @@ class TensorDescriptor {
     }
     PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor(
         (miopenTensorDescriptor_t)(desc_.get()),
-        ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())),
+        ToCudnnDataType(tensor.dtype()),
         static_cast<int>(dims_with_group.size()),
         const_cast<int*>(dims_with_group.data()),
         const_cast<int*>(strides.data())));
@@ -214,7 +214,7 @@ class FilterDescriptor {
     }
     PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor(
         (miopenTensorDescriptor_t)(desc_.get()),
-        ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())),
+        ToCudnnDataType(tensor.dtype()),
         static_cast<int>(dims_with_group.size()),
         const_cast<int*>(dims_with_group.data()),
         const_cast<int*>(strides.data())));
...
@@ -79,7 +79,7 @@ class EagerNumpyAllocation : public phi::Allocation {
   explicit EagerNumpyAllocation(PyObject* numpy_data, phi::DataType dtype)
       : Allocation(
            static_cast<void*>(pybind11::detail::array_proxy(numpy_data)->data),
-            framework::DataTypeSize(dtype) * PyArray_Size_(numpy_data),
+            phi::SizeOf(dtype) * PyArray_Size_(numpy_data),
            paddle::platform::CPUPlace()),
         arr_(numpy_data) {
     PADDLE_ENFORCE_NOT_NULL(
...
@@ -116,7 +116,7 @@ static PyObject* tensor_method_numpy(TensorObject* self,
   }
   auto tensor_dims = self->tensor.shape();
   auto numpy_dtype = TensorDtype2NumpyDtype(self->tensor.type());
-  auto sizeof_dtype = paddle::framework::DataTypeSize(self->tensor.type());
+  auto sizeof_dtype = phi::SizeOf(self->tensor.type());
   Py_intptr_t py_dims[paddle::framework::DDim::kMaxRank];
   Py_intptr_t py_strides[paddle::framework::DDim::kMaxRank];
   size_t numel = 1;
@@ -203,8 +203,7 @@ static PyObject* tensor_method_numpy(TensorObject* self,
       paddle::platform::GpuMemcpySync(
           pybind11::detail::array_proxy(array)->data,
           dense_tensor->data(),
-          paddle::framework::DataTypeSize(dense_tensor->dtype()) *
-              dense_tensor->numel(),
+          phi::SizeOf(dense_tensor->dtype()) * dense_tensor->numel(),
           kind);
     } else {
       VLOG(6) << "Getting DenseTensor's numpy value";
@@ -213,8 +212,7 @@ static PyObject* tensor_method_numpy(TensorObject* self,
       paddle::platform::GpuMemcpySync(
           pybind11::detail::array_proxy(array)->data,
           dense_tensor->data(),
-          paddle::framework::DataTypeSize(dense_tensor->dtype()) *
-              dense_tensor->numel(),
+          phi::SizeOf(dense_tensor->dtype()) * dense_tensor->numel(),
           kind);
     }
 #endif
@@ -258,8 +256,7 @@ static PyObject* tensor_method_numpy(TensorObject* self,
           ->MemoryCopyD2H(
               pybind11::detail::array_proxy(array)->data,
               dense_tensor->data(),
-              paddle::framework::DataTypeSize(dense_tensor->dtype()) *
-                  dense_tensor->numel());
+              phi::SizeOf(dense_tensor->dtype()) * dense_tensor->numel());
     } else {
       VLOG(6) << "Getting DenseTensor's numpy value";
       auto dense_tensor =
@@ -268,8 +265,7 @@ static PyObject* tensor_method_numpy(TensorObject* self,
           ->MemoryCopyD2H(
              pybind11::detail::array_proxy(array)->data,
              dense_tensor->data(),
-              paddle::framework::DataTypeSize(dense_tensor->dtype()) *
-                  dense_tensor->numel());
+              phi::SizeOf(dense_tensor->dtype()) * dense_tensor->numel());
     }
 #endif
   } else {
@@ -1698,7 +1694,7 @@ static PyObject* tensor_method_element_size(TensorObject* self,
                                             PyObject* args,
                                             PyObject* kwargs) {
   EAGER_TRY
-  uint32_t element_size = framework::DataTypeSize(self->tensor.dtype());
+  uint32_t element_size = phi::SizeOf(self->tensor.dtype());
   return ToPyObject(element_size);
   EAGER_CATCH_AND_THROW_RETURN_NULL
...
@@ -562,7 +562,7 @@ void BindImperative(py::module *m_ptr) {
             &t, array, platform::CPUPlace(), true);
         // 3. allocate shared memory
         void *data_ptr = t.data();
-        size_t data_size = t.numel() * framework::DataTypeSize(t.dtype());
+        size_t data_size = t.numel() * phi::SizeOf(t.dtype());
         auto shared_writer_holder =
             memory::allocation::AllocateMemoryMapWriterAllocation(data_size);
         // 4. maintain mmap fd set & backup ipc_name
@@ -602,7 +602,7 @@ void BindImperative(py::module *m_ptr) {
             &t, array, platform::CPUPlace(), true);
         // 3. allocate shared memory
         void *data_ptr = t.data();
-        size_t data_size = t.numel() * framework::DataTypeSize(t.dtype());
+        size_t data_size = t.numel() * phi::SizeOf(t.dtype());
         auto shared_writer_holder =
             memory::allocation::AllocateMemoryMapWriterAllocation(data_size);
         // 4. maintain mmap fd set & backup ipc_name
...
@@ -258,6 +258,8 @@ inline std::string DataTypeToString(const DataType& dtype) {
 namespace phi {
 using DataType = paddle::experimental::DataType;
+using paddle::experimental::DataTypeToString;
+using paddle::experimental::SizeOf;
 }  // namespace phi
 namespace paddle {
...
@@ -9,6 +9,10 @@ cc_library(
 set(convert_utils_deps data_type place op_utils phi_backends)
+if(WITH_MKLDNN)
+  set(convert_utils_deps ${convert_utils_deps} mkldnn)
+endif()
+
 cc_library(
   convert_utils
   SRCS convert_utils.cc
...
@@ -22,31 +22,6 @@ limitations under the License. */
 #include "paddle/phi/core/enforce.h"
 namespace phi {
-// Here we can't depend on the fluid proto::VarType, so we use the dtype enum
-// value directly. See also `assign_value_sig.cc`.
-// proto::VarType::INT16 -> 1  -> phi::DataType::INT16
-// proto::VarType::INT32 -> 2  -> phi::DataType::INT32
-// proto::VarType::INT64 -> 3  -> phi::DataType::INT64
-// proto::VarType::FP16  -> 4  -> phi::DataType::FLOAT16
-// proto::VarType::FP32  -> 5  -> phi::DataType::FLOAT32
-// proto::VarType::FP64  -> 6  -> phi::DataType::FLOAT64
-// proto::VarType::UINT8 -> 20 -> phi::DataType::UINT8
-static std::map<int, phi::DataType> var_type_map{{1, phi::DataType::INT16},
-                                                 {2, phi::DataType::INT32},
-                                                 {3, phi::DataType::INT64},
-                                                 {4, phi::DataType::FLOAT16},
-                                                 {5, phi::DataType::FLOAT32},
-                                                 {6, phi::DataType::FLOAT64},
-                                                 {20, phi::DataType::UINT8}};
-
-static std::map<phi::DataType, int> map_to_var_type{{phi::DataType::INT16, 1},
-                                                    {phi::DataType::INT32, 2},
-                                                    {phi::DataType::INT64, 3},
-                                                    {phi::DataType::FLOAT16, 4},
-                                                    {phi::DataType::FLOAT32, 5},
-                                                    {phi::DataType::FLOAT64, 6},
-                                                    {phi::DataType::UINT8, 20}};
-
 #define _PhiForEachDataTypeHelper_(callback, cpp_type, data_type) \
   callback(cpp_type, data_type);
@@ -136,39 +111,98 @@ inline DataType ToRealType(const DataType& type) {
   }
 }
-inline std::string DataType2String(DataType dtype) {
-  switch (dtype) {
-    case DataType::BOOL:
-      return "bool";
-    case DataType::INT8:
-      return "int8";
-    case DataType::UINT8:
-      return "uint8";
-    case DataType::INT16:
-      return "int16";
-    case DataType::INT32:
-      return "int32";
-    case DataType::INT64:
-      return "int64";
-    case DataType::FLOAT16:
-      return "float16";
-    case DataType::FLOAT32:
-      return "float32";
-    case DataType::FLOAT64:
-      return "float64";
-    case DataType::COMPLEX64:
-      return "complex64";
-    case DataType::COMPLEX128:
-      return "complex128";
-    case DataType::PSTRING:
-      return "pstring";
-    case DataType::BFLOAT16:
-      return "bfloat16";
-    default:
-      PADDLE_THROW(
-          errors::InvalidArgument("Unknow phi::DataType, the int value = %d.",
-                                  static_cast<int>(dtype)));
-      return "";
-  }
-}
+// In some cases we need to use the conversion between phi::DataType and
+// fluid proto::VarType::Type, but can't depend on the proto::VarType::Type.
+// So here we defined an enum type ProtoDataType which corresponds to
+// proto::VarType::Type in fluid, but keeps only the data types we need.
+// Note: The ProtoDataType (defined here) and proto::VarType::Type (defined
+// in framework.pb.h) need to be modified simultaneously.
+enum ProtoDataType {
+  BOOL = 0,
+  INT16 = 1,
+  INT32 = 2,
+  INT64 = 3,
+  FP16 = 4,
+  FP32 = 5,
+  FP64 = 6,
+  UINT8 = 20,
+  INT8 = 21,
+  BF16 = 22,
+  COMPLEX64 = 23,
+  COMPLEX128 = 24,
+  PSTRING = 29
+};
+
+inline DataType TransToPhiDataType(const int& dtype) {
+  // Set the order of case branches according to the frequency with
+  // the data type is used
+  switch (dtype) {
+    case ProtoDataType::FP32:
+      return DataType::FLOAT32;
+    case ProtoDataType::FP64:
+      return DataType::FLOAT64;
+    case ProtoDataType::INT64:
+      return DataType::INT64;
+    case ProtoDataType::INT32:
+      return DataType::INT32;
+    case ProtoDataType::INT8:
+      return DataType::INT8;
+    case ProtoDataType::UINT8:
+      return DataType::UINT8;
+    case ProtoDataType::INT16:
+      return DataType::INT16;
+    case ProtoDataType::COMPLEX64:
+      return DataType::COMPLEX64;
+    case ProtoDataType::COMPLEX128:
+      return DataType::COMPLEX128;
+    case ProtoDataType::FP16:
+      return DataType::FLOAT16;
+    case ProtoDataType::BF16:
+      return DataType::BFLOAT16;
+    case ProtoDataType::BOOL:
+      return DataType::BOOL;
+    case ProtoDataType::PSTRING:
+      return DataType::PSTRING;
+    default:
+      return DataType::UNDEFINED;
+  }
+}
+
+inline int TransToProtoVarType(const DataType& dtype) {
+  // Set the order of case branches according to the frequency with
+  // the data type is used
+  switch (dtype) {
+    case DataType::FLOAT32:
+      return ProtoDataType::FP32;
+    case DataType::FLOAT64:
+      return ProtoDataType::FP64;
+    case DataType::INT64:
+      return ProtoDataType::INT64;
+    case DataType::INT32:
+      return ProtoDataType::INT32;
+    case DataType::INT8:
+      return ProtoDataType::INT8;
+    case DataType::UINT8:
+      return ProtoDataType::UINT8;
+    case DataType::INT16:
+      return ProtoDataType::INT16;
+    case DataType::COMPLEX64:
+      return ProtoDataType::COMPLEX64;
+    case DataType::COMPLEX128:
+      return ProtoDataType::COMPLEX128;
+    case DataType::FLOAT16:
+      return ProtoDataType::FP16;
+    case DataType::BFLOAT16:
+      return ProtoDataType::BF16;
+    case DataType::BOOL:
+      return ProtoDataType::BOOL;
+    case DataType::PSTRING:
+      return ProtoDataType::PSTRING;
+    default:
+      PADDLE_THROW(phi::errors::Unimplemented(
+          "Unsupported data type `%s` when casting it into "
+          "paddle data type.",
+          dtype));
+  }
+}
...
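The note above the new ProtoDataType enum says it must be kept in sync with proto::VarType::Type by hand. In a translation unit that can see both headers (for example a fluid-side test), a compile-time guard along these lines would catch drift; this is a sketch, not part of the PR, and it assumes the generated framework.pb.h is on the include path:

#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/phi/core/utils/data_type.h"

// Each assertion ties a phi::ProtoDataType member to its fluid counterpart.
static_assert(phi::ProtoDataType::FP32 == paddle::framework::proto::VarType::FP32,
              "keep ProtoDataType in sync with proto::VarType");
static_assert(phi::ProtoDataType::UINT8 == paddle::framework::proto::VarType::UINT8,
              "keep ProtoDataType in sync with proto::VarType");
static_assert(phi::ProtoDataType::BF16 == paddle::framework::proto::VarType::BF16,
              "keep ProtoDataType in sync with proto::VarType");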
@@ -133,9 +133,9 @@ void ArgMinMaxInferMeta(const MetaTensor& x,
       phi::errors::InvalidArgument(
           "The attribute of dtype in argmin/argmax must be [%s] or [%s], but "
           "received [%s]",
-          phi::DataType2String(DataType::INT32),
-          phi::DataType2String(DataType::INT64),
-          phi::DataType2String(var_type_map[dtype])));
+          phi::DataTypeToString(DataType::INT32),
+          phi::DataTypeToString(DataType::INT64),
+          phi::DataTypeToString(phi::TransToPhiDataType(dtype))));
   if (!config.is_runtime && axis.FromTensor()) {
     std::vector<int64_t> vec;
@@ -177,7 +177,7 @@ void ArgMinMaxInferMeta(const MetaTensor& x,
   auto x_rank = x_dims.size();
   if (int_axis < 0) int_axis += x_rank;
   if (config.is_runtime) {
-    if (dtype == map_to_var_type[DataType::INT32]) {
+    if (dtype == phi::TransToProtoVarType(DataType::INT32)) {
       int64_t all_element_num = 0;
       if (flatten) {
         all_element_num = phi::product(x_dims);
...
@@ -149,7 +149,7 @@ void ArgMinMaxKernel(const Context& dev_ctx,
     return;
   }
   phi::VisitDataTypeTiny(
-      var_type_map[dtype],
+      phi::TransToPhiDataType(dtype),
       VisitDataArgMinMaxFunctor<Context, T, EnumArgMinMaxValue>(
           dev_ctx, x, axis.to<int64_t>(), keepdims, flatten, out));
 }
...
@@ -81,9 +81,9 @@ void IndexSampleGradKernel(const Context& ctx,
       errors::InvalidArgument(
           "Input(Index) holds the wrong type, it holds %s, but "
           "desires to be %s or %s",
-          phi::DataType2String(index_type),
-          phi::DataType2String(DataType::INT32),
-          phi::DataType2String(DataType::INT64)));
+          phi::DataTypeToString(index_type),
+          phi::DataTypeToString(DataType::INT32),
+          phi::DataTypeToString(DataType::INT64)));
   if (index_type == DataType::INT32) {
     IndexSampleGradInner<T, Context, int>(ctx, out_grad, index, x_grad);
   } else if (index_type == DataType::INT64) {
...
@@ -94,9 +94,9 @@ void IndexSampleKernel(const Context &ctx,
       errors::InvalidArgument(
           "Input(Index) holds the wrong type, it holds %s, but "
           "desires to be %s or %s",
-          phi::DataType2String(index_type),
-          phi::DataType2String(DataType::INT32),
-          phi::DataType2String(DataType::INT64)));
+          phi::DataTypeToString(index_type),
+          phi::DataTypeToString(DataType::INT32),
+          phi::DataTypeToString(DataType::INT64)));
   if (index_type == DataType::INT32) {
     IndexSampleInner<T, Context, int>(ctx, x, index, out);
   } else if (index_type == DataType::INT64) {
...
@@ -32,7 +32,7 @@ void UniqueConsecutiveKernel(const Context& dev_ctx,
                              DenseTensor* out,
                              DenseTensor* index,
                              DenseTensor* counts) {
-  auto data_type = var_type_map[dtype];
+  auto data_type = phi::TransToPhiDataType(dtype);
   if (data_type == phi::DataType::INT32) {
     PADDLE_ENFORCE_LE(
         x.numel(),
...
@@ -82,9 +82,9 @@ struct UniqueOpFunctor {
         phi::errors::InvalidArgument(
             "Index holds the wrong type, it holds %s, "
             "but desires to be %s or %s",
-            phi::DataType2String(index_type),
-            phi::DataType2String(DataType::INT32),
-            phi::DataType2String(DataType::INT64)));
+            phi::DataTypeToString(index_type),
+            phi::DataTypeToString(DataType::INT32),
+            phi::DataTypeToString(DataType::INT64)));
     if (index_type == DataType::INT32) {
       for (auto i = 0; i < in_->numel(); ++i) {
...
@@ -215,7 +215,7 @@ void ArgMinMaxOpCUDAKernel(const Context& dev_ctx,
     return;
   }
   phi::VisitDataTypeTiny(
-      var_type_map[dtype],
+      phi::TransToPhiDataType(dtype),
      VisitDataCudaArgMinMaxFunctor<Context, T, Reducer>(
          dev_ctx, x, axis.to<int64_t>(), keepdims, flatten, out));
 }
...
@@ -75,9 +75,9 @@ void IndexSampleGradKernel(const Context& ctx,
       errors::InvalidArgument(
           "Input(Index) holds the wrong type, it holds %s, but "
           "desires to be %s or %s",
-          phi::DataType2String(index_type),
-          phi::DataType2String(DataType::INT32),
-          phi::DataType2String(DataType::INT64)));
+          phi::DataTypeToString(index_type),
+          phi::DataTypeToString(DataType::INT32),
+          phi::DataTypeToString(DataType::INT64)));
   auto stream = reinterpret_cast<const phi::GPUContext&>(ctx).stream();
   auto input_num = x.numel();
...
@@ -64,9 +64,9 @@ void IndexSampleKernel(const Context& ctx,
       errors::InvalidArgument(
           "Input(Index) holds the wrong type, it holds %s, but "
           "desires to be %s or %s",
-          phi::DataType2String(index_type),
-          phi::DataType2String(DataType::INT32),
-          phi::DataType2String(DataType::INT64)));
+          phi::DataTypeToString(index_type),
+          phi::DataTypeToString(DataType::INT32),
+          phi::DataTypeToString(DataType::INT64)));
   const T* in_data = x.data<T>();
   T* out_data = ctx.template Alloc<T>(out);
   auto stream = reinterpret_cast<const phi::GPUContext&>(ctx).stream();
...
@@ -33,7 +33,7 @@ void UniqueConsecutiveKernel(const Context& dev_ctx,
                              DenseTensor* out,
                              DenseTensor* index,
                              DenseTensor* counts) {
-  auto data_type = var_type_map[dtype];
+  auto data_type = phi::TransToPhiDataType(dtype);
   if (data_type == phi::DataType::INT32) {
     PADDLE_ENFORCE_LE(
         x.numel() + 1,
...
@@ -17,11 +17,11 @@ limitations under the License. */
 #include <vector>
-#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h"
 #include "paddle/phi/backends/dynload/cudnn_frontend.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/utils/data_type.h"
 #include "paddle/phi/kernels/autotune/cache.h"
 #include "paddle/phi/kernels/autotune/switch_autotune.h"
@@ -95,8 +95,7 @@ class CudnnFrontendConvHelper {
         .setStrides(strides.size(), strides.data())
         .setId(id)
         .setAlignment(GetAlignment(tensor))
-        .setDataType(paddle::platform::ToCudnnDataType(
-            paddle::framework::TransToProtoVarType(tensor->dtype())))
+        .setDataType(paddle::platform::ToCudnnDataType(tensor->dtype()))
         .build();
   }
...