Unverified · Commit 7e7e9404 authored by Aurelius84, committed by GitHub

[PTen]Migrate proto::VarType outside of Pten (#39411)

* #1 migrate dist-related type() -> dtype()

* move datatype function from pten -> fluid/framework

* change type() in imperative into convert(dtype())

* modify xx_tensor->type into xx_tensor->dtype

* change the set_type interface and the caller

* modify xx_tensor.type into xx_tensor.dtype

* fix mutable_data(place, dtype())

* change caller of mutable_data in pten and distributed

* change the caller of mutable_data in fluid/framework

* change the caller of mutable_data in imperative directory

* mutable_data: inference

* update the call of mutable_data

* transfer MakePtenScalarArray MakePtenScalar ResetHolderWithType

* pass the compile; the next step is to remove VarType from Pten

* fix all and remove VarType from pten; succeeds on Linux. Next task: the other platforms

* fix conflict with develop

* fix compiled error

* Fix reset conversion

* fix conflict

* fix compiled problem

* fix typo

* Fix << in tensor_utils.cc

* fix type->dtype

* fix unittest

* fix tensor init constructor

* fix DataTypeSize for BFloat16

* fix code style

* fix npu compiled error

* fix npu

* compile npu successfully

* fix conflict

* fix conflict
Co-authored-by: xiongkun <xiongkun03@baidu.com>
Parent 9c2cee1c
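For reference, a minimal, hypothetical sketch (editorial, not part of this commit) of the call-site migration the bullets above describe: proto::VarType-based calls are replaced by the pten DataType-based dtype() accessor plus the framework conversion helpers used throughout the diff below. The function and argument names here are placeholders.

// Editorial illustration only: hypothetical helper showing the migrated
// call patterns; tensor/place are placeholder arguments.
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/place.h"

void MigrationSketch(paddle::framework::LoDTensor* tensor,
                     const paddle::platform::Place& place) {
  namespace framework = paddle::framework;
  // Old: tensor->type() returned proto::VarType::Type directly.
  // New: tensor->dtype() returns a pten DataType; convert where proto is needed.
  auto proto_type = framework::TransToProtoVarType(tensor->dtype());
  // Old: framework::SizeOfType(tensor->type())
  // New: framework::DataTypeSize(tensor->dtype())
  auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
  // Old: tensor->mutable_data(place, framework::proto::VarType::FP32)
  // New: mutable_data now takes a pten DataType.
  tensor->mutable_data(
      place, framework::TransToPtenDataType(framework::proto::VarType::FP32));
  (void)proto_type;
  (void)data_len;
}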
......@@ -562,7 +562,7 @@ bool DistModel::FetchResults(std::vector<DistModelTensor> *output_data,
framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx);
auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var);
auto type = fetch.type();
auto type = framework::TransToProtoVarType(fetch.dtype());
auto output = &(output_data->at(i));
output->name = idx_to_fetches_[idx];
bool rst = false;
......
......@@ -13,8 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/distributed/ps/service/brpc_utils.h"
#include <arpa/inet.h>
#include <netdb.h>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
......@@ -98,25 +101,29 @@ void SerializeLodTensor(framework::Variable* var,
}
}
}
var_msg->set_data_type(static_cast<VarMsg::Type>(tensor->type()));
var_msg->set_data_type(static_cast<VarMsg::Type>(
framework::TransToProtoVarType(tensor->dtype())));
for (auto& dim : framework::vectorize(tensor->dims())) {
var_msg->add_dims(dim);
}
// IO Buffer
if (platform::is_cpu_place(tensor->place())) {
auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
iobuf->append(reinterpret_cast<const char*>(tensor->data()), data_len);
} else {
#ifdef PADDLE_WITH_CUDA
char* temp_ptr = new char[tensor->numel() *
framework::SizeOfType(tensor->type())]; // NOLINT
char* temp_ptr =
new char[tensor->numel() *
framework::DataTypeSize(tensor->dtype())]; // NOLINT
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(
platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(),
tensor->numel() * framework::SizeOfType(tensor->type()), stream);
auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
tensor->numel() * framework::SizeOfType(
framework::TransToProtoVarType(tensor->dtype())),
stream);
auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
iobuf->append(reinterpret_cast<const char*>(temp_ptr), data_len);
delete[] temp_ptr;
......@@ -139,25 +146,29 @@ void SerializeSelectedRows(framework::Variable* var,
var_data->resize(rows->size() * sizeof(int64_t));
char* data_ptr = const_cast<char*>(var_data->data());
memcpy(data_ptr, &((*rows)[0]), rows->size() * sizeof(int64_t));
var_msg->set_data_type(static_cast<VarMsg::Type>(tensor->type()));
var_msg->set_data_type(static_cast<VarMsg::Type>(
framework::TransToProtoVarType(tensor->dtype())));
for (auto& dim : framework::vectorize(tensor->dims())) {
var_msg->add_dims(dim);
}
// IO Buffer
if (platform::is_cpu_place(tensor->place())) {
auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
iobuf->append(reinterpret_cast<const char*>(tensor->data()), data_len);
} else {
#ifdef PADDLE_WITH_CUDA
char* temp_ptr = new char[tensor->numel() *
framework::SizeOfType(tensor->type())]; // NOLINT
char* temp_ptr =
new char[tensor->numel() *
framework::DataTypeSize(tensor->dtype())]; // NOLINT
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(
platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(),
tensor->numel() * framework::SizeOfType(tensor->type()), stream);
auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
tensor->numel() * framework::SizeOfType(
framework::TransToProtoVarType(tensor->dtype())),
stream);
auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
iobuf->append(reinterpret_cast<const char*>(temp_ptr), data_len);
delete[] temp_ptr;
......@@ -225,8 +236,9 @@ void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg,
}
tensor->set_lod(lod);
void* tensor_data =
tensor->mutable_data(place, VarMessageToVarType(msg.data_type()));
void* tensor_data = tensor->mutable_data(
place,
framework::TransToPtenDataType(VarMessageToVarType(msg.data_type())));
// IO Buffer
if (platform::is_cpu_place(place)) {
......@@ -236,15 +248,16 @@ void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg,
} else if (platform::is_gpu_place(place)) {
#ifdef PADDLE_WITH_CUDA
unsigned long data_len; // NOLINT
char* temp_ptr = new char[tensor->numel() *
framework::SizeOfType(tensor->type())]; // NOLINT
io_buffer_itr.copy_and_forward((void*)(&data_len), 8); // NOLINT
io_buffer_itr.copy_and_forward((void*)temp_ptr, data_len); // NOLINT
char* temp_ptr =
new char[tensor->numel() *
framework::DataTypeSize(tensor->dtype())]; // NOLINT
io_buffer_itr.copy_and_forward((void*)(&data_len), 8); // NOLINT
io_buffer_itr.copy_and_forward((void*)temp_ptr, data_len); // NOLINT
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(
place, tensor_data, platform::CPUPlace(), (void*)temp_ptr, // NOLINT
tensor->numel() * framework::SizeOfType(tensor->type()), stream);
tensor->numel() * framework::DataTypeSize(tensor->dtype()), stream);
delete[] temp_ptr;
#endif
}
......@@ -266,8 +279,9 @@ void DeserializeSelectedRows(
vec_dim.push_back(x);
}
tensor->Resize(framework::make_ddim(vec_dim));
void* tensor_data =
tensor->mutable_data(place, VarMessageToVarType(msg.data_type()));
void* tensor_data = tensor->mutable_data(
place,
framework::TransToPtenDataType(VarMessageToVarType(msg.data_type())));
// IO Buffer
if (platform::is_cpu_place(place)) {
unsigned long data_len; // NOLINT
......@@ -275,15 +289,16 @@ void DeserializeSelectedRows(
io_buffer_itr.copy_and_forward(tensor_data, data_len);
} else if (platform::is_gpu_place(place)) {
#ifdef PADDLE_WITH_CUDA
char* temp_ptr = new char[tensor->numel() *
framework::SizeOfType(tensor->type())]; // NOLINT
unsigned long data_len; // NOLINT
io_buffer_itr.copy_and_forward((void*)(&data_len), 8); // NOLINT
char* temp_ptr =
new char[tensor->numel() *
framework::DataTypeSize(tensor->dtype())]; // NOLINT
unsigned long data_len; // NOLINT
io_buffer_itr.copy_and_forward((void*)(&data_len), 8); // NOLINT
io_buffer_itr.copy_and_forward(temp_ptr, data_len);
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(place, tensor_data, platform::CPUPlace(), temp_ptr,
tensor->numel() * framework::SizeOfType(tensor->type()),
tensor->numel() * framework::DataTypeSize(tensor->dtype()),
stream);
delete[] temp_ptr;
#endif
......
......@@ -13,6 +13,8 @@
// limitations under the License.
#include "paddle/fluid/distributed/ps/service/heter_client.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/string/split.h"
......@@ -39,13 +41,13 @@ int GetMicroId(const platform::DeviceContext& ctx,
} else {
#ifdef PADDLE_WITH_CUDA
std::vector<char> temp;
temp.resize(tensor->numel() * framework::SizeOfType(tensor->type()));
temp.resize(tensor->numel() * framework::DataTypeSize(tensor->dtype()));
char* temp_ptr = temp.data();
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(
platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(),
tensor->numel() * framework::SizeOfType(tensor->type()), stream);
tensor->numel() * framework::DataTypeSize(tensor->dtype()), stream);
float* temp_ptr_float = reinterpret_cast<float*>(temp_ptr);
micro_id = static_cast<int>(temp_ptr_float[0]);
#endif
......
......@@ -15,6 +15,7 @@
#include "paddle/fluid/eager/grad_tensor_holder.h"
#include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/pten/kernels/funcs/math_function.h"
......
......@@ -452,4 +452,10 @@ endif()
cc_test(scope_guard_test SRCS scope_guard_test.cc)
cc_test(pten_utils_test SRCS pten_utils_test.cc DEPS pten_utils)
if(WITH_GPU OR WITH_ROCM)
cc_library(fluid_convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info)
else()
cc_library(fluid_convert_utils SRCS convert_utils.cc DEPS data_type place)
endif()
cc_test(custom_kernel_test SRCS custom_kernel_test.cc DEPS custom_kernel pten_tensor)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/convert_utils.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
namespace paddle {
namespace framework {
paddle::experimental::DataType TransToPtenDataType(
const paddle::framework::proto::VarType::Type& dtype) {
// Order the case branches by how frequently each data type is used
switch (dtype) {
case paddle::framework::proto::VarType::FP32:
return DataType::FLOAT32;
case paddle::framework::proto::VarType::FP64:
return DataType::FLOAT64;
case paddle::framework::proto::VarType::INT64:
return DataType::INT64;
case paddle::framework::proto::VarType::INT32:
return DataType::INT32;
case paddle::framework::proto::VarType::INT8:
return DataType::INT8;
case paddle::framework::proto::VarType::UINT8:
return DataType::UINT8;
case paddle::framework::proto::VarType::INT16:
return DataType::INT16;
case paddle::framework::proto::VarType::COMPLEX64:
return DataType::COMPLEX64;
case paddle::framework::proto::VarType::COMPLEX128:
return DataType::COMPLEX128;
case paddle::framework::proto::VarType::FP16:
return DataType::FLOAT16;
case paddle::framework::proto::VarType::BF16:
return DataType::BFLOAT16;
case paddle::framework::proto::VarType::BOOL:
return DataType::BOOL;
default:
return DataType::UNDEFINED;
}
}
paddle::framework::proto::VarType::Type TransToProtoVarType(
const paddle::experimental::DataType& dtype) {
// Order the case branches by how frequently each data type is used
switch (dtype) {
case DataType::FLOAT32:
return paddle::framework::proto::VarType::FP32;
case DataType::FLOAT64:
return paddle::framework::proto::VarType::FP64;
case DataType::INT64:
return paddle::framework::proto::VarType::INT64;
case DataType::INT32:
return paddle::framework::proto::VarType::INT32;
case DataType::INT8:
return paddle::framework::proto::VarType::INT8;
case DataType::UINT8:
return paddle::framework::proto::VarType::UINT8;
case DataType::INT16:
return paddle::framework::proto::VarType::INT16;
case DataType::COMPLEX64:
return paddle::framework::proto::VarType::COMPLEX64;
case DataType::COMPLEX128:
return paddle::framework::proto::VarType::COMPLEX128;
case DataType::FLOAT16:
return paddle::framework::proto::VarType::FP16;
case DataType::BFLOAT16:
return paddle::framework::proto::VarType::BF16;
case DataType::BOOL:
return paddle::framework::proto::VarType::BOOL;
default:
PADDLE_THROW(paddle::platform::errors::Unimplemented(
"Unsupported data type `%s` when casting it into "
"paddle data type.",
dtype));
}
}
size_t DataTypeSize(DataType dtype) {
switch (dtype) {
case DataType::UNDEFINED:
return 0;
case DataType::BOOL:
return sizeof(bool);
case DataType::INT8:
return sizeof(int8_t);
case DataType::UINT8:
return sizeof(uint8_t);
case DataType::INT16:
return sizeof(int16_t);
case DataType::INT32:
return sizeof(int);
case DataType::INT64:
return sizeof(int64_t);
case DataType::BFLOAT16:
return sizeof(paddle::platform::bfloat16);
case DataType::FLOAT16:
return sizeof(paddle::platform::float16);
case DataType::FLOAT32:
return sizeof(float);
case DataType::FLOAT64:
return sizeof(double);
case DataType::COMPLEX64:
return sizeof(paddle::platform::complex<float>);
case DataType::COMPLEX128:
return sizeof(paddle::platform::complex<double>);
default:
return 0;
}
}
DataType String2DataType(const std::string& str) {
if (str == "bool") {
return DataType::BOOL;
} else if (str == "float16") {
return DataType::FLOAT16;
} else if (str == "float32") {
return DataType::FLOAT32;
} else if (str == "float64") {
return DataType::FLOAT64;
} else if (str == "int8") {
return DataType::INT8;
} else if (str == "int16") {
return DataType::INT16;
} else if (str == "int32") {
return DataType::INT32;
} else if (str == "int64") {
return DataType::INT64;
} else if (str == "uint8") {
return DataType::UINT8;
} else if (str == "complex64") {
return DataType::COMPLEX64;
} else if (str == "complex128") {
return DataType::COMPLEX128;
} else {
return DataType::UNDEFINED;
}
}
std::string DataType2String(DataType dtype) {
switch (dtype) {
case DataType::BOOL:
return "bool";
case DataType::INT8:
return "int8";
case DataType::UINT8:
return "uint8";
case DataType::INT16:
return "int16";
case DataType::INT32:
return "int32";
case DataType::INT64:
return "int64";
case DataType::FLOAT16:
return "float16";
case DataType::FLOAT32:
return "float32";
case DataType::FLOAT64:
return "float64";
case DataType::COMPLEX64:
return "complex64";
case DataType::COMPLEX128:
return "complex128";
default:
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"Unknow pten::DataType, the int value = %d.",
static_cast<int>(dtype)));
return "";
}
}
} // namespace framework
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/pten/common/backend.h"
#include "paddle/pten/common/data_type.h"
#include "paddle/pten/common/layout.h"
#include "paddle/pten/core/tensor_meta.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/place.h"
// TODO(chenweihang): this file may need to be removed
namespace paddle {
namespace framework {
using DataType = paddle::experimental::DataType;
using DataLayout = paddle::experimental::DataLayout;
DataType TransToPtenDataType(
const paddle::framework::proto::VarType::Type& dtype);
paddle::framework::proto::VarType::Type TransToProtoVarType(
const DataType& dtype);
size_t DataTypeSize(DataType dtype);
DataType String2DataType(const std::string& str);
std::string DataType2String(DataType dtype);
} // namespace framework
} // namespace paddle
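A brief, hypothetical usage sketch of the conversion helpers declared in this header (editorial, illustrative values only; not part of the commit):

// Editorial illustration only: exercising the conversion helpers above.
#include <cassert>
#include "paddle/fluid/framework/convert_utils.h"

void ConvertUtilsSketch() {
  using paddle::framework::DataType;
  // Round-trip between proto::VarType::Type and pten DataType.
  auto pten_dtype = paddle::framework::TransToPtenDataType(
      paddle::framework::proto::VarType::FP32);
  assert(pten_dtype == DataType::FLOAT32);
  assert(paddle::framework::TransToProtoVarType(pten_dtype) ==
         paddle::framework::proto::VarType::FP32);
  // Element size and string conversions for a pten DataType.
  assert(paddle::framework::DataTypeSize(DataType::FLOAT32) == sizeof(float));
  assert(paddle::framework::DataType2String(DataType::FLOAT32) == "float32");
  assert(paddle::framework::String2DataType("float32") == DataType::FLOAT32);
}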
......@@ -26,6 +26,7 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_meta_info_helper.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
......@@ -777,12 +778,14 @@ void RegisterOperatorWithMetaInfo(const std::vector<OpMetaInfo>& op_meta_infos,
std::vector<DataType> vec_custom_dtype;
for (size_t i = 0; i < ctx->InputSize(in_name); ++i) {
auto dtype = ctx->GetInputDataType(in_name, i);
vec_custom_dtype.emplace_back(pten::TransToPtenDataType(dtype));
vec_custom_dtype.emplace_back(
paddle::framework::TransToPtenDataType(dtype));
}
vec_input_dtypes.emplace_back(vec_custom_dtype);
} else {
auto dtype = ctx->GetInputDataType(in_name);
input_dtypes.emplace_back(pten::TransToPtenDataType(dtype));
input_dtypes.emplace_back(
paddle::framework::TransToPtenDataType(dtype));
}
}
......@@ -794,12 +797,14 @@ void RegisterOperatorWithMetaInfo(const std::vector<OpMetaInfo>& op_meta_infos,
auto out_name = op_outputs[i];
if (detail::IsDuplicableVar(out_name)) {
for (size_t j = 0; j < output_dtypes.size(); ++j) {
auto dtype = pten::TransToProtoVarType(output_dtypes[i]);
auto dtype =
paddle::framework::TransToProtoVarType(output_dtypes[i]);
ctx->SetOutputDataType(out_name, dtype, j);
}
} else {
ctx->SetOutputDataType(out_name,
pten::TransToProtoVarType(output_dtypes[i]));
ctx->SetOutputDataType(
out_name,
paddle::framework::TransToProtoVarType(output_dtypes[i]));
}
}
};
......
......@@ -18,6 +18,7 @@
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_reuse.h"
#endif
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle {
namespace framework {
......@@ -79,10 +80,10 @@ void TransDataLayout(const OpKernelType& kernel_type_for_var,
}
out->Resize(make_ddim(dst_dim));
out->mutable_data(expected_kernel_type.place_, in.type());
out->mutable_data(expected_kernel_type.place_, in.dtype());
framework::VisitDataType(
in.type(),
framework::TransToProtoVarType(in.dtype()),
CastDataLayout(pool.Get(expected_kernel_type.place_), axis, in, out));
out->set_layout(expected_kernel_type.data_layout_);
......@@ -153,11 +154,13 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
auto in_tz = paddle::framework::vectorize<int64_t>(in.dims());
auto out_tz = in_tz;
memory::data_type in_type = ToMKLDNNDataType(in.type());
PADDLE_ENFORCE_NE(in_type, memory::data_type::undef,
platform::errors::InvalidArgument(
"Input tensor type (%s) is not supported.",
DataTypeToString(in.type())));
memory::data_type in_type =
ToMKLDNNDataType(framework::TransToProtoVarType(in.dtype()));
PADDLE_ENFORCE_NE(
in_type, memory::data_type::undef,
platform::errors::InvalidArgument(
"Input tensor type (%s) is not supported.",
DataTypeToString(framework::TransToProtoVarType(in.dtype()))));
auto in_format = platform::MKLDNNFormatForSize(in_tz.size(), in.format());
auto out_format =
......@@ -169,8 +172,8 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
if ((in_format != out_format) || always_copy) {
void* in_data = GetDataFromTensor(in, in_type);
platform::ReorderMKLDNNHandler handler(in_tz, in.type(), in_type,
cpu_engine);
platform::ReorderMKLDNNHandler handler(
in_tz, framework::TransToProtoVarType(in.dtype()), in_type, cpu_engine);
auto reorder_src_memory_p = handler.AcquireSrcMemory(in_format, in_data);
auto reorder_dst_memory_p =
......
......@@ -16,6 +16,7 @@
#include <string>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"
TEST(DataType, float16) {
......@@ -27,10 +28,11 @@ TEST(DataType, float16) {
Tensor tensor;
CPUPlace cpu;
tensor.mutable_data(cpu, dtype);
tensor.mutable_data(cpu, f::TransToPtenDataType(dtype));
// test fp16 tensor
EXPECT_EQ(tensor.type(), f::ToDataType(typeid(float16)));
EXPECT_EQ(f::TransToProtoVarType(tensor.dtype()),
f::ToDataType(typeid(float16)));
// test fp16 size
EXPECT_EQ(f::SizeOfType(dtype), 2u);
......@@ -49,10 +51,11 @@ TEST(DataType, bfloat16) {
Tensor tensor;
CPUPlace cpu;
tensor.mutable_data(cpu, dtype);
tensor.mutable_data(cpu, f::TransToPtenDataType(dtype));
// test bf16 tensor
EXPECT_EQ(tensor.type(), f::ToDataType(typeid(bfloat16)));
EXPECT_EQ(f::TransToProtoVarType(tensor.dtype()),
f::ToDataType(typeid(bfloat16)));
// test bf16 size
EXPECT_EQ(f::SizeOfType(dtype), 2u);
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/framework/data_type_transform.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/platform/transform.h"
......@@ -65,12 +66,14 @@ struct CastDataType {
void TransDataType(const OpKernelType& kernel_type_for_var,
const OpKernelType& expected_kernel_type, const Tensor& in,
Tensor* out) {
PADDLE_ENFORCE_EQ(in.type(), kernel_type_for_var.data_type_,
platform::errors::InvalidArgument(
"The src dtype(%s) of input tensor and kernel_type(%s) "
"are not conststent.",
DataTypeToString(in.type()),
DataTypeToString(kernel_type_for_var.data_type_)));
PADDLE_ENFORCE_EQ(
framework::TransToProtoVarType(in.dtype()),
kernel_type_for_var.data_type_,
platform::errors::InvalidArgument(
"The src dtype(%s) of input tensor and kernel_type(%s) "
"are not conststent.",
DataTypeToString(framework::TransToProtoVarType(in.dtype())),
DataTypeToString(kernel_type_for_var.data_type_)));
auto dst_type = expected_kernel_type.data_type_;
TransDataType(in, dst_type, out);
}
......@@ -81,7 +84,7 @@ void TransDataType(const Tensor& in,
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
out->Resize(in.dims());
auto src_type = in.type();
auto src_type = framework::TransToProtoVarType(in.dtype());
auto dst_type = type;
auto ctx = pool.Get(in.place());
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/framework/details/all_reduce_op_handle.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/platform/place.h"
......@@ -127,7 +128,7 @@ void AllReduceOpHandle::AllReduceImpl(
platform::errors::PreconditionNotMet(
"The numel of tensor %s should be > 0, but got numel is %d.",
in_var_handles[i]->name(), numel));
dtype = lod_tensor.type();
dtype = framework::TransToProtoVarType(lod_tensor.dtype());
is_gpu_place = platform::is_gpu_place(lod_tensor.place());
#if defined(PADDLE_WITH_XPU_BKCL)
is_xpu_place = platform::is_xpu_place(lod_tensor.place());
......@@ -139,7 +140,7 @@ void AllReduceOpHandle::AllReduceImpl(
"The size of tensors of the same variable in different local "
"scopes should be equal."));
PADDLE_ENFORCE_EQ(
dtype, lod_tensor.type(),
dtype, framework::TransToProtoVarType(lod_tensor.dtype()),
platform::errors::PreconditionNotMet(
"The dtype of tensors of the same variable in different local "
"scopes should be equal."));
......@@ -227,14 +228,15 @@ void AllReduceOpHandle::AllReduceFunc(
// Reduce All Tensor to trg in CPU
ReduceBufferData func(lod_tensor_data, trg.data(), numel);
VisitDataType(trg.type(), func);
VisitDataType(framework::TransToProtoVarType(trg.dtype()), func);
for (size_t i = 1; i < local_exec_scopes_.size(); ++i) {
auto &scope = local_exec_scopes_[i];
auto &p = places[i];
auto *var = scope->FindVar(out_var_names[i]);
size_t size = numel * SizeOfType(trg.type());
size_t size =
numel * SizeOfType(framework::TransToProtoVarType(trg.dtype()));
RunAndRecordEvent(p, [&trg, var, p, size] {
auto dst_ptr = var->GetMutable<framework::LoDTensor>()->data();
platform::CPUPlace cpu_place;
......
......@@ -14,6 +14,7 @@
#include "paddle/fluid/framework/details/broadcast_op_handle.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/platform/place.h"
......@@ -87,7 +88,8 @@ void BroadcastOpHandle::BroadcastOneVar(
int root_id = in_tensor.place().device;
std::vector<std::function<void()>> broadcast_calls;
int type = platform::ToNCCLDataType(in_tensor.type());
int type = platform::ToNCCLDataType(
framework::TransToProtoVarType(in_tensor.dtype()));
size_t numel = static_cast<size_t>(in_tensor.numel());
for (auto out_var_handle : out_var_handles) {
......@@ -147,7 +149,8 @@ void BroadcastOpHandle::BroadcastOneVar(
int root_id = in_tensor.place().device;
std::vector<std::function<void()>> broadcast_calls;
int type = platform::ToBKCLDataType(in_tensor.type());
int type = platform::ToBKCLDataType(
framework::TransToProtoVarType(in_tensor.dtype()));
size_t numel = static_cast<size_t>(in_tensor.numel());
for (auto out_var_handle : out_var_handles) {
......@@ -239,7 +242,7 @@ void BroadcastOpHandle::InitOutputValue(
}
VariableVisitor::ShareDimsAndLoD(*in_var, out_var);
VariableVisitor::GetMutableTensor(out_var).mutable_data(t_out_p,
in_tensor.type());
in_tensor.dtype());
}
}
......
......@@ -16,6 +16,7 @@
#include <string>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle {
......@@ -49,7 +50,7 @@ static void CheckTensorAttrs(const LoDTensor *tensor,
if (tensor->numel() && tensor->IsInitialized()) {
// step1: check type
PADDLE_ENFORCE_EQ(
type, tensor->type(),
type, framework::TransToProtoVarType(tensor->dtype()),
platform::errors::InvalidArgument(
"The data type of fetched Tensors or the items of fetched "
"LoDTensorArray are different from each other on different "
......@@ -57,7 +58,7 @@ static void CheckTensorAttrs(const LoDTensor *tensor,
"(th) fetched variable. Please set the "
"parameter `return_merged = False` when you "
"call the `Executor.run()` method.",
DataTypeToString(type), DataTypeToString(tensor->type()), offset));
DataTypeToString(type), tensor->dtype(), offset));
// step2: check layout
PADDLE_ENFORCE_EQ(
......@@ -139,7 +140,7 @@ void FetchAsyncOpHandle::FetchMergedLodTensor(
for (auto *t : src_lodtensors) {
if (t->numel() && t->IsInitialized()) {
check_dim = t->dims();
new_type = t->type();
new_type = paddle::framework::TransToProtoVarType(t->dtype());
new_layout = t->layout();
break;
}
......@@ -169,10 +170,10 @@ void FetchAsyncOpHandle::FetchMergedLodTensor(
dst_lodtensor->set_lod(src_lodtensors[0]->lod());
if (platform::is_gpu_place(src_lodtensors[0]->place())) {
dst_lodtensor->mutable_data(platform::CUDAPinnedPlace(),
src_lodtensors[0]->type());
src_lodtensors[0]->dtype());
} else {
dst_lodtensor->mutable_data(platform::CPUPlace(),
src_lodtensors[0]->type());
src_lodtensors[0]->dtype());
}
// slice and memcpy
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/framework/details/fused_all_reduce_op_handle.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/platform/device_memory_aligment.h"
......@@ -337,7 +338,8 @@ void FusedAllReduceOpHandle::GetDTypeAndNumel(
size_t size_of_dtype = 0;
for (size_t i = 0; i < grad_tensor.size(); ++i) {
// Get dtype
auto ele_dtype = grad_tensor.at(i).second->type();
auto ele_dtype =
framework::TransToProtoVarType(grad_tensor.at(i).second->dtype());
if (i == 0) {
*dtype = ele_dtype;
size_of_dtype = framework::SizeOfType(ele_dtype);
......
......@@ -115,7 +115,7 @@ void GatherOpHandle::RunImpl() {
DDim out_dim = pre_in_value.GetCompleteDims();
out_dim[0] = static_cast<int64_t>(rows);
out_value->mutable_value()->Resize(out_dim).mutable_data(
t_out_p, pre_in_value.value().type());
t_out_p, pre_in_value.value().dtype());
Tensor *out_tensor = out_value->mutable_value();
// copy
......
......@@ -19,6 +19,7 @@
#ifdef PADDLE_WITH_ASCEND_CL
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#endif
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle {
namespace framework {
......@@ -307,7 +308,7 @@ void tensor_check<platform::CPUDeviceContext>(const std::string& op_type,
const platform::Place& place) {
TensorCheckerVisitor<platform::CPUDeviceContext> vistor(op_type, var_name,
tensor, place);
VisitDataType(tensor.type(), vistor);
VisitDataType(framework::TransToProtoVarType(tensor.dtype()), vistor);
}
void CheckVarHasNanOrInf(const std::string& op_type,
......@@ -348,7 +349,8 @@ void CheckVarHasNanOrInf(const std::string& op_type,
return;
} else if (platform::is_xpu_place(tensor->place())) {
#ifdef PADDLE_WITH_XPU
if (tensor->type() != proto::VarType::FP32) {
if (framework::TransToProtoVarType(tensor->dtype()) !=
proto::VarType::FP32) {
return;
}
......@@ -377,14 +379,15 @@ void CheckVarHasNanOrInf(const std::string& op_type,
return;
} else if (platform::is_npu_place(tensor->place())) {
#ifdef PADDLE_WITH_ASCEND_CL
if (tensor->type() != proto::VarType::FP32) {
if (framework::TransToProtoVarType(tensor->dtype()) !=
proto::VarType::FP32) {
return;
}
framework::LoDTensor cpu_tensor;
cpu_tensor.Resize(tensor->dims());
float* cpu_data = static_cast<float*>(
cpu_tensor.mutable_data(platform::CPUPlace(), tensor->type()));
cpu_tensor.mutable_data(platform::CPUPlace(), tensor->dtype()));
framework::TensorCopySync(*tensor, platform::CPUPlace(), &cpu_tensor);
bool flag = false;
......@@ -475,8 +478,10 @@ void PrintNpuVarInfo(const std::string& op_type, const std::string& var_name,
return;
}
if ((tensor->type() != proto::VarType::FP32) &&
(tensor->type() != proto::VarType::FP16)) {
if ((framework::TransToProtoVarType(tensor->dtype()) !=
proto::VarType::FP32) &&
(framework::TransToProtoVarType(tensor->dtype()) !=
proto::VarType::FP16)) {
return;
}
......@@ -490,16 +495,17 @@ void PrintNpuVarInfo(const std::string& op_type, const std::string& var_name,
framework::Tensor cpu_tensor;
cpu_tensor.Resize(tensor->dims());
cpu_tensor.mutable_data(platform::CPUPlace(), tensor->type());
cpu_tensor.mutable_data(platform::CPUPlace(), tensor->dtype());
framework::TensorCopySync(*tensor, platform::CPUPlace(), &cpu_tensor);
LOG(WARNING) << "print [" << var_name << "] tensor info:";
// use env strategy control in future, -1=print_all.
int print_num = 3;
if (tensor->type() == proto::VarType::FP32) {
if (framework::TransToProtoVarType(tensor->dtype()) == proto::VarType::FP32) {
const float* value = cpu_tensor.data<float>();
PrintNanInf(value, tensor->numel(), print_num, op_type, var_name, false);
} else if (tensor->type() == proto::VarType::FP16) {
} else if (framework::TransToProtoVarType(tensor->dtype()) ==
proto::VarType::FP16) {
const paddle::platform::float16* value =
cpu_tensor.data<paddle::platform::float16>();
PrintNanInf(value, tensor->numel(), print_num, op_type, var_name, false);
......
......@@ -19,6 +19,7 @@
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle {
namespace framework {
......@@ -208,7 +209,7 @@ void tensor_check<platform::CUDADeviceContext>(const std::string& op_type,
TensorCheckerVisitor<platform::CUDADeviceContext> vistor(op_type, var_name,
tensor, place);
VisitDataType(tensor.type(), vistor);
VisitDataType(framework::TransToProtoVarType(tensor.dtype()), vistor);
}
} // namespace details
......
......@@ -130,7 +130,8 @@ struct GatherLocalSelectedRowsFunctor {
DDim out_dim = pre_in->GetCompleteDims();
out_dim[0] = static_cast<int64_t>(rows);
dst_tensor.mutable_value()->Resize(out_dim);
dst_tensor.mutable_value()->mutable_data(out_place, pre_in->value().type());
dst_tensor.mutable_value()->mutable_data(out_place,
pre_in->value().dtype());
}
void operator()() {
......
......@@ -14,6 +14,7 @@
#include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
......@@ -150,7 +151,8 @@ void ReduceOpHandle::RunImpl() {
if (!FLAGS_cpu_deterministic) {
ReduceLoDTensor func(lod_tensors,
out_var->GetMutable<framework::LoDTensor>());
VisitDataType(lod_tensors[0]->type(), func);
VisitDataType(framework::TransToProtoVarType(lod_tensors[0]->dtype()),
func);
} else {
// We sum lod_tensors to reduce_sum_trg which is in local_scopes_0
// here, but it doesn't mean reduce_sum_trg must be in local_scopes_0.
......@@ -158,7 +160,8 @@ void ReduceOpHandle::RunImpl() {
->FindVar(out_var_handle->name())
->GetMutable<framework::LoDTensor>();
ReduceLoDTensor func(lod_tensors, &reduce_sum_trg);
VisitDataType(lod_tensors[0]->type(), func);
VisitDataType(framework::TransToProtoVarType(lod_tensors[0]->dtype()),
func);
auto trg = out_var->GetMutable<framework::LoDTensor>();
if (reduce_sum_trg.data() != trg->data()) {
......@@ -171,7 +174,7 @@ void ReduceOpHandle::RunImpl() {
auto pre_in = pre_in_var->Get<framework::LoDTensor>();
VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var);
VariableVisitor::GetMutableTensor(out_var).mutable_data(
out_var_handle->place(), pre_in.type());
out_var_handle->place(), pre_in.dtype());
auto out_p = out_var_handle->place();
int root_id = out_p.device;
......@@ -191,7 +194,8 @@ void ReduceOpHandle::RunImpl() {
out_var_handle->place());
}
int type = platform::ToNCCLDataType(lod_tensor.type());
int type = platform::ToNCCLDataType(
framework::TransToProtoVarType(lod_tensor.dtype()));
size_t numel = static_cast<size_t>(lod_tensor.numel());
all_reduce_calls.emplace_back(
[buffer, recvbuffer, type, numel, root_id, &nccl_ctx] {
......@@ -217,7 +221,7 @@ void ReduceOpHandle::RunImpl() {
auto pre_in = pre_in_var->Get<framework::LoDTensor>();
VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var);
VariableVisitor::GetMutableTensor(out_var).mutable_data(
out_var_handle->place(), pre_in.type());
out_var_handle->place(), pre_in.dtype());
auto out_p = out_var_handle->place();
int root_id = out_p.device;
......@@ -237,7 +241,8 @@ void ReduceOpHandle::RunImpl() {
out_var_handle->place());
}
int type = platform::ToBKCLDataType(lod_tensor.type());
int type = platform::ToBKCLDataType(
framework::TransToProtoVarType(lod_tensor.dtype()));
size_t numel = static_cast<size_t>(lod_tensor.numel());
all_reduce_calls.emplace_back([buffer, recvbuffer, type, numel, root_id,
&bkcl_ctx] {
......
......@@ -17,6 +17,7 @@
#include <utility>
#include "dgc/dgc.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
......@@ -151,7 +152,9 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
auto &out = *outs[i];
float *out_tensor_buf = out.data<float>();
dtype = (dtype == -1) ? platform::ToNCCLDataType(in.type()) : dtype;
dtype = (dtype == -1) ? platform::ToNCCLDataType(
framework::TransToProtoVarType(in.dtype()))
: dtype;
in_numel = (in_numel == 0) ? static_cast<size_t>(in.numel()) : in_numel;
PADDLE_ENFORCE_EQ(in_numel % 2, 0,
platform::errors::InvalidArgument(
......
......@@ -14,6 +14,7 @@
#include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
namespace pten {
......@@ -115,7 +116,7 @@ struct EnforceShapeAndDTypeEQVisitor {
"The place type of the two variables is not equal. The src place "
"is %s, but the dst place is %s",
src.place().DebugString(), tensor.place().DebugString()));
PADDLE_ENFORCE_EQ(src.type(), tensor.type(),
PADDLE_ENFORCE_EQ(src.dtype(), tensor.dtype(),
platform::errors::PreconditionNotMet(
"The dtype of the two variables is not equal."));
PADDLE_ENFORCE_EQ(
......
......@@ -14,6 +14,8 @@ limitations under the License. */
#include "paddle/fluid/framework/device_worker.h"
#include "paddle/fluid/framework/convert_utils.h"
namespace pten {
class DenseTensor;
} // namespace pten
......@@ -58,11 +60,13 @@ std::string PrintLodTensorIntType(Tensor* tensor, int64_t start, int64_t end) {
std::string PrintLodTensor(Tensor* tensor, int64_t start, int64_t end) {
std::string out_val;
if (tensor->type() == proto::VarType::FP32) {
if (framework::TransToProtoVarType(tensor->dtype()) == proto::VarType::FP32) {
out_val = PrintLodTensorType<float>(tensor, start, end);
} else if (tensor->type() == proto::VarType::INT64) {
} else if (framework::TransToProtoVarType(tensor->dtype()) ==
proto::VarType::INT64) {
out_val = PrintLodTensorIntType(tensor, start, end);
} else if (tensor->type() == proto::VarType::FP64) {
} else if (framework::TransToProtoVarType(tensor->dtype()) ==
proto::VarType::FP64) {
out_val = PrintLodTensorType<double>(tensor, start, end);
} else {
out_val = "unsupported type";
......
......@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/device_worker_factory.h"
#include "paddle/fluid/framework/trainer.h"
......@@ -153,12 +154,13 @@ void DistMultiTrainer::Finalize() {
}
#define MergeCallback(cpp_type, proto_type) \
do { \
if (root_tensor->type() == proto_type) { \
if (thread_tensor->type() != proto_type) { \
if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
if (framework::TransToProtoVarType(thread_tensor->dtype()) != \
proto_type) { \
VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \
<< "] " << need_merge_var_names_[i] \
<< ", root tensor type=" << root_tensor->type() \
<< ", thread tensor type=" << thread_tensor->type(); \
<< ", root tensor type=" << root_tensor->dtype() \
<< ", thread tensor type=" << thread_tensor->dtype(); \
exit(-1); \
} \
MergeToRootScope<cpp_type>(root_tensor, thread_tensor); \
......
......@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/dlpack_tensor.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
namespace paddle {
......@@ -134,7 +135,8 @@ DLPackTensor::DLPackTensor(const Tensor &tensor, LaneType lanes) {
t_.device = paddle::platform::VisitPlace(place, internal::DLDeviceVisitor());
// init dtype
t_.dtype = internal::GetDLDataTypeFromTypeIndex(tensor.type());
t_.dtype = internal::GetDLDataTypeFromTypeIndex(
framework::TransToProtoVarType(tensor.dtype()));
t_.dtype.lanes = lanes;
// init ndim, tensor rank
......
......@@ -20,6 +20,7 @@ limitations under the License. */
#include "google/protobuf/text_format.h"
#include "gflags/gflags.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/lod_rank_table.h"
......@@ -235,16 +236,16 @@ void print_lod_tensor(std::string var_name, const LoDTensor& lod_tensor) {
static void print_fetch_var(Scope* scope, const std::string& var_name) {
auto& tensor = scope->FindVar(var_name)->Get<LoDTensor>();
#define PrintLoDTensorCallback(cpp_type, proto_type) \
do { \
if (tensor.type() == proto_type) { \
print_lod_tensor<cpp_type>(var_name, tensor); \
return; \
} \
#define PrintLoDTensorCallback(cpp_type, proto_type) \
do { \
if (framework::TransToProtoVarType(tensor.dtype()) == proto_type) { \
print_lod_tensor<cpp_type>(var_name, tensor); \
return; \
} \
} while (0)
_ForEachDataType_(PrintLoDTensorCallback);
VLOG(1) << "print_fetch_var: unrecognized data type:" << tensor.type();
VLOG(1) << "print_fetch_var: unrecognized data type:" << tensor.dtype();
}
void ExecutorThreadWorker::TrainFilesWithTimer() {
......
......@@ -22,6 +22,7 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
......@@ -146,13 +147,16 @@ class AscendInstance {
// }
ge::Shape shape(vec_dim);
ge::TensorDesc tensor_desc(shape, ge::Format::FORMAT_ND,
VarTypeToGeType(tensor->type()));
ge::TensorDesc tensor_desc(
shape, ge::Format::FORMAT_ND,
VarTypeToGeType(framework::TransToProtoVarType(tensor->dtype())));
tensor_desc.SetRealDimCnt(vec_dim.size());
const uint8_t *data = reinterpret_cast<const uint8_t *>(tensor->data());
std::vector<uint8_t> dst(numel * GeTypeSize(tensor->type()));
memcpy(dst.data(), data, GeTypeSize(tensor->type()) * numel);
std::vector<uint8_t> dst(
numel * GeTypeSize(framework::TransToProtoVarType(tensor->dtype())));
memcpy(dst.data(), data,
GeTypeSize(framework::TransToProtoVarType(tensor->dtype())) * numel);
ge::Tensor ge_tensor(tensor_desc, dst);
return ge_tensor;
}
......
......@@ -28,6 +28,7 @@ limitations under the License. */
#include "paddle/fluid/framework/fleet/heter_wrapper.h"
#if defined(PADDLE_WITH_PSLIB) && !defined(PADDLE_WITH_HETERPS)
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/device_worker.h"
namespace paddle {
......@@ -90,7 +91,8 @@ void HeterWrapper::SerializeToReq(const std::string& varname, Scope* scope,
LoDTensor* tensor = var->GetMutable<LoDTensor>();
req_var->set_varname(varname);
req_var->set_type(LOD_TENSOR);
req_var->set_data_type(static_cast<VariableMessage::Type>(tensor->type()));
req_var->set_data_type(static_cast<VariableMessage::Type>(
framework::TransToProtoVarType(tensor->dtype())));
for (auto& dim : framework::vectorize(tensor->dims())) {
req_var->add_dims(dim);
......@@ -108,21 +110,27 @@ void HeterWrapper::SerializeToReq(const std::string& varname, Scope* scope,
auto* req_data = req_var->mutable_data();
req_data->clear();
req_data->resize(tensor->numel() * SizeOfType(tensor->type()));
req_data->resize(tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
char* data_ptr = const_cast<char*>(req_data->data());
if (platform::is_cpu_place(tensor->place())) {
memcpy(data_ptr, tensor->data(),
tensor->numel() * SizeOfType(tensor->type()));
tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
} else {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
memory::Copy(platform::CPUPlace(), data_ptr, tensor->place(),
tensor->data(), tensor->numel() * SizeOfType(tensor->type()),
nullptr);
memory::Copy(
platform::CPUPlace(), data_ptr, tensor->place(), tensor->data(),
tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())),
nullptr);
#endif
#ifdef PADDLE_WITH_XPU
memory::Copy(platform::CPUPlace(), data_ptr, tensor->place(),
tensor->data(), tensor->numel() * SizeOfType(tensor->type()));
memory::Copy(
platform::CPUPlace(), data_ptr, tensor->place(), tensor->data(),
tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
#endif
}
}
......@@ -152,15 +160,18 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope,
}
tensor->set_lod(lod);
void* tensor_data =
tensor->mutable_data(place, ToVarType(req_var.data_type()));
void* tensor_data = tensor->mutable_data(
place, framework::TransToPtenDataType(ToVarType(req_var.data_type())));
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
memory::Copy(place, tensor_data, platform::CPUPlace(), req_var.data().data(),
tensor->numel() * SizeOfType(tensor->type()), stream);
tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())),
stream);
#else
memcpy(tensor_data, req_var.data().data(),
tensor->numel() * SizeOfType(tensor->type()));
tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
#endif
}
#endif
......@@ -190,15 +201,17 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope,
}
tensor->set_lod(lod);
void* tensor_data =
tensor->mutable_data(place, ToVarType(req_var.data_type()));
void* tensor_data = tensor->mutable_data(
place, framework::TransToPtenDataType(ToVarType(req_var.data_type())));
#ifdef PADDLE_WITH_XPU
memory::Copy(place, tensor_data, platform::CPUPlace(), req_var.data().data(),
tensor->numel() * SizeOfType(tensor->type()));
tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
#else
memcpy(tensor_data, req_var.data().data(),
tensor->numel() * SizeOfType(tensor->type()));
tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
#endif
}
......
......@@ -11,7 +11,9 @@ limitations under the License. */
#if defined(PADDLE_WITH_PSCORE)
#include <float.h>
#include "paddle/fluid/distributed/ps/service/heter_server.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/device_worker.h"
#include "paddle/fluid/framework/executor_gc_helper.h"
#include "paddle/fluid/platform/cpu_helper.h"
......@@ -35,21 +37,23 @@ void SetMicroId(paddle::framework::Scope* scope,
auto* tensor = var->GetMutable<framework::LoDTensor>();
std::vector<int> dims{1};
tensor->Resize(framework::make_ddim(dims));
void* tensor_data =
tensor->mutable_data(place, framework::proto::VarType::FP32);
void* tensor_data = tensor->mutable_data(
place, framework::TransToPtenDataType(framework::proto::VarType::FP32));
if (platform::is_gpu_place(place)) {
#ifdef PADDLE_WITH_CUDA
std::vector<char> temp;
temp.resize(tensor->numel() * framework::SizeOfType(tensor->type()));
temp.resize(tensor->numel() * framework::DataTypeSize(tensor->dtype()));
char* temp_ptr = temp.data();
float* temp_ptr_float = reinterpret_cast<float*>(temp_ptr);
temp_ptr_float[0] = micro_id;
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(*dev_ctx).stream();
memory::Copy(place, tensor_data, platform::CPUPlace(),
reinterpret_cast<void*>(temp_ptr),
tensor->numel() * framework::SizeOfType(tensor->type()),
stream);
memory::Copy(
place, tensor_data, platform::CPUPlace(),
reinterpret_cast<void*>(temp_ptr),
tensor->numel() * framework::SizeOfType(
framework::TransToProtoVarType(tensor->dtype())),
stream);
#endif
} else {
float* temp_ptr = reinterpret_cast<float*>(tensor_data);
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#include <string>
#include <vector>
#include "io/fs.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_feed_factory.h"
#include "paddle/fluid/framework/data_set.h"
#include "paddle/fluid/framework/device_worker_factory.h"
......@@ -136,18 +137,18 @@ void HeterXpuTrainer::CreateThreadParam(const ProgramDesc& program, int num) {
InitializeVariable(ptr, proto::VarType::LOD_TENSOR);
LoDTensor* thread_tensor = ptr->GetMutable<LoDTensor>();
#define HeterMemcpyFunc(cpp_type, proto_type) \
do { \
if (root_tensor->type() == proto_type) { \
HeterMemCpy<cpp_type>(thread_tensor, root_tensor, place, stream); \
} \
#define HeterMemcpyFunc(cpp_type, proto_type) \
do { \
if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
HeterMemCpy<cpp_type>(thread_tensor, root_tensor, place, stream); \
} \
} while (0)
#define HeterMemcpyXpuFunc(cpp_type, proto_type) \
do { \
if (root_tensor->type() == proto_type) { \
HeterMemCpy<cpp_type>(thread_tensor, root_tensor, place); \
} \
#define HeterMemcpyXpuFunc(cpp_type, proto_type) \
do { \
if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
HeterMemCpy<cpp_type>(thread_tensor, root_tensor, place); \
} \
} while (0)
#ifdef PADDLE_WITH_CUDA
_ForEachDataType_(HeterMemcpyFunc);
......@@ -318,12 +319,13 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
// }
#define MergeCallback(cpp_type, proto_type) \
do { \
if (root_tensor->type() == proto_type) { \
if (thread_tensor->type() != proto_type) { \
if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
if (framework::TransToProtoVarType(thread_tensor->dtype()) != \
proto_type) { \
VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \
<< "] " << need_merge_var_names_[i] \
<< ", root tensor type=" << root_tensor->type() \
<< ", thread tensor type=" << thread_tensor->type(); \
<< ", root tensor type=" << root_tensor->dtype() \
<< ", thread tensor type=" << thread_tensor->dtype(); \
exit(-1); \
} \
MergeToRootScope<cpp_type>(root_tensor, thread_tensor); \
......@@ -334,8 +336,10 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
#ifdef PADDLE_WITH_CUDA
auto dev_id = thread_tensor->place().device;
platform::CUDADeviceGuard guard(dev_id);
cudaMemset(thread_tensor->data(), 0,
thread_tensor->numel() * SizeOfType(thread_tensor->type()));
cudaMemset(
thread_tensor->data(), 0,
thread_tensor->numel() * SizeOfType(framework::TransToProtoVarType(
thread_tensor->dtype())));
#endif
#ifdef PADDLE_WITH_XPU
auto place = thread_tensor->place();
......@@ -346,12 +350,16 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
platform::DeviceContext* dev_ctx = pool.Get(place);
const platform::XPUDeviceContext* xpu_ctx =
reinterpret_cast<const platform::XPUDeviceContext*>(dev_ctx);
xpu::memset(xpu_ctx->x_context(), thread_tensor->data(), 0,
thread_tensor->numel() * SizeOfType(thread_tensor->type()));
xpu::memset(
xpu_ctx->x_context(), thread_tensor->data(), 0,
thread_tensor->numel() * SizeOfType(framework::TransToProtoVarType(
thread_tensor->dtype())));
#endif
} else {
memset(thread_tensor->data(), 0,
thread_tensor->numel() * SizeOfType(thread_tensor->type()));
thread_tensor->numel() *
SizeOfType(
framework::TransToProtoVarType(thread_tensor->dtype())));
}
}
auto* merge_var = response->add_vars();
......@@ -361,8 +369,10 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
#ifdef PADDLE_WITH_CUDA
auto dev_id = root_tensor->place().device;
platform::CUDADeviceGuard guard(dev_id);
cudaMemset(root_tensor->data(), 0,
root_tensor->numel() * SizeOfType(root_tensor->type()));
cudaMemset(
root_tensor->data(), 0,
root_tensor->numel() *
SizeOfType(framework::TransToProtoVarType(root_tensor->dtype())));
#endif
#ifdef PADDLE_WITH_XPU
auto place = root_tensor->place();
......@@ -373,12 +383,15 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
platform::DeviceContext* dev_ctx = pool.Get(place);
const platform::XPUDeviceContext* xpu_ctx =
reinterpret_cast<const platform::XPUDeviceContext*>(dev_ctx);
xpu::memset(xpu_ctx->x_context(), root_tensor->data(), 0,
root_tensor->numel() * SizeOfType(root_tensor->type()));
xpu::memset(
xpu_ctx->x_context(), root_tensor->data(), 0,
root_tensor->numel() *
SizeOfType(framework::TransToProtoVarType(root_tensor->dtype())));
#endif
} else {
memset(root_tensor->data(), 0,
root_tensor->numel() * SizeOfType(root_tensor->type()));
root_tensor->numel() * SizeOfType(framework::TransToProtoVarType(
root_tensor->dtype())));
}
}
return 0;
......
......@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <ctime>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/device_worker.h"
#include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h"
......@@ -79,11 +81,11 @@ void HogwildWorker::CreateThreadScope(const ProgramDesc &program) {
LoDTensor *thread_tensor = ptr1->GetMutable<LoDTensor>();
LoDTensor *root_tensor =
root_scope_->FindVar(var->Name())->GetMutable<LoDTensor>();
#define MemsetCallback(cpp_type, proto_type) \
do { \
if (root_tensor->type() == proto_type) { \
SetZero<cpp_type>(thread_tensor, root_tensor, tensor_dim); \
} \
#define MemsetCallback(cpp_type, proto_type) \
do { \
if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
SetZero<cpp_type>(thread_tensor, root_tensor, tensor_dim); \
} \
} while (0)
_ForEachDataType_(MemsetCallback);
}
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/pten_utils.h"
#include "paddle/fluid/platform/enforce.h"
......@@ -134,7 +135,7 @@ class CompatMetaTensor : public pten::MetaTensor {
}
} else {
auto* var = BOOST_GET_CONST(VarDesc*, var_);
return pten::TransToPtenDataType(var->GetDataType());
return paddle::framework::TransToPtenDataType(var->GetDataType());
}
}
......@@ -183,7 +184,7 @@ class CompatMetaTensor : public pten::MetaTensor {
}
} else {
auto* var = BOOST_GET(VarDesc*, var_);
var->SetDataType(pten::TransToProtoVarType(dtype));
var->SetDataType(paddle::framework::TransToProtoVarType(dtype));
}
}
......
......@@ -16,6 +16,7 @@
#include <cmath>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_version_registry.h"
namespace pten {
......@@ -216,7 +217,8 @@ void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const {
patterns::PDNodeName(name_scope_, "eltwise_y_in"));
// Set shape && datatype manually
eltwise_y_in_desc.SetShape(framework::vectorize(ac_bias_tensor->dims()));
eltwise_y_in_desc.SetDataType(ac_bias_tensor->type());
eltwise_y_in_desc.SetDataType(
framework::TransToProtoVarType(ac_bias_tensor->dtype()));
eltwise_y_in_desc.SetLoDLevel(ac_bias->Var()->GetLoDLevel());
eltwise_y_in_desc.SetPersistable(true);
......
......@@ -16,6 +16,7 @@
#include <string>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/enforce.h"
......@@ -285,7 +286,8 @@ void ConvBNFusePass::ApplyImpl(ir::Graph* graph) const {
VarDesc eltwise_y_in_desc(
patterns::PDNodeName("fuse_conv_bn", conv_type() + "_eltwise_y_in"));
eltwise_y_in_desc.SetShape(framework::vectorize(bn_bias_tensor->dims()));
eltwise_y_in_desc.SetDataType(bn_bias_tensor->type());
eltwise_y_in_desc.SetDataType(
framework::TransToProtoVarType(bn_bias_tensor->dtype()));
eltwise_y_in_desc.SetLoDLevel(bn_bias->Var()->GetLoDLevel());
eltwise_y_in_desc.SetPersistable(true);
auto* eltwise_y_in_node = g->CreateVarNode(&eltwise_y_in_desc);
......@@ -531,7 +533,8 @@ void ConvEltwiseAddBNFusePass::ApplyImpl(ir::Graph* graph) const {
name_scope_, "eltwise_y_in" + std::to_string(found_conv_bn_count)));
eltwise_y_in_desc.SetShape(
framework::vectorize(eltwise_y_in_tensor->dims()));
eltwise_y_in_desc.SetDataType(eltwise_y_in_tensor->type());
eltwise_y_in_desc.SetDataType(
framework::TransToProtoVarType(eltwise_y_in_tensor->dtype()));
eltwise_y_in_desc.SetLoDLevel(eltwise_y_in->Var()->GetLoDLevel());
eltwise_y_in_desc.SetPersistable(true);
auto* eltwise_y_in_node = g->CreateVarNode(&eltwise_y_in_desc);
......
......@@ -14,6 +14,7 @@
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/layer_norm_fuse_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
......@@ -273,10 +274,11 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const {
"but has %s elements.",
eps_tensor->numel()));
CHECK_TRUE(
eps_tensor->type() == proto::VarType::FP32,
framework::TransToProtoVarType(eps_tensor->dtype()) ==
proto::VarType::FP32,
::paddle::string::Sprintf("The LayerNorm divisor epsilon value "
"must be of FP32 data type, but is %s.",
eps_tensor->type()));
eps_tensor->dtype()));
CHECK_TRUE(validateReduceOpAttrs(x_mean, x_shape, "input mean"),
"Validation of input mean node failed.");
......@@ -333,7 +335,8 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const {
auto* gamma_tensor = scope->FindVar(gamma->Name())->GetMutable<LoDTensor>();
VarDesc new_gamma_desc(patterns::PDNodeName("layer_norm_fuse", "Scale"));
new_gamma_desc.SetShape({layer_norm_x_mat_dims[1]});
new_gamma_desc.SetDataType(gamma_tensor->type());
new_gamma_desc.SetDataType(
framework::TransToProtoVarType(gamma_tensor->dtype()));
new_gamma_desc.SetLoDLevel(gamma->Var()->GetLoDLevel());
new_gamma_desc.SetPersistable(true);
auto* new_gamma_node = g->CreateVarNode(&new_gamma_desc);
......@@ -347,7 +350,8 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const {
auto* beta_tensor = scope->FindVar(beta->Name())->GetMutable<LoDTensor>();
VarDesc new_beta_desc(patterns::PDNodeName("layer_norm_fuse", "Bias"));
new_beta_desc.SetShape({layer_norm_x_mat_dims[1]});
new_beta_desc.SetDataType(beta_tensor->type());
new_beta_desc.SetDataType(
framework::TransToProtoVarType(beta_tensor->dtype()));
new_beta_desc.SetLoDLevel(beta->Var()->GetLoDLevel());
new_beta_desc.SetPersistable(true);
auto* new_beta_node = g->CreateVarNode(&new_beta_desc);
......
......@@ -95,7 +95,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
const char* var_name) {
auto x = scope->Var(var_name);
auto tensor = x->GetMutable<LoDTensor>();
tensor->mutable_data(place, proto::VarType::FP32, 1);
tensor->mutable_data(place,
framework::TransToPtenDataType(proto::VarType::FP32), 1);
}
void MainTest(bool convWithExistingBias) {
......
......@@ -125,7 +125,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
const char* var_name) {
auto x = scope->Var(var_name);
auto tensor = x->GetMutable<LoDTensor>();
tensor->mutable_data(place, proto::VarType::FP32, 1);
tensor->mutable_data(place,
framework::TransToPtenDataType(proto::VarType::FP32), 1);
}
void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
......
......@@ -438,7 +438,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
const char* var_name) {
auto x = scope->Var(var_name);
auto tensor = x->GetMutable<LoDTensor>();
tensor->mutable_data(place, proto::VarType::FP32, 1);
tensor->mutable_data(place,
framework::TransToPtenDataType(proto::VarType::FP32), 1);
}
void PrepareGraph(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog) {
......
......@@ -16,6 +16,7 @@ limitations under the License. */
#include <stdint.h>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/version.h"
namespace paddle {
......@@ -327,7 +328,7 @@ std::vector<LoDTensor> SplitLoDTensor(
for (size_t i = 0; i < places.size(); ++i) {
LoDTensor dst;
dst.Resize(src.dims());
dst.mutable_data(places[i], src.type());
dst.mutable_data(places[i], src.dtype());
if (!src.lod().empty()) {
dst.set_lod(src.lod());
}
......@@ -393,7 +394,7 @@ void MergeLoDTensor(LoDTensor *target,
for (auto *t : lod_tensors) {
if (t->numel() && t->IsInitialized()) {
new_dim = t->dims();
new_type = t->type();
new_type = framework::TransToProtoVarType(t->dtype());
new_layout = t->layout();
break;
}
......@@ -405,11 +406,12 @@ void MergeLoDTensor(LoDTensor *target,
auto *t = lod_tensors[i];
if (t->numel() && t->IsInitialized()) {
PADDLE_ENFORCE_EQ(
new_type, t->type(),
new_type, framework::TransToProtoVarType(t->dtype()),
platform::errors::InvalidArgument(
"LoDTensor data type does not match, expected type is %s, actual "
"type is %s.",
DataTypeToString(new_type), DataTypeToString(t->type())));
DataTypeToString(new_type),
DataTypeToString(framework::TransToProtoVarType(t->dtype()))));
PADDLE_ENFORCE_EQ(
new_layout, t->layout(),
platform::errors::InvalidArgument(
......@@ -444,7 +446,8 @@ void MergeLoDTensor(LoDTensor *target,
target->Resize(new_dim);
target->set_layout(new_layout);
target->set_lod(new_lod);
target->mutable_data(dst_place, new_type);
target->mutable_data(dst_place,
paddle::framework::TransToPtenDataType(new_type));
int begin = 0;
for (auto *src : lod_tensors) {
......
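// A minimal standalone sketch of the dtype round-trip that the hunks above rely
// on. It only uses helpers already shown in this diff (TransToProtoVarType,
// TransToPtenDataType, DataTypeToString, DataTypeSize); LogTensorDType is an
// illustrative name, and convert_utils.h / lod_tensor.h are assumed to be
// included.
static void LogTensorDType(const paddle::framework::LoDTensor& t) {
  namespace fw = paddle::framework;
  // pten/experimental DataType -> legacy proto::VarType::Type (for old APIs)
  auto proto_type = fw::TransToProtoVarType(t.dtype());
  // legacy proto type -> pten DataType (e.g. for mutable_data(place, DataType))
  auto pten_type = fw::TransToPtenDataType(proto_type);
  VLOG(3) << "dtype=" << fw::DataTypeToString(proto_type)
          << ", element bytes=" << fw::DataTypeSize(t.dtype());
  (void)pten_type;  // silence unused-variable warnings in this sketch
}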
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include "paddle/fluid/framework/device_worker_factory.h"
#include "paddle/fluid/framework/trainer.h"
#include "paddle/fluid/platform/lodtensor_printer.h"
......@@ -250,12 +251,13 @@ void MultiTrainer::Finalize() {
LoDTensor* thread_tensor = thread_var->GetMutable<LoDTensor>();
#define MergeCallback(cpp_type, proto_type) \
do { \
if (root_tensor->type() == proto_type) { \
if (thread_tensor->type() != proto_type) { \
if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
if (framework::TransToProtoVarType(thread_tensor->dtype()) != \
proto_type) { \
VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \
<< "] " << need_merge_var_names_[i] \
<< ", root tensor type=" << root_tensor->type() \
<< ", thread tensor type=" << thread_tensor->type(); \
<< ", root tensor type=" << root_tensor->dtype() \
<< ", thread tensor type=" << thread_tensor->dtype(); \
exit(-1); \
} \
MergeToRootScope<cpp_type>(root_tensor, thread_tensor); \
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/framework/new_executor/data_transfer.h"
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle {
namespace framework {
......@@ -366,7 +367,7 @@ void HandleComplexGradToRealGrad(const OpFuncNode& op_func_node,
continue;
}
// only focus on complex dtype now
auto src_type = grad_tensor->type();
auto src_type = framework::TransToProtoVarType(grad_tensor->dtype());
if (!framework::IsComplexType(src_type)) {
VLOG(3) << "skip grad_tensor with not complexType";
continue;
......@@ -390,7 +391,7 @@ void HandleComplexGradToRealGrad(const OpFuncNode& op_func_node,
platform::errors::Unavailable(
"Forward tensor is nullptr when handle complex data to real."));
// only need record type, the allocation may have been released
auto dst_type = tensor->saved_type();
auto dst_type = framework::TransToProtoVarType(tensor->dtype());
// only focus on real dtype and need casting
if (framework::IsComplexType(dst_type)) {
continue;
......
......@@ -19,6 +19,7 @@ limitations under the License. */
#include <string>
#include "gflags/gflags.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/data_type_transform.h"
#include "paddle/fluid/framework/details/nan_inf_utils.h"
......@@ -109,13 +110,13 @@ static std::string GetDtype(const ScopeBase& scope, const std::string& name) {
if (UNLIKELY(!tensor.IsInitialized())) {
return "";
}
return DataTypeToString(tensor.type());
return DataTypeToString(framework::TransToProtoVarType(tensor.dtype()));
} else if (var->IsType<pten::SelectedRows>()) {
auto tensor = var->Get<pten::SelectedRows>().value();
if (UNLIKELY(!tensor.IsInitialized())) {
return "uninited";
} else {
return DataTypeToString(tensor.type());
return DataTypeToString(framework::TransToProtoVarType(tensor.dtype()));
}
} else if (var->IsType<Strings>()) {
return "strings";
......@@ -1070,8 +1071,8 @@ static void CheckTensorNANOrInf(const std::string& op_type,
if (tensor.memory_size() == 0) {
return;
}
if (tensor.type() != proto::VarType::FP32 &&
tensor.type() != proto::VarType::FP64) {
if (framework::TransToProtoVarType(tensor.dtype()) != proto::VarType::FP32 &&
framework::TransToProtoVarType(tensor.dtype()) != proto::VarType::FP64) {
return;
}
PADDLE_ENFORCE_NE(
......@@ -1536,7 +1537,7 @@ void OperatorWithKernel::HandleComplexGradToRealGrad(
continue;
}
// only focus on complex dtype now
auto src_type = grad_tensor->type();
auto src_type = framework::TransToProtoVarType(grad_tensor->dtype());
if (!IsComplexType(src_type)) {
continue;
}
......@@ -1556,7 +1557,7 @@ void OperatorWithKernel::HandleComplexGradToRealGrad(
platform::errors::Unavailable(
"Forward tensor is nullptr when handle complex data to real."));
// only need record type, the allocation may have been released
auto dst_type = tensor->saved_type();
auto dst_type = framework::TransToProtoVarType(tensor->dtype());
// only focus on real dtype and need casting
if (IsComplexType(dst_type)) {
continue;
......@@ -1770,7 +1771,8 @@ void OperatorWithKernel::ParseInputDataType(
platform::errors::InvalidArgument("The %s Op's Input Variable `%s` "
"contains uninitialized Tensor.",
Type(), name));
proto::VarType::Type tmp = t->type();
proto::VarType::Type tmp =
paddle::framework::TransToProtoVarType(t->dtype());
PADDLE_ENFORCE(tmp == *data_type || *data_type == default_data_type,
platform::errors::InvalidArgument(
"The DataType of %s Op's duplicable or different "
......@@ -1869,8 +1871,8 @@ proto::VarType::Type OperatorWithKernel::IndicateOrPromoteVarDataTypes(
auto* tensor_b = GetTensorFormInputSafely(ctx, name2);
// 2. Get two input types
auto type_a = tensor_a->type();
auto type_b = tensor_b->type();
auto type_a = framework::TransToProtoVarType(tensor_a->dtype());
auto type_b = framework::TransToProtoVarType(tensor_b->dtype());
// 3. Get first input type or promote complex types
auto target_type = PromoteTypesIfComplexExists(type_a, type_b);
......@@ -2168,7 +2170,7 @@ void OperatorWithKernel::BuildPtenKernelContext(
pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr));
} else if (attr_defs[i].type_index ==
std::type_index(typeid(pten::DataType))) {
auto data_type = pten::TransToPtenDataType(
auto data_type = paddle::framework::TransToPtenDataType(
static_cast<framework::proto::VarType::Type>(
BOOST_GET_CONST(int, attr)));
pt_kernel_context->EmplaceBackAttr(data_type);
......
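// A minimal sketch of how an integer attribute becomes a pten::DataType for the
// pten kernel context, mirroring the BuildPtenKernelContext hunk above;
// AttrToPtenDType is an illustrative name, and the framework attribute headers
// (providing Attribute and BOOST_GET_CONST) are assumed to be included.
static pten::DataType AttrToPtenDType(const paddle::framework::Attribute& attr) {
  // attributes store dtypes as the integer value of proto::VarType::Type
  return paddle::framework::TransToPtenDataType(
      static_cast<paddle::framework::proto::VarType::Type>(
          BOOST_GET_CONST(int, attr)));
}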
......@@ -40,6 +40,7 @@ limitations under the License. */
#include "paddle/fluid/platform/variant.h"
#include "paddle/utils/flat_hash_map.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/pten/core/compat/arg_map_context.h"
#include "paddle/pten/core/compat/op_utils.h"
#include "paddle/pten/core/kernel_context.h"
......@@ -422,8 +423,8 @@ class ExecutionContext {
"size(%d).",
allocation_ptr->size(), framework::product(dim) * sizeof(T)));
paddle::framework::Tensor temp_tensor(
framework::ToDataType(std::type_index(typeid(T))));
paddle::framework::Tensor temp_tensor(framework::TransToPtenDataType(
framework::ToDataType(std::type_index(typeid(T)))));
temp_tensor.Resize(dim);
temp_tensor.ResetHolder(std::move(shared_allocation));
return temp_tensor;
......
......@@ -27,6 +27,7 @@ limitations under the License. */
#include "cinn/frontend/op_mappers/use_op_mappers.h"
#include "cinn/frontend/var_type_utils.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/errors.h"
......@@ -57,7 +58,7 @@ OpMapperContext::FeedInfo GetCinnFeedInfoFromTensor(
// op
auto tensor_type = ::paddle::framework::proto::VarType::FP32;
if (!skip_trans_type) {
tensor_type = tensor.type();
tensor_type = framework::TransToProtoVarType(tensor.dtype());
}
auto cinn_var_type = TransformVarDataTypeToCinn(tensor_type);
info.type = ::cinn::frontend::utils::CppVarType2CommonType(cinn_var_type);
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "gtest/gtest.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.h"
namespace paddle {
......@@ -206,7 +207,9 @@ class CinnGraphSymbolizationTest : public ::testing::Test {
LoDTensor tensor;
DDim dims = {256, 1024};
tensor.Resize(dims);
tensor.mutable_data(platform::CPUPlace(), proto::VarType::FP32);
tensor.mutable_data(
platform::CPUPlace(),
framework::TransToPtenDataType(framework::proto::VarType::FP32));
return tensor;
};
#define FillFeedList(Name) feed_targets[#Name] = create_tensor();
......
......@@ -21,6 +21,7 @@ limitations under the License. */
#include <utility>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/details/async_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/bind_threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h"
......@@ -775,7 +776,8 @@ void ParallelExecutor::BCastParamsToDevices(
std::vector<void *> buffers;
buffers.reserve(member_->places_.size());
size_t numel = main_tensor.numel();
ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type());
ncclDataType_t data_type = platform::ToNCCLDataType(
framework::TransToProtoVarType(main_tensor.dtype()));
for (size_t i = 0; i < member_->places_.size(); ++i) {
auto place = member_->places_[i];
void *buffer;
......@@ -786,7 +788,7 @@ void ParallelExecutor::BCastParamsToDevices(
auto local_scope = member_->local_scopes_[i];
auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
t->Resize(dims);
buffer = t->mutable_data(place, main_tensor.type());
buffer = t->mutable_data(place, main_tensor.dtype());
}
buffers.push_back(buffer);
}
......@@ -818,7 +820,8 @@ void ParallelExecutor::BCastParamsToDevices(
// but broadcast is equivalent to no type of operation, does not affect
// correctness.
BKCLDataType data_type = BKCL_FLOAT;
// BKCLDataType data_type = platform::ToBKCLDataType(main_tensor.type());
// BKCLDataType data_type =
// platform::ToBKCLDataType(framework::TransToProtoVarType(main_tensor.dtype()));
for (size_t i = 0; i < member_->places_.size(); ++i) {
auto place = member_->places_[i];
void *buffer;
......@@ -829,7 +832,7 @@ void ParallelExecutor::BCastParamsToDevices(
auto local_scope = member_->local_scopes_[i];
auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
t->Resize(dims);
buffer = t->mutable_data(place, main_tensor.type());
buffer = t->mutable_data(place, main_tensor.dtype());
}
buffers.push_back(buffer);
}
......@@ -848,7 +851,8 @@ void ParallelExecutor::BCastParamsToDevices(
for (size_t i = 0; i < member_->places_.size(); ++i) {
auto &bkcl_ctx = bkcl_ctxs->at(member_->places_[i]);
auto broadcast_numel = numel;
if (main_tensor.type() == framework::proto::VarType::INT64) {
if (framework::TransToProtoVarType(main_tensor.dtype()) ==
framework::proto::VarType::INT64) {
broadcast_numel *= 2;
}
PADDLE_ENFORCE_EQ(
......@@ -873,7 +877,7 @@ void ParallelExecutor::BCastParamsToDevices(
auto copy_memory = [&] {
t->Resize(dims);
t->mutable_data(cpu, main_tensor.type());
t->mutable_data(cpu, main_tensor.dtype());
paddle::framework::TensorCopy(main_tensor, cpu, t);
};
......
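// A minimal sketch (assuming an NCCL/RCCL build with the usual nccl_helper.h
// include) of the pattern used above: allocation now takes the pten DataType
// directly, while NCCL still needs the legacy proto type, hence the conversion.
// BroadcastBufferSketch is an illustrative name, not an API of this commit.
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
static void BroadcastBufferSketch(paddle::framework::LoDTensor* t,
                                  const paddle::platform::Place& place) {
  namespace fw = paddle::framework;
  void* buffer = t->mutable_data(place, t->dtype());   // pten DataType path
  ncclDataType_t nccl_dtype = paddle::platform::ToNCCLDataType(
      fw::TransToProtoVarType(t->dtype()));             // legacy proto path
  (void)buffer;
  (void)nccl_dtype;
}
#endif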
......@@ -15,6 +15,7 @@ limitations under the License. */
#include <cstdlib>
#include <string>
#include <vector>
#include "io/fs.h"
#include "paddle/fluid/framework/data_feed_factory.h"
#include "paddle/fluid/framework/data_set.h"
......@@ -232,12 +233,13 @@ void PSGPUTrainer::Finalize() {
}
#define MergeCallback(cpp_type, proto_type) \
do { \
if (root_tensor->type() == proto_type) { \
if (thread_tensor->type() != proto_type) { \
if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
if (framework::TransToProtoVarType(thread_tensor->dtype()) != \
proto_type) { \
VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \
<< "] " << need_merge_var_names_[i] \
<< ", root tensor type=" << root_tensor->type() \
<< ", thread tensor type=" << thread_tensor->type(); \
<< ", root tensor type=" << root_tensor->dtype() \
<< ", thread tensor type=" << thread_tensor->dtype(); \
exit(-1); \
} \
MergeToRootScope<cpp_type>(root_tensor, thread_tensor); \
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include <sstream>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/pten_utils.h"
#include "paddle/fluid/framework/lod_tensor.h"
......@@ -59,7 +60,7 @@ class KernelArgsNameMakerByOpProto : public KernelArgsNameMaker {
OpKernelType TransPtenKernelKeyToOpKernelType(
const pten::KernelKey& kernel_key) {
proto::VarType::Type data_type =
pten::TransToProtoVarType(kernel_key.dtype());
paddle::framework::TransToProtoVarType(kernel_key.dtype());
// no need to set current device id here
platform::Place place = pten::TransToPtenPlace(kernel_key.backend(), false);
DataLayout data_layout = kernel_key.layout();
......@@ -87,7 +88,7 @@ pten::KernelKey TransOpKernelTypeToPtenKernelKey(
}
paddle::experimental::DataLayout layout = kernel_type.data_layout_;
paddle::experimental::DataType dtype =
pten::TransToPtenDataType(kernel_type.data_type_);
paddle::framework::TransToPtenDataType(kernel_type.data_type_);
return pten::KernelKey(backend, layout, dtype);
}
......
......@@ -16,6 +16,7 @@
#include <fstream>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/imperative/layer.h"
namespace paddle {
......@@ -282,7 +283,7 @@ bool SaveTensorToDisk(const std::string& file_name,
auto tensor = itera.second;
proto::VarType::TensorDesc desc;
desc.set_data_type(tensor->type());
desc.set_data_type(framework::TransToProtoVarType(tensor->dtype()));
auto dims = framework::vectorize(tensor->dims());
auto* pb_dims = desc.mutable_dims();
pb_dims->Resize(static_cast<int>(dims.size()), 0);
......@@ -294,7 +295,7 @@ bool SaveTensorToDisk(const std::string& file_name,
// save tensor
uint64_t data_size =
tensor->numel() * framework::SizeOfType(tensor->type());
tensor->numel() * framework::DataTypeSize(tensor->dtype());
auto* data_ptr = tensor->data();
if (platform::is_gpu_place(tensor->place())) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
......
......@@ -19,6 +19,7 @@ limitations under the License. */
#include <utility>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/complex.h"
......@@ -55,10 +56,10 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
// than numel()*size(type())
auto dst_ptr =
src.layout() == DataLayout::kMKLDNN
? dst->mutable_data(dst_place, src.type(), src.memory_size())
: dst->mutable_data(dst_place, src.type());
? dst->mutable_data(dst_place, src.dtype(), src.memory_size())
: dst->mutable_data(dst_place, src.dtype());
#else
auto dst_ptr = dst->mutable_data(dst_place, src.type());
auto dst_ptr = dst->mutable_data(dst_place, src.dtype());
#endif
if (src_ptr == dst_ptr && src_place == dst_place) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
......@@ -70,9 +71,9 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
#ifdef PADDLE_WITH_MKLDNN
auto size = src.layout() == DataLayout::kMKLDNN
? src.memory_size()
: src.numel() * SizeOfType(src.type());
: src.numel() * framework::DataTypeSize(src.dtype());
#else
auto size = src.numel() * SizeOfType(src.type());
auto size = src.numel() * framework::DataTypeSize(src.dtype());
#endif
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
......@@ -126,7 +127,7 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
Tensor npu_pinned_tensor;
npu_pinned_tensor.Resize(src.dims());
auto npu_pinned_ptr =
npu_pinned_tensor.mutable_data(npu_pinned_place, src.type());
npu_pinned_tensor.mutable_data(npu_pinned_place, src.dtype());
memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size);
// 2. async copy npu pinned tensor -> npu tensor
......@@ -410,7 +411,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
#endif
auto src_place = src.place();
auto src_ptr = src.data();
auto dst_ptr = dst->mutable_data(dst_place, src.type());
auto dst_ptr = dst->mutable_data(dst_place, src.dtype());
VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr;
if (src_ptr == dst_ptr && src_place == dst_place) {
......@@ -419,7 +420,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
return;
}
auto size = src.numel() * SizeOfType(src.type());
auto size = src.numel() * framework::DataTypeSize(src.dtype());
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
}
......@@ -582,8 +583,9 @@ struct AnyDTypeVisitor {
template <typename Predicate, typename DevCtx>
inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor,
const DevCtx& ctx, framework::Tensor* out) {
VisitDataType(tensor.type(), AnyDTypeVisitor<Predicate, DevCtx>(
predicate, tensor, ctx, out));
VisitDataType(
framework::TransToProtoVarType(tensor.dtype()),
AnyDTypeVisitor<Predicate, DevCtx>(predicate, tensor, ctx, out));
}
template <typename Predicate>
......@@ -722,8 +724,9 @@ struct AllDTypeVisitor {
template <typename Predicate, typename DevCtx>
inline void AllImpl(Predicate predicate, const framework::Tensor& tensor,
const DevCtx& ctx, framework::Tensor* out) {
VisitDataType(tensor.type(), AllDTypeVisitor<Predicate, DevCtx>(
predicate, tensor, ctx, out));
VisitDataType(
framework::TransToProtoVarType(tensor.dtype()),
AllDTypeVisitor<Predicate, DevCtx>(predicate, tensor, ctx, out));
}
template <typename Predicate>
......@@ -930,7 +933,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor,
// int32_t size
// void* protobuf message
proto::VarType::TensorDesc desc;
desc.set_data_type(tensor.type());
desc.set_data_type(framework::TransToProtoVarType(tensor.dtype()));
auto dims = framework::vectorize(tensor.dims());
auto* pb_dims = desc.mutable_dims();
pb_dims->Resize(static_cast<int>(dims.size()), 0);
......@@ -941,7 +944,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor,
os.write(out.data(), size);
}
{ // the 3rd field, tensor data
uint64_t size = tensor.numel() * framework::SizeOfType(tensor.type());
uint64_t size = tensor.numel() * framework::DataTypeSize(tensor.dtype());
auto* data_ptr = tensor.data();
PADDLE_ENFORCE_LT(size, (std::numeric_limits<std::streamsize>::max)(),
......@@ -1419,13 +1422,14 @@ std::ostream& operator<<(std::ostream& os, const pten::DenseTensor& t) {
dev_ctx.Wait();
}
#define PrintTensorCallback(cpp_type, proto_type) \
do { \
if (tensor.type() == proto_type) { \
os << " - dtype: " << proto_type << "\n"; \
paddle::framework::print_tensor<cpp_type>(os, tensor); \
return os; \
} \
#define PrintTensorCallback(cpp_type, proto_type) \
do { \
if (paddle::framework::TransToProtoVarType(tensor.dtype()) == \
proto_type) { \
os << " - dtype: " << proto_type << "\n"; \
paddle::framework::print_tensor<cpp_type>(os, tensor); \
return os; \
} \
} while (0)
_ForEachDataType_(PrintTensorCallback);
......
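// A minimal sketch of the byte-size computation the TensorCopy hunks above now
// use: the destination is allocated with the source's pten DataType and the raw
// size comes from DataTypeSize(dtype()) instead of SizeOfType(type()).
// CpuCopySketch is an illustrative name, both tensors are assumed to live on
// CPU, and <cstring> plus the framework headers are assumed to be included.
static void CpuCopySketch(const paddle::framework::Tensor& src,
                          paddle::framework::Tensor* dst) {
  namespace fw = paddle::framework;
  dst->Resize(src.dims());
  void* dst_ptr = dst->mutable_data(paddle::platform::CPUPlace(), src.dtype());
  auto size = src.numel() * fw::DataTypeSize(src.dtype());  // bytes to copy
  std::memcpy(dst_ptr, src.data(), size);  // stands in for memory::Copy(...)
}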
......@@ -160,7 +160,7 @@ void TensorFromArray(const T* src, const size_t& array_size,
Tensor npu_pinned_tensor;
npu_pinned_tensor.Resize(dst->dims());
auto npu_pinned_ptr =
npu_pinned_tensor.mutable_data(npu_pinned_place, dst->type());
npu_pinned_tensor.mutable_data(npu_pinned_place, dst->dtype());
memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size);
// 2. async copy npu pinned tensor -> npu tensor
......@@ -211,7 +211,7 @@ void TensorFromVector(const std::vector<T>& src,
// so pass nullptr as stream to memory::Copy().
else if (platform::is_npu_place(dst_place)) { // NOLINT
// 1. vector -> npu pinned tensor
Tensor npu_pinned_tensor(dst->type());
Tensor npu_pinned_tensor(dst->dtype());
platform::NPUPinnedPlace npu_pinned_place;
auto npu_pinned_ptr =
npu_pinned_tensor.mutable_data<T>(dst->dims(), npu_pinned_place);
......@@ -280,7 +280,7 @@ inline void TensorFromVector(const std::vector<bool>& src,
Tensor npu_pinned_tensor;
npu_pinned_tensor.Resize(dst->dims());
auto npu_pinned_ptr =
npu_pinned_tensor.mutable_data(npu_pinned_place, dst->type());
npu_pinned_tensor.mutable_data(npu_pinned_place, dst->dtype());
memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size);
// 2. async copy npu pinned tensor -> npu tensor
......
......@@ -15,6 +15,7 @@
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/imperative/all_reduce.h"
#include "paddle/fluid/framework/convert_utils.h"
#ifdef PADDLE_WITH_NCCL
#include <nccl.h>
......@@ -62,8 +63,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst,
const void *src_ptr = src.data();
dst->Resize(src.dims());
auto *dst_ptr = dst->mutable_data(src.place(), src.type());
auto nccl_dtype = platform::ToNCCLDataType(src.type());
auto *dst_ptr = dst->mutable_data(src.place(), src.dtype());
auto nccl_dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(src.dtype()));
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce(
src_ptr, dst_ptr, src.numel(), nccl_dtype, ncclSum, comm->comm(),
stream));
......@@ -82,7 +84,7 @@ static void AllReduce(const pten::SelectedRows &src, pten::SelectedRows *dst,
platform::errors::Unimplemented(
"Imperative mode does not support multi-CPU training yet."));
auto dtype = src_tensor.type();
auto dtype = framework::TransToProtoVarType(src_tensor.dtype());
auto nccl_dtype = platform::ToNCCLDataType(dtype);
auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get(place));
......@@ -127,7 +129,7 @@ static void AllReduce(const pten::SelectedRows &src, pten::SelectedRows *dst,
dims[0] = rows_num;
auto feature_size = framework::product(dims) / dims[0];
dst_tensor->Resize(dims);
auto *dst_tensor_ptr = dst_tensor->mutable_data(place, dtype);
auto *dst_tensor_ptr = dst_tensor->mutable_data(place, src_tensor.dtype());
const auto *src_tensor_ptr = src_tensor.data();
auto sizeof_dtype = framework::SizeOfType(dtype);
......
......@@ -24,6 +24,7 @@
#include <utility>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/op_base.h"
......@@ -152,7 +153,8 @@ void BasicEngine::CheckBackwardInputs(const OpBase& op) {
// correct. var->DataType() returns the default dtype, which is float32.
    // Here, we use the corresponding forward data type instead.
tensor->mutable_data(op.place(), var->ForwardDataType());
tensor->mutable_data(
op.place(), framework::TransToPtenDataType(var->ForwardDataType()));
VLOG(6) << "Set ungenerated Grad: " << var->Name()
<< " as zero with dtype "
<< framework::DataTypeToString(var->ForwardDataType());
......
......@@ -13,13 +13,14 @@
// limitations under the License.
#if defined(PADDLE_WITH_XPU_BKCL)
#include "paddle/fluid/imperative/bkcl_context.h"
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/imperative/bkcl_context.h"
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/xpu/bkcl_helper.h"
#include "paddle/fluid/platform/device_context.h"
......@@ -41,8 +42,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst,
const void *src_ptr = src.data();
dst->Resize(src.dims());
auto *dst_ptr = dst->mutable_data(src.place(), src.type());
auto bkcl_dtype = platform::ToBKCLDataType(src.type());
auto *dst_ptr = dst->mutable_data(src.place(), src.dtype());
auto bkcl_dtype =
platform::ToBKCLDataType(framework::TransToProtoVarType(src.dtype()));
PADDLE_ENFORCE_EQ(bkcl_all_reduce(comm->comm(), src_ptr, dst_ptr, src.numel(),
bkcl_dtype, BKCL_ADD, stream),
......@@ -159,7 +161,8 @@ void BKCLParallelContext::Broadcast(framework::Variable *src, int ring_id) {
XPUStream stream = comm->stream();
void *src_ptr = src_tensor->data();
auto data_type = platform::ToBKCLDataType(src_tensor->type());
auto data_type = platform::ToBKCLDataType(
framework::TransToProtoVarType(src_tensor->dtype()));
PADDLE_ENFORCE_EQ(bkcl_broadcast(comm->comm(), src_ptr, src_ptr,
src_tensor->numel(), data_type, 0, stream),
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/imperative/gloo_context.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/fleet/gloo_wrapper.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/device_context.h"
......@@ -109,7 +110,7 @@ void GLOOParallelContext::AllReduce(const framework::Tensor &src_tensor,
framework::Tensor *dst_tensor) {
auto gloo_wrapper = framework::GlooWrapper::GetInstance();
dst_tensor->Resize(src_tensor.dims());
switch (src_tensor.type()) {
switch (framework::TransToProtoVarType(src_tensor.dtype())) {
GLOO_CASE(framework::proto::VarType::FP32, float, gloo_wrapper);
GLOO_CASE(framework::proto::VarType::FP64, double, gloo_wrapper);
GLOO_CASE(framework::proto::VarType::INT32, int, gloo_wrapper);
......@@ -139,7 +140,7 @@ void GLOOParallelContext::AllReduce(const pten::SelectedRows &src,
VLOG(3) << "SelectedRows AllReduce start";
const auto &src_tensor = src.value();
const auto &place = src_tensor.place();
auto dtype = src_tensor.type();
auto dtype = framework::TransToProtoVarType(src_tensor.dtype());
  // 1. Gather rows number from all workers. Here we use ncclAllGather to do this,
  // but we can use other ways to implement it in the future
const auto &src_rows = src.rows();
......@@ -169,7 +170,7 @@ void GLOOParallelContext::AllReduce(const pten::SelectedRows &src,
std::for_each(element_nums.begin(), element_nums.end(),
[feature_size](size_t &x) { x = x * feature_size; });
auto *dst_tensor_ptr = dst_tensor->mutable_data(place, dtype);
auto *dst_tensor_ptr = dst_tensor->mutable_data(place, src_tensor.dtype());
gloo_wrapper->AllGatherVector<int64_t>(const_cast<int64_t *>(src_rows_ptr),
static_cast<int64_t *>(dst_rows_ptr),
rows_num_vector);
......
......@@ -18,6 +18,7 @@
#include <memory>
#include <utility>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/imperative/layer.h"
......@@ -263,10 +264,11 @@ void TensorAdd(const VarType& src, VarType* dst) {
"%zu and the number of elements of destination tensor is %zu.",
numel, dst_tensor->numel()));
auto data_type = src_tensor.type();
auto data_type = framework::TransToProtoVarType(src_tensor.dtype());
auto place = src_tensor.place();
PADDLE_ENFORCE_EQ(dst_tensor->type(), data_type,
PADDLE_ENFORCE_EQ(framework::TransToProtoVarType(dst_tensor->dtype()),
data_type,
platform::errors::PreconditionNotMet(
"The data type of source tensor and destination tensor "
"should be equal, Otherwise, the calculation results "
......@@ -376,7 +378,8 @@ void SelectedRowsAddToTensor(const VarType& src, VarType* dst) {
const pten::SelectedRows& src_selected_rows =
GetInnerTensor<pten::SelectedRows>(src);
auto place = dst_tensor->place();
auto data_type = src_selected_rows.value().type();
auto data_type =
framework::TransToProtoVarType(src_selected_rows.value().dtype());
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
#define PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(dev_ctx_type, cpp_type) \
......@@ -422,13 +425,14 @@ void SelectedRowsAddTensor(const VarType& src_selected_rows_var,
const pten::DenseTensor& src_tensor =
GetInnerTensor<pten::DenseTensor>(src_tensor_var);
const auto& place = src_tensor.place();
auto data_type = src_tensor.type();
auto data_type = framework::TransToProtoVarType(src_tensor.dtype());
auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place);
pten::DenseTensor* dst_tensor =
GetInnerMutableTensor<pten::DenseTensor>(dst_tensor_var);
dst_tensor->Resize(src_tensor.dims());
dst_tensor->mutable_data(place, data_type);
dst_tensor->mutable_data(place, src_tensor.dtype());
#define PADDLE_SELECTED_ROWS_ADD_TENSOR(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
paddle::operators::math::SelectedRowsAddTensor<dev_ctx_type, cpp_type> \
......@@ -477,7 +481,8 @@ std::shared_ptr<VariableWrapper> SelectedRowsMerge(
auto& src_selected_rows1 = src1.Get<pten::SelectedRows>();
auto& src_selected_rows2 = src2.Get<pten::SelectedRows>();
auto place = src_selected_rows1.value().place();
auto data_type = src_selected_rows1.value().type();
auto data_type =
framework::TransToProtoVarType(src_selected_rows1.value().dtype());
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
std::vector<const pten::SelectedRows*> src_selected_rows;
......@@ -702,12 +707,14 @@ void EagerGradientAccumulator::SumGrad(std::shared_ptr<VariableWrapper> var,
VLOG(6) << "Dims of " << dst_var->Name() << " is set as: "
<< var->Var().Get<framework::LoDTensor>().dims();
tensor->Resize(var->Var().Get<framework::LoDTensor>().dims());
tensor->mutable_data(place, var->DataType());
tensor->mutable_data(place,
framework::TransToPtenDataType(var->DataType()));
pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
} else {
auto* tensor =
dst_var->MutableVar()->GetMutable<framework::LoDTensor>();
tensor->mutable_data(place, var->DataType());
tensor->mutable_data(place,
framework::TransToPtenDataType(var->DataType()));
pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
}
}
......@@ -834,12 +841,14 @@ void SortedGradientAccumulator::SumGrad(std::shared_ptr<VariableWrapper> var,
VLOG(6) << "Dims of " << dst_var->Name() << " is set as: "
<< var->Var().Get<framework::LoDTensor>().dims();
tensor->Resize(var->Var().Get<framework::LoDTensor>().dims());
tensor->mutable_data(place, var->DataType());
tensor->mutable_data(place,
framework::TransToPtenDataType(var->DataType()));
pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
} else {
auto* tensor =
dst_var->MutableVar()->GetMutable<framework::LoDTensor>();
tensor->mutable_data(place, var->DataType());
tensor->mutable_data(place,
framework::TransToPtenDataType(var->DataType()));
pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
}
}
......
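// A minimal sketch of the dtype-equality guard used repeatedly above before one
// tensor is accumulated into another; CheckSameDType is an illustrative name and
// the framework/platform headers are assumed to be included.
static void CheckSameDType(const pten::DenseTensor& src,
                           const pten::DenseTensor& dst) {
  namespace fw = paddle::framework;
  auto src_type = fw::TransToProtoVarType(src.dtype());
  auto dst_type = fw::TransToProtoVarType(dst.dtype());
  PADDLE_ENFORCE_EQ(src_type, dst_type,
                    paddle::platform::errors::PreconditionNotMet(
                        "The data type of source tensor (%s) and destination "
                        "tensor (%s) should be equal.",
                        fw::DataTypeToString(src_type),
                        fw::DataTypeToString(dst_type)));
}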
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/imperative/hccl_context.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h"
......@@ -44,8 +45,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst,
void *src_ptr = const_cast<void *>(src.data());
dst->Resize(src.dims());
void *dst_ptr = dst->mutable_data(src.place(), src.type());
HcclDataType hccl_dtype = platform::ToHCCLDataType(src.type());
void *dst_ptr = dst->mutable_data(src.place(), src.dtype());
HcclDataType hccl_dtype =
platform::ToHCCLDataType(framework::TransToProtoVarType(src.dtype()));
PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::HcclAllReduce(
src_ptr, dst_ptr, src.numel(), hccl_dtype, HCCL_REDUCE_SUM, comm->comm(),
......@@ -169,7 +171,8 @@ void HCCLParallelContext::Broadcast(framework::Variable *src, int ring_id) {
void *src_ptr =
reinterpret_cast<void *>(const_cast<void *>(src_tensor->data()));
auto hccl_dtype = platform::ToHCCLDataType(src_tensor->type());
auto hccl_dtype = platform::ToHCCLDataType(
framework::TransToProtoVarType(src_tensor->dtype()));
PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::HcclBroadcast(
src_ptr, src_tensor->numel(), hccl_dtype, 0, comm->comm(),
reinterpret_cast<void *>(stream)));
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/imperative/jit/program_desc_tracer.h"
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle {
namespace imperative {
......@@ -253,7 +254,7 @@ void ProgramDescTracer::InsertVarIfNotExist(
new_var_desc->SetShape(framework::vectorize<int64_t>(tensor.dims()));
new_var_desc->SetLoDLevel(tensor.lod().size());
if (tensor.IsInitialized()) {
new_var_desc->SetDataType(tensor.type());
new_var_desc->SetDataType(framework::TransToProtoVarType(tensor.dtype()));
} else {
new_var_desc->SetDataType(framework::proto::VarType::FP32);
}
......
......@@ -15,6 +15,8 @@
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/imperative/infer_var_type_context.h"
#include "paddle/fluid/imperative/op_base.h"
......@@ -99,7 +101,9 @@ static std::string DebugString(
auto& tensor = var.Get<framework::LoDTensor>();
ss << "LoDTensor<";
if (tensor.IsInitialized()) {
ss << framework::DataTypeToString(tensor.type()) << ", ";
ss << framework::DataTypeToString(
framework::TransToProtoVarType(tensor.dtype()))
<< ", ";
ss << tensor.place() << ", ";
ss << "(" << tensor.dims() << ")";
} else {
......@@ -112,7 +116,9 @@ static std::string DebugString(
auto& tensor = selected_rows.value();
auto& rows = selected_rows.rows();
if (tensor.IsInitialized()) {
ss << framework::DataTypeToString(tensor.type()) << ", ";
ss << framework::DataTypeToString(
framework::TransToProtoVarType(tensor.dtype()))
<< ", ";
ss << tensor.place() << ", ";
ss << "height(" << selected_rows.height() << "), rows(";
std::for_each(rows.cbegin(), rows.cend(),
......
......@@ -25,6 +25,7 @@
#include "paddle/fluid/platform/dynload/nccl.h"
#endif
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
......@@ -143,7 +144,8 @@ void NCCLParallelContext::Broadcast(framework::Variable *src, int ring_id) {
gpuStream_t stream = comm->stream();
void *src_ptr = src_tensor->data();
auto nccl_dtype = platform::ToNCCLDataType(src_tensor->type());
auto nccl_dtype = platform::ToNCCLDataType(
framework::TransToProtoVarType(src_tensor->dtype()));
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast(
src_ptr, src_tensor->numel(), nccl_dtype, 0, comm->comm(), stream));
}
......
......@@ -24,6 +24,7 @@
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/op_base.h"
......@@ -312,9 +313,11 @@ static void FillConstantLike(const VariableWrapper &ref_var,
// we can't get data_type_ directly. We need to check if we can only use
// default data_type for now.
if (ref_var.ForwardDataType() != -1) {
dst_tensor->mutable_data(place, ref_var.ForwardDataType());
dst_tensor->mutable_data(
place, framework::TransToPtenDataType(ref_var.ForwardDataType()));
} else {
dst_tensor->mutable_data(place, ref_var.DataType());
dst_tensor->mutable_data(
place, framework::TransToPtenDataType(ref_var.DataType()));
}
pten::funcs::set_constant(*dev_ctx, dst_tensor, value);
}
......@@ -739,7 +742,8 @@ PartialGradTask::PartialGradTask(
platform::errors::InvalidArgument(
"The %d-th grad_output's shape does not match the %d-th output",
i, i));
PADDLE_ENFORCE_EQ(grad_tensor.type(), out_tensor.type(),
PADDLE_ENFORCE_EQ(framework::TransToProtoVarType(grad_tensor.dtype()),
framework::TransToProtoVarType(out_tensor.dtype()),
platform::errors::InvalidArgument(
"The %d-th grad_output's data type does not "
"match the %d-th output",
......
......@@ -29,6 +29,7 @@
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/imperative/var_helper.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/selected_rows.h"
......@@ -425,7 +426,7 @@ void BuildDygraphPtenKernelContext(
kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr));
} else if (attr_defs[i].type_index ==
std::type_index(typeid(pten::DataType))) {
auto data_type = pten::TransToPtenDataType(
auto data_type = framework::TransToPtenDataType(
static_cast<framework::proto::VarType::Type>(
BOOST_GET_CONST(int, attr)));
kernel_ctx->EmplaceBackAttr(data_type);
......
......@@ -446,7 +446,7 @@ void Reducer::InitializeGroups(
InitializeDenseGroups(variable_indices_, &group);
auto tensor = group.dense_contents_.GetMutable<framework::LoDTensor>();
tensor->Resize(framework::make_ddim({group.all_length_}))
.mutable_data(place_, group.dtype_);
.mutable_data(place_, framework::TransToPtenDataType(group.dtype_));
}
// map variables to this group by VariableLocator
......@@ -737,7 +737,8 @@ void Reducer::MarkVarReady(const size_t var_index, const bool is_used_var) {
// by avoiding tensor construction
if (!group_tensor.IsInitialized()) {
group_tensor.Resize({static_cast<int64_t>(length)});
group_tensor.mutable_data(place_, group.dtype_);
group_tensor.mutable_data(place_,
framework::TransToPtenDataType(group.dtype_));
}
#ifdef PADDLE_WITH_XPU_BKCL
......
......@@ -17,6 +17,7 @@
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/fluid/memory/memcpy.h"
......@@ -224,8 +225,10 @@ static bool IsEqualVar(const framework::Variable& var1,
auto* t1_p = t1.data();
auto* t2_p = t2.data();
return std::memcmp(t1_p, t2_p,
t1.numel() * framework::SizeOfType(t1.type())) == 0;
return std::memcmp(
t1_p, t2_p,
t1.numel() * framework::SizeOfType(
framework::TransToProtoVarType(t1.dtype()))) == 0;
}
template <typename T>
......
......@@ -86,7 +86,7 @@ void GroupConcatSplit(Place place, size_t size) {
tmp.ShareDataWith(*tensor).Resize({static_cast<int64_t>(len)});
group.dense_tensors_.push_back(std::move(tmp));
group.all_length_ += len;
group.dtype_ = tensor->type();
group.dtype_ = framework::TransToProtoVarType(tensor->dtype());
}
paddle::platform::DeviceContextPool& pool =
......@@ -96,7 +96,7 @@ void GroupConcatSplit(Place place, size_t size) {
{ // concat
auto* tensor = group.dense_contents_.GetMutable<framework::LoDTensor>();
tensor->Resize(framework::make_ddim({group.all_length_}))
.mutable_data(place, group.dtype_);
.mutable_data(place, framework::TransToPtenDataType(group.dtype_));
group.ConcatTensors(*dev_ctx);
group.DivNRanks(*dev_ctx, 1);
......
......@@ -15,6 +15,7 @@
#include "paddle/fluid/imperative/var_helper.h"
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor.h"
......@@ -170,9 +171,11 @@ template <>
framework::proto::VarType::Type GetDataType<egr::EagerTensor>(
std::shared_ptr<egr::EagerTensor> var) {
if (var->Var().IsType<pten::SelectedRows>()) {
return var->Var().Get<pten::SelectedRows>().value().type();
return framework::TransToProtoVarType(
var->Var().Get<pten::SelectedRows>().value().type());
} else if (var->Var().IsType<framework::LoDTensor>()) {
return var->Var().Get<framework::LoDTensor>().type();
return framework::TransToProtoVarType(
var->Var().Get<framework::LoDTensor>().type());
} else {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"We only support pten::SelectedRows and framework::LoDTensor in "
......
......@@ -19,6 +19,7 @@
#include <string>
#include <utility>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/fluid/framework/string_array.h"
#include "paddle/fluid/framework/variable.h"
......@@ -169,7 +170,7 @@ class VariableWrapper {
}
}
if (tensor && tensor->IsInitialized()) {
return tensor->type();
return framework::TransToProtoVarType(tensor->dtype());
} else {
VLOG(6) << "The tensor of variable " << name_ << " is not initialized";
......
......@@ -551,7 +551,7 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx);
auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var);
auto type = fetch.type();
auto type = framework::TransToProtoVarType(fetch.dtype());
auto output = &(outputs->at(i));
output->name = fetches_[idx]->Input("X")[0];
if (type == framework::proto::VarType::FP32) {
......
......@@ -327,7 +327,7 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx);
auto fetch = BOOST_GET_CONST(framework::LoDTensor, fetch_var);
auto type = fetch.type();
auto type = framework::TransToProtoVarType(fetch.dtype());
auto output = &(outputs->at(i));
output->name = fetchs_[idx]->Input("X")[0];
if (type == framework::DataTypeTrait<float>::DataType()) {
......
......@@ -18,6 +18,7 @@ limitations under the License. */
#include <thread> // NOLINT
#include "gflags/gflags.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/tests/test_helper.h"
......@@ -36,13 +37,16 @@ namespace paddle {
PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
PaddleTensor pt;
if (t->type() == framework::proto::VarType::INT64) {
if (framework::TransToProtoVarType(t->dtype()) ==
framework::proto::VarType::INT64) {
pt.data.Reset(t->data(), t->numel() * sizeof(int64_t));
pt.dtype = PaddleDType::INT64;
} else if (t->type() == framework::proto::VarType::FP32) {
} else if (framework::TransToProtoVarType(t->dtype()) ==
framework::proto::VarType::FP32) {
pt.data.Reset(t->data(), t->numel() * sizeof(float));
pt.dtype = PaddleDType::FLOAT32;
} else if (t->type() == framework::proto::VarType::INT32) {
} else if (framework::TransToProtoVarType(t->dtype()) ==
framework::proto::VarType::INT32) {
pt.data.Reset(t->data(), t->numel() * sizeof(int32_t));
pt.dtype = PaddleDType::INT32;
} else {
......
......@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
......@@ -122,7 +123,7 @@ T *Tensor::data(PlaceType *place, int *size) const {
DataType Tensor::type() const {
EAGER_GET_TENSOR(paddle::framework::LoDTensor);
auto type = tensor->type();
auto type = paddle::framework::TransToProtoVarType(tensor->dtype());
if (type == paddle::framework::proto::VarType::FP32) {
return DataType::FLOAT32;
} else if (type == paddle::framework::proto::VarType::FP16) {
......
......@@ -172,7 +172,8 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
snprintf(output_i.name, output_names[i].length() + 1, "%s",
output_names[i].c_str());
auto output_t = predictor->GetOutputTensor(output_names[i]);
output_i.dtype = ConvertToPDDataType(output_t->type());
output_i.dtype =
ConvertToPDDataType(framework::TransToProtoVarType(output_t->dtype()));
std::vector<int> output_shape = output_t->shape();
output_i.shape = new int[output_shape.size()];
memmove(output_i.shape, output_shape.data(),
......@@ -256,7 +257,8 @@ void PD_SetZeroCopyInput(PD_Predictor* predictor,
void PD_GetZeroCopyOutput(PD_Predictor* predictor, PD_ZeroCopyTensor* tensor) {
auto output = predictor->predictor->GetOutputTensor(tensor->name);
tensor->dtype = ConvertToPDDataType(output->type());
tensor->dtype =
ConvertToPDDataType(framework::TransToProtoVarType(output->dtype()));
auto shape = output->shape();
size_t shape_size = shape.size();
if (tensor->shape.capacity < shape_size * sizeof(int)) {
......@@ -271,7 +273,8 @@ void PD_GetZeroCopyOutput(PD_Predictor* predictor, PD_ZeroCopyTensor* tensor) {
int n =
std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
size_t length = n * paddle::PaddleDtypeSize(output->type());
size_t length = n * paddle::PaddleDtypeSize(
framework::TransToProtoVarType(output->dtype()));
if (tensor->data.capacity < length) {
if (tensor->data.data) {
std::free(tensor->data.data);
......
......@@ -16,6 +16,7 @@
#include <functional>
#include <map>
#include <memory>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/memory/allocation/allocator.h"
......@@ -185,9 +186,11 @@ void InitDstTensor(paddle::lite_api::Tensor* dst,
// the input tensor.
constexpr int empty_size = 0;
dst->Resize({empty_size});
GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()),
GetLiteTargetType(src.place()));
dst->SetPrecision(GetLitePrecisionType(src.type()));
GetLiteTensorDataPtr(
dst, GetLitePrecisionType(framework::TransToProtoVarType(src.dtype())),
GetLiteTargetType(src.place()));
dst->SetPrecision(
GetLitePrecisionType(framework::TransToProtoVarType(src.dtype())));
paddle::lite::LoD lite_lod;
SetLoD(&lite_lod, src.lod());
dst->SetLoD(lite_lod);
......@@ -195,8 +198,9 @@ void InitDstTensor(paddle::lite_api::Tensor* dst,
void InitDstTensor(framework::LoDTensor* dst,
const paddle::lite_api::Tensor& src) {
dst->mutable_data(inference::lite::utils::GetNativePlace(src.target()),
GetNativePrecisionType(src.precision()));
dst->mutable_data(
inference::lite::utils::GetNativePlace(src.target()),
framework::TransToPtenDataType(GetNativePrecisionType(src.precision())));
SetLoD(dst->mutable_lod(), src.lod());
}
......@@ -208,14 +212,16 @@ void TensorCopyAsync(paddle::lite_api::Tensor* dst,
const platform::Place& src_place = src.place();
const platform::Place& dst_place = GetNativePlace(dst->target());
const size_t bytes =
static_cast<size_t>(src.numel()) * framework::SizeOfType(src.type());
static_cast<size_t>(src.numel()) * framework::DataTypeSize(src.dtype());
dst->Resize(framework::vectorize(src.dims()));
const void* src_data = src.data();
void* dst_data{nullptr};
dst_data = GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()),
GetLiteTargetType(src.place()));
dst_data = GetLiteTensorDataPtr(
dst, GetLitePrecisionType(framework::TransToProtoVarType(src.dtype())),
GetLiteTargetType(src.place()));
VLOG(3) << "[CopyAsync fluid -> lite] Bytes = " << bytes << ", src = " << &src
<< ", dst = " << dst << ", src_type = " << src.type();
<< ", dst = " << dst
<< ", src_type = " << framework::TransToProtoVarType(src.dtype());
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
VLOG(3) << "[Lite memory size] Bytes = " << bytes;
}
......@@ -229,12 +235,13 @@ void TensorCopyAsync(framework::LoDTensor* dst,
const platform::Place& src_place = GetNativePlace(src.target());
const platform::Place& dst_place = dst->place();
int64_t src_numel = GetLiteTensorNumel(src);
const size_t bytes = src_numel * framework::SizeOfType(dst->type());
const size_t bytes = src_numel * framework::DataTypeSize(dst->dtype());
const void* src_data = src.data<void>();
// When Lite is ready, the source type needs to be modified here.
void* dst_data = dst->mutable_data(dst_place, dst->type());
void* dst_data = dst->mutable_data(dst_place, dst->dtype());
VLOG(3) << "[CopyAsync lite -> fluid] Bytes = " << bytes << ", src = " << &src
<< ", dst = " << dst << ", src_type = " << dst->type();
<< ", dst = " << dst
<< ", src_type = " << framework::TransToProtoVarType(dst->dtype());
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
VLOG(3) << "[Lite memory size] Bytes = " << bytes;
}
......@@ -244,7 +251,8 @@ void TensorDataShare(paddle::lite_api::Tensor* dst, framework::LoDTensor* src) {
dst->Resize(framework::vectorize(src->dims()));
dst->ShareExternalMemory(src->data(), src->memory_size(),
GetLiteTargetType(src->place()));
dst->SetPrecision(GetLitePrecisionType(src->type()));
dst->SetPrecision(
GetLitePrecisionType(framework::TransToProtoVarType(src->dtype())));
paddle::lite::LoD lite_lod;
SetLoD(&lite_lod, src->lod());
dst->SetLoD(lite_lod);
......@@ -261,7 +269,9 @@ void TensorDataShare(framework::LoDTensor* dst, paddle::lite_api::Tensor* src) {
src_raw_data, memory_size, GetNativePlace(src->target())));
dst->Resize(paddle::framework::make_ddim(src->shape()));
SetLoD(dst->mutable_lod(), src->lod());
dst->ResetHolderWithType(holder, GetNativePrecisionType(src->precision()));
dst->ResetHolderWithType(
holder,
framework::TransToPtenDataType(GetNativePrecisionType(src->precision())));
}
} // namespace utils
......
......@@ -1020,7 +1020,7 @@ static bool CompareTensorData(const framework::LoDTensor &a,
}
for (size_t i = 0; i < a_size; i++) {
if (a.type() == VarType::FP32) {
if (framework::TransToProtoVarType(a.dtype()) == VarType::FP32) {
const auto *a_data = a.data<float>();
const auto *b_data = b.data<float>();
if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
......@@ -1029,7 +1029,7 @@ static bool CompareTensorData(const framework::LoDTensor &a,
b_data[i]);
return false;
}
} else if (a.type() == VarType::INT64) {
} else if (framework::TransToProtoVarType(a.dtype()) == VarType::INT64) {
const auto *a_data = a.data<int64_t>();
const auto *b_data = b.data<int64_t>();
if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
......
......@@ -140,8 +140,9 @@ class AbsDoubleGradOp : public framework::OperatorWithKernel {
framework::OpKernelType GetKernelTypeForVar(
const std::string& var_name, const framework::Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const {
return framework::OpKernelType(tensor.type(), tensor.place(),
tensor.layout());
return framework::OpKernelType(
framework::TransToProtoVarType(tensor.dtype()), tensor.place(),
tensor.layout());
}
};
......
......@@ -38,10 +38,12 @@ class ActivationMLUKernel : public framework::OpKernel<T> {
output->mutable_data<T>(ctx.GetPlace());
MLUCnnlActivationDesc act_desc(act_mode, alpha);
MLUCnnlTensorDesc input_desc(*input, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(input->type()));
MLUCnnlTensorDesc output_desc(*output, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(output->type()));
MLUCnnlTensorDesc input_desc(
*input, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(framework::TransToProtoVarType(input->dtype())));
MLUCnnlTensorDesc output_desc(
*output, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(framework::TransToProtoVarType(output->dtype())));
MLUCnnl::Active(ctx, act_desc.get(), input_desc.get(),
reinterpret_cast<const void*>(input->data<T>()),
......@@ -61,12 +63,15 @@ class ActivationGradMLUKernel : public framework::OpKernel<T> {
dx->mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc dout_desc(*dout, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(dout->type()));
MLUCnnlTensorDesc out_desc(*out, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(out->type()));
MLUCnnlTensorDesc dx_desc(*dx, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(dx->type()));
MLUCnnlTensorDesc dout_desc(
*dout, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(framework::TransToProtoVarType(dout->dtype())));
MLUCnnlTensorDesc out_desc(
*out, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(framework::TransToProtoVarType(out->dtype())));
MLUCnnlTensorDesc dx_desc(
*dx, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(framework::TransToProtoVarType(dx->dtype())));
MLUCnnlActivationDesc act_desc(act_mode, alpha);
MLUCnnl::ActiveGrad(
ctx, act_desc.get(), nullptr, nullptr, nullptr, nullptr,
......
......@@ -76,13 +76,13 @@ class PowGradNPUKernel : public framework::OpKernel<T> {
// Step 2: Construct a broadcast factor, which has the same shape with x.
// 2.1 Get a factor tensor with shape [1].
Tensor factor_tensor(framework::proto::VarType::FP32);
Tensor factor_tensor(experimental::DataType::FLOAT32);
factor_tensor.mutable_data<float>({1}, place);
FillNpuTensorWithConstant<float>(&factor_tensor, factor);
// 2.2 Get the factor which has the shape with x and the same value with
// factor.
Tensor factor_bc_tensor(framework::proto::VarType::FP32);
Tensor factor_bc_tensor(experimental::DataType::FLOAT32);
factor_bc_tensor.mutable_data<float>(x_dims, place);
const auto& runner_bc =
NpuOpRunner("FillD", {factor_tensor}, {factor_bc_tensor},
......@@ -659,14 +659,15 @@ class HardSwishGradNPUKernel : public framework::OpKernel<T> {
{{"dims", framework::vectorize(x->dims())}});
runner_fill.Run(stream);
Tensor tmp_bool(framework::proto::VarType::BOOL);
Tensor tmp_bool(experimental::DataType::BOOL);
tmp_bool.mutable_data<bool>(x->dims(), place);
const auto& runner_less =
NpuOpRunner("Less", {add_offset_val, tensor_threshold}, {tmp_bool});
runner_less.Run(stream);
Tensor tmp4(x->type());
tmp4.mutable_data<T>(x->dims(), place);
auto dst_dtype = ConvertToNpuDtype(x->type());
auto dst_dtype =
ConvertToNpuDtype(framework::TransToProtoVarType(x->type()));
const auto& runner_cast =
NpuOpRunner("Cast", {tmp_bool}, {tmp4},
{{"dst_type", static_cast<int>(dst_dtype)}});
......
......@@ -59,10 +59,13 @@ class AllcloseKernel : public framework::OpKernel<T> {
rtol->numel(), 1,
platform::errors::InvalidArgument(
"Input(Rtol) size must be 1, but get %d.", rtol->numel()));
PADDLE_ENFORCE_EQ(rtol->type(), framework::proto::VarType::FP64,
platform::errors::InvalidArgument(
"Input(Rtol) type must be double, but get %s.",
framework::DataTypeToString(rtol->type())));
PADDLE_ENFORCE_EQ(
framework::TransToProtoVarType(rtol->dtype()),
framework::proto::VarType::FP64,
platform::errors::InvalidArgument(
"Input(Rtol) type must be double, but get %s.",
framework::DataTypeToString(
framework::TransToProtoVarType(rtol->dtype()))));
rtol_v = get_tensor_value(dev_ctx, *rtol);
}
if (ctx.HasInput("Atol")) {
......@@ -71,10 +74,13 @@ class AllcloseKernel : public framework::OpKernel<T> {
atol->numel(), 1,
platform::errors::InvalidArgument(
"Input(Atol) size must be 1, but get %d", atol->numel()));
PADDLE_ENFORCE_EQ(atol->type(), framework::proto::VarType::FP64,
platform::errors::InvalidArgument(
"Input(Atol) type must be double, but get %s",
framework::DataTypeToString(atol->type())));
PADDLE_ENFORCE_EQ(
framework::TransToProtoVarType(atol->dtype()),
framework::proto::VarType::FP64,
platform::errors::InvalidArgument(
"Input(Atol) type must be double, but get %s",
framework::DataTypeToString(
framework::TransToProtoVarType(atol->dtype()))));
atol_v = get_tensor_value(dev_ctx, *atol);
}
......
......@@ -42,7 +42,7 @@ void Update(const platform::NPUDeviceContext& ctx,
platform::NPUMemsetAsync(static_cast<void*>(g), 0,
good_out_tensor->numel() * sizeof(int), stream);
// bad_out_data = bad_in_data + 1
Tensor factor_tensor(bad_out_tensor->type());
Tensor factor_tensor(bad_out_tensor->dtype());
factor_tensor.mutable_data<int>({1}, place);
FillNpuTensorWithConstant<int>(&factor_tensor, static_cast<int>(1));
const auto& runner_p2 = NpuOpRunner("Add", {*bad_in_tensor, factor_tensor},
......@@ -91,7 +91,7 @@ void Update(const platform::NPUDeviceContext& ctx,
bad_out_tensor->numel() * sizeof(int), stream);
// good_out_data = good_in_data + 1
Tensor factor_tensor(good_out_tensor->type());
Tensor factor_tensor(good_out_tensor->dtype());
factor_tensor.mutable_data<int>({1}, place);
FillNpuTensorWithConstant<int>(&factor_tensor, static_cast<int>(1));
const auto& runner_p2 = NpuOpRunner("Add", {*good_in_tensor, factor_tensor},
......@@ -188,7 +188,7 @@ class LazyZerosNPU {
if (!found_inf_vec[0]) {
framework::TensorCopy(*x, place, dev_ctx, out);
} else if (zero_ptr != dst_ptr) {
auto size = out->numel() * framework::SizeOfType(out->type());
auto size = out->numel() * framework::DataTypeSize(out->dtype());
memory::Copy(place, dst_ptr, place, zero_ptr, size, stream);
}
}
......
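Byte-size computations change in the same mechanical way: `numel() * framework::SizeOfType(tensor->type())` becomes `numel() * framework::DataTypeSize(tensor->dtype())`, as in the LazyZerosNPU hunk above. A minimal sketch; `TensorBytes` is a hypothetical helper name, not an API added by this PR.

```cpp
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace operators {

// Hypothetical helper: size of a tensor's payload in bytes, using the new
// DataTypeSize(dtype()) form instead of SizeOfType(type()).
inline size_t TensorBytes(const framework::Tensor& t) {
  return static_cast<size_t>(t.numel()) * framework::DataTypeSize(t.dtype());
}

}  // namespace operators
}  // namespace paddle
```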
......@@ -75,15 +75,16 @@ class ArgsortNPUKernel : public framework::OpKernel<T> {
framework::NPUAttributeMap attr = {{"axis", -1},
{"descending", descending}};
Tensor indices_tmp(framework::proto::VarType::INT32);
Tensor indices_tmp(experimental::DataType::INT32);
indices_tmp.Resize(indices->dims());
if (input->type() == framework::proto::VarType::INT64) {
Tensor input_fp32(framework::proto::VarType::FP32);
if (framework::TransToProtoVarType(input->dtype()) ==
framework::proto::VarType::INT64) {
Tensor input_fp32(experimental::DataType::FLOAT32);
input_fp32.Resize(input->dims());
CastToFP32(ctx, stream, *input, &input_fp32);
Tensor output_fp32(framework::proto::VarType::FP32);
Tensor output_fp32(experimental::DataType::FLOAT32);
output_fp32.Resize(output->dims());
if (axis == -1 || axis + 1 == in_dims.size()) {
......@@ -112,7 +113,7 @@ class ArgsortNPUKernel : public framework::OpKernel<T> {
TranposeNPU<float>(ctx, stream, &perm, input_fp32, &trans_input);
Tensor trans_output(input_fp32.type());
Tensor trans_indices(framework::proto::VarType::INT32);
Tensor trans_indices(experimental::DataType::INT32);
trans_output.mutable_data<float>(trans_dims, ctx.GetPlace());
trans_indices.mutable_data<int32_t>(trans_dims, ctx.GetPlace());
......@@ -150,7 +151,7 @@ class ArgsortNPUKernel : public framework::OpKernel<T> {
TranposeNPU<T>(ctx, stream, &perm, *input, &trans_input);
Tensor trans_output(input->type());
Tensor trans_indices(framework::proto::VarType::INT32);
Tensor trans_indices(experimental::DataType::INT32);
trans_output.mutable_data<T>(trans_dims, ctx.GetPlace());
trans_indices.mutable_data<int32_t>(trans_dims, ctx.GetPlace());
......
......@@ -66,7 +66,8 @@ struct ArrayToLoDFunctor : public boost::static_visitor<void> {
ArrayToLoDFunctorImpl<DeviceContext> functor;
functor.dev_ctx_ = dev_ctx;
functor.prev_functor_ = this;
framework::VisitDataType(out->type(), functor);
framework::VisitDataType(framework::TransToProtoVarType(out->dtype()),
functor);
}
};
......@@ -101,7 +102,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
"There's no element in the input array."));
int rank = x[0].dims().size();
platform::Place place = x[0].place();
auto data_type = x[0].type();
auto data_type = x[0].dtype();
int64_t batch_size = x[0].dims()[0];
framework::DDim ins_dims = rank > 1
? framework::slice_ddim(x[0].dims(), 1, rank)
......@@ -124,12 +125,12 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
"The current place is %d, and the previous place is %d.",
i, x[i].place(), place));
PADDLE_ENFORCE_EQ(
x[i].type(), data_type,
x[i].dtype(), data_type,
platform::errors::InvalidArgument(
"The date type of the %zu'th element in LoDTensorArray "
"differs from previous ones."
"The current data type is %d, and the previous data type is %d.",
i, x[i].type(), data_type));
i, x[i].dtype(), data_type));
batch_size += x[i].dims()[0];
}
auto ins_dim_vec = framework::vectorize(ins_dims);
......
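Callers of `framework::VisitDataType` keep passing the proto enum, so the conversion happens at the call site, as in the ArrayToLoDFunctor hunk above. The sketch below assumes the standard Paddle visitor interface (`apply<T>()` instantiated for the runtime dtype); the functor itself is a stand-in with an empty body.

```cpp
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace operators {

// Stand-in functor: VisitDataType instantiates apply<T>() for the runtime
// dtype of the visited tensor.
struct ExampleDtypeVisitor {
  framework::Tensor* out;
  template <typename T>
  void apply() const {
    // illustrative body only
  }
};

void VisitByDtype(framework::Tensor* out) {
  framework::VisitDataType(framework::TransToProtoVarType(out->dtype()),
                           ExampleDtypeVisitor{out});
}

}  // namespace operators
}  // namespace paddle
```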
......@@ -151,15 +151,19 @@ framework::OpKernelType BatchNormOp::GetExpectedKernelType(
bn_param_type = framework::proto::VarType::FP64;
}
PADDLE_ENFORCE_EQ(
bn_param_type, ctx.Input<Tensor>("Scale")->type(),
bn_param_type,
framework::TransToProtoVarType(ctx.Input<Tensor>("Scale")->dtype()),
platform::errors::InvalidArgument("Scale input should be of float type"));
PADDLE_ENFORCE_EQ(
bn_param_type, ctx.Input<Tensor>("Bias")->type(),
bn_param_type,
framework::TransToProtoVarType(ctx.Input<Tensor>("Bias")->dtype()),
platform::errors::InvalidArgument("Bias input should be of float type"));
PADDLE_ENFORCE_EQ(
bn_param_type, ctx.Input<Tensor>("Mean")->type(),
bn_param_type,
framework::TransToProtoVarType(ctx.Input<Tensor>("Mean")->dtype()),
platform::errors::InvalidArgument("Mean input should be of float type"));
PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input<Tensor>("Variance")->type(),
PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType(
ctx.Input<Tensor>("Variance")->dtype()),
platform::errors::InvalidArgument(
"Variance input should be of float type"));
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/operators/beam_search_decode_op.h"
#include "paddle/fluid/platform/device_context.h"
......@@ -192,7 +193,7 @@ class BeamSearchDecodeOp : public framework::OperatorBase {
LoDTensor* sentenceScores = ctx.Output<LoDTensor>("SentenceScores");
framework::VisitDataType(
scores->at(0).type(),
framework::TransToProtoVarType(scores->at(0).dtype()),
BeamSearchDecodeFunctor(*ids, *scores, sentenceIds, sentenceScores,
beam_size, end_id));
}
......
......@@ -112,7 +112,7 @@ void BincountCUDAInner(const framework::ExecutionContext& context) {
PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
input_data, input_numel, has_weights, weights_data, output_data);
} else {
const auto& weights_type = weights->type();
const auto& weights_type = framework::TransToProtoVarType(weights->dtype());
if (weights_type == framework::proto::VarType::FP32) {
float* output_data = output->mutable_data<float>(context.GetPlace());
......@@ -141,7 +141,7 @@ class BincountCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<framework::Tensor>("X");
const auto& input_type = input->type();
const auto& input_type = framework::TransToProtoVarType(input->dtype());
if (input_type == framework::proto::VarType::INT32) {
BincountCUDAInner<DeviceContext, T, int>(context);
......
......@@ -61,7 +61,7 @@ void BincountInner(const framework::ExecutionContext& context) {
if (has_weights) {
const T* weights_data = weights->data<T>();
const auto& weights_type = weights->type();
const auto& weights_type = framework::TransToProtoVarType(weights->dtype());
if (weights_type == framework::proto::VarType::FP32) {
float* output_data = output->mutable_data<float>(context.GetPlace());
pten::funcs::SetConstant<DeviceContext, float>()(
......@@ -95,7 +95,7 @@ class BincountKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<framework::Tensor>("X");
const auto& input_type = input->type();
const auto& input_type = framework::TransToProtoVarType(input->dtype());
if (input_type == framework::proto::VarType::INT32) {
BincountInner<DeviceContext, T, int>(context);
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/cast_op.h"
#include <memory>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/float16.h"
#ifdef PADDLE_WITH_MLU
......@@ -82,7 +83,9 @@ class CastOp : public framework::OperatorWithKernel {
auto &tensor_place = tensor->place();
// NOTE: cuda pinned tensor need to copy its data to target place
if (platform::is_cuda_pinned_place(tensor_place)) {
return framework::OpKernelType(tensor->type(), ctx.device_context());
return framework::OpKernelType(
framework::TransToProtoVarType(tensor->dtype()),
ctx.device_context());
}
#ifdef PADDLE_WITH_MKLDNN
......@@ -100,26 +103,32 @@ class CastOp : public framework::OperatorWithKernel {
return true;
};
if (this->CanMKLDNNBeUsed(ctx, tensor->type()) && MKLDNNSupportsCast()) {
return framework::OpKernelType(tensor->type(), ctx.GetPlace(),
framework::DataLayout::kMKLDNN,
framework::LibraryType::kMKLDNN);
if (this->CanMKLDNNBeUsed(
ctx, framework::TransToProtoVarType(tensor->dtype())) &&
MKLDNNSupportsCast()) {
return framework::OpKernelType(
framework::TransToProtoVarType(tensor->dtype()), ctx.GetPlace(),
framework::DataLayout::kMKLDNN, framework::LibraryType::kMKLDNN);
}
#endif
#ifdef PADDLE_WITH_MLU
auto src_type = static_cast<VT::Type>(ctx.Attr<int>("in_dtype"));
auto dst_type = static_cast<VT::Type>(ctx.Attr<int>("out_dtype"));
if (src_type == dst_type || MLUSupportsCast(src_type, dst_type)) {
return framework::OpKernelType(tensor->type(), tensor_place);
return framework::OpKernelType(
framework::TransToProtoVarType(tensor->dtype()), tensor_place);
} else {
VLOG(3) << "MLU not support cast type: "
<< framework::DataTypeToString(src_type)
<< " to type: " << framework::DataTypeToString(dst_type)
<< ", fallbacking to CPU one!";
return framework::OpKernelType(tensor->type(), platform::CPUPlace());
return framework::OpKernelType(
framework::TransToProtoVarType(tensor->dtype()),
platform::CPUPlace());
}
#endif
return framework::OpKernelType(tensor->type(), tensor_place);
return framework::OpKernelType(
framework::TransToProtoVarType(tensor->dtype()), tensor_place);
}
};
......
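Wherever an `OpKernelType` is built from a tensor, its first argument now comes from `TransToProtoVarType(tensor->dtype())` rather than `tensor->type()`, as the MKLDNN, MLU, and fallback branches of the cast_op hunk above show. A minimal sketch of just that construction; `KernelTypeFromTensor` is an illustrative name.

```cpp
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace operators {

// Illustrative only: derive the expected kernel type from an input tensor,
// mirroring the fallback branch of CastOp::GetExpectedKernelType above.
framework::OpKernelType KernelTypeFromTensor(const framework::Tensor& tensor,
                                             const platform::Place& place) {
  return framework::OpKernelType(
      framework::TransToProtoVarType(tensor.dtype()), place);
}

}  // namespace operators
}  // namespace paddle
```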
......@@ -63,7 +63,7 @@ class CastOpKernel : public framework::OpKernel<InT> {
out->mutable_data(dev_ctx.GetPlace(),
static_cast<framework::proto::VarType::Type>(out_dtype));
auto pt_out_dtype = pten::TransToPtenDataType(
auto pt_out_dtype = framework::TransToPtenDataType(
static_cast<framework::proto::VarType::Type>(out_dtype));
// call new kernel
......
......@@ -43,7 +43,7 @@ class CastNPUKernel : public framework::OpKernel<T> {
auto* out = ctx.Output<Tensor>("Out");
auto place = ctx.GetPlace();
if (x->type() == dtype) {
if (framework::TransToProtoVarType(x->dtype()) == dtype) {
// NOTE(zhiqiu): NPU cast op may result in wrong value, so
// add special case here.
VLOG(4) << "cast to same dtype:" << dtype;
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include <memory>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/cast_op.h"
#include "paddle/fluid/platform/float16.h"
......@@ -45,7 +46,7 @@ class CastXPUKernel : public framework::OpKernel<InT> {
out->mutable_data(dev_ctx.GetPlace(),
static_cast<framework::proto::VarType::Type>(out_dtype));
auto pt_out_dtype = pten::TransToPtenDataType(
auto pt_out_dtype = framework::TransToPtenDataType(
static_cast<framework::proto::VarType::Type>(out_dtype));
// call pten kernel
pten::CastKernel<InT>(
......
......@@ -338,8 +338,9 @@ class ClassCenterSampleCUDAKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce(
num_classes_per_device_ptr, num_classes_per_device_ptr,
num_classes_per_device.numel(),
platform::ToNCCLDataType(num_classes_per_device.type()), ncclSum,
comm->comm(), calcu_stream));
platform::ToNCCLDataType(
framework::TransToProtoVarType(num_classes_per_device.dtype())),
ncclSum, comm->comm(), calcu_stream));
}
#endif
......
......@@ -23,6 +23,7 @@
#ifdef PADDLE_WITH_ASCEND_CL
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#endif
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle {
namespace operators {
......@@ -53,7 +54,7 @@ struct FillConstantVisitor {
* = nullptr) const {
#ifdef PADDLE_WITH_ASCEND_CL
if (platform::is_npu_place(dev_ctx_.GetPlace())) {
Tensor tensor_tmp(dtype_);
Tensor tensor_tmp(framework::TransToPtenDataType(dtype_));
tensor_tmp.mutable_data<T>({1}, context_.GetPlace());
FillNpuTensorWithConstant<T>(&tensor_tmp, static_cast<T>(value_));
......@@ -193,7 +194,8 @@ class CoalesceTensorOpKernel : public framework::OpKernel<T> {
void *fused_tensor_ptr =
fused_tensor
->Resize(framework::make_ddim({static_cast<int64_t>(numel)}))
.mutable_data(context.GetPlace(), dtype);
.mutable_data(context.GetPlace(),
framework::TransToPtenDataType(dtype));
VLOG(10) << "Fused tensor addr " << fused_tensor_ptr;
// Init the continuous space
......
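The conversion also runs in the other direction. `Tensor(dtype)` and `Tensor::mutable_data(place, dtype)` now take the pten `DataType`, so code that still holds a `proto::VarType::Type` (for example, from an `int` op attribute) converts with `framework::TransToPtenDataType` first, as in the coalesce_tensor hunk above. A hedged sketch; the function name is illustrative.

```cpp
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace operators {

// Illustrative only: allocate a tensor from an attribute that carries the
// dtype as a proto::VarType enum (stored as int in the attribute map).
void* AllocFromProtoDtype(framework::Tensor* out,
                          const platform::Place& place, int dtype_attr) {
  auto proto_dtype = static_cast<framework::proto::VarType::Type>(dtype_attr);
  // mutable_data now expects the pten/experimental DataType.
  return out->mutable_data(place, framework::TransToPtenDataType(proto_dtype));
}

}  // namespace operators
}  // namespace paddle
```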
......@@ -41,7 +41,8 @@ class AllReduceOpKernel : public framework::OpKernel<T> {
auto in = ctx.Input<framework::Tensor>("X");
auto out = ctx.Output<framework::Tensor>("Out");
int dtype = platform::ToNCCLDataType(in->type());
int dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype()));
int64_t numel = in->numel();
auto* sendbuff = in->data();
out->Resize(in->dims());
......
......@@ -31,7 +31,8 @@ class AllToAllOpCUDAKernel : public framework::OpKernel<T> {
auto x = ctx.Input<framework::LoDTensor>("X");
auto out = ctx.Output<framework::LoDTensor>("Out");
int send_numel = x->numel();
ncclDataType_t dtype = platform::ToNCCLDataType(x->type());
ncclDataType_t dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype()));
int ring_id = ctx.Attr<int>("ring_id");
PADDLE_ENFORCE_GE(
......
......@@ -31,7 +31,8 @@ class BarrierOpCUDAKernel : public framework::OpKernel<T> {
auto out = ctx.Output<framework::Tensor>("Out");
auto place = ctx.GetPlace();
ncclDataType_t dtype = platform::ToNCCLDataType(in->type());
ncclDataType_t dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype()));
int64_t numel = in->numel();
const void* sendbuff = in->data();
void* recvbuff = out->mutable_data<T>(place);
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
#endif
#include "paddle/fluid/framework/convert_utils.h"
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......@@ -56,7 +57,8 @@ class NCCLBroadcastOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast(
send_recv_buffer, static_cast<size_t>(in->numel()),
platform::ToNCCLDataType(in->type()), root_dev_id, comm, stream));
platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())),
root_dev_id, comm, stream));
VLOG(3) << "Bcast " << ctx.InputNames("X")[0] << ", (" << in->numel() << ")"
<< " From " << root_dev_id << " to " << dev_id;
......
......@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
#endif
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle {
namespace operators {
......@@ -29,7 +30,8 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel<T> {
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
auto in = ctx.Input<framework::Tensor>("X");
auto out = ctx.Output<framework::Tensor>("Out");
ncclDataType_t dtype = platform::ToNCCLDataType(in->type());
ncclDataType_t dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype()));
int nranks = ctx.Attr<int>("nranks");
int rid = ctx.Attr<int>("ring_id");
......
......@@ -31,7 +31,8 @@ class CAllGatherOpASCENDKernel : public framework::OpKernel<T> {
#if defined(PADDLE_WITH_ASCEND_CL)
auto in = ctx.Input<framework::Tensor>("X");
auto out = ctx.Output<framework::Tensor>("Out");
HcclDataType dtype = platform::ToHCCLDataType(in->type());
HcclDataType dtype =
platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype()));
int ring_id = ctx.Attr<int>("ring_id");
std::string group =
......
......@@ -173,7 +173,8 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel<T> {
auto in = ctx.Input<framework::Tensor>("X");
auto out = ctx.Output<framework::Tensor>("Out");
auto place = ctx.GetPlace();
HcclDataType dtype = platform::ToHCCLDataType(in->type());
HcclDataType dtype =
platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype()));
int64_t numel = in->numel();
void* sendbuff = reinterpret_cast<void*>(const_cast<T*>(in->data<T>()));
......@@ -231,7 +232,7 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel<T> {
bool found_nan = false;
auto d_type = in->type();
auto d_type = framework::TransToProtoVarType(in->dtype());
switch (d_type) {
case framework::proto::VarType::FP16: {
break;
......@@ -284,7 +285,8 @@ class CAllReduceOpXPUKernel : public framework::OpKernel<T> {
auto out = ctx.Output<framework::Tensor>("Out");
auto place = ctx.GetPlace();
BKCLDataType dtype = platform::ToBKCLDataType(in->type());
BKCLDataType dtype =
platform::ToBKCLDataType(framework::TransToProtoVarType(in->dtype()));
int64_t numel = in->numel();
const void* sendbuff = in->data<T>();
out->Resize(in->dims());
......@@ -346,7 +348,8 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel<T> {
auto out = ctx.Output<framework::Tensor>("Out");
auto place = ctx.GetPlace();
ncclDataType_t dtype = platform::ToNCCLDataType(in->type());
ncclDataType_t dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype()));
int64_t numel = in->numel();
const void* sendbuff = in->data<T>();
out->Resize(in->dims());
......
......@@ -30,7 +30,8 @@ class CBroadcastOpCUDAKernel : public framework::OpKernel<T> {
auto x = ctx.Input<framework::LoDTensor>("X");
auto out = ctx.Output<framework::LoDTensor>("Out");
int numel = x->numel();
ncclDataType_t dtype = platform::ToNCCLDataType(x->type());
ncclDataType_t dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype()));
int rid = ctx.Attr<int>("ring_id");
auto place = ctx.GetPlace();
......
......@@ -30,7 +30,8 @@ class CBroadcastOpASCENDKernel : public framework::OpKernel<T> {
auto x = ctx.Input<framework::LoDTensor>("X");
void* ptr = reinterpret_cast<void*>(const_cast<T*>(x->data<T>()));
int numel = x->numel();
HcclDataType dtype = platform::ToHCCLDataType(x->type());
HcclDataType dtype =
platform::ToHCCLDataType(framework::TransToProtoVarType(x->dtype()));
auto out = ctx.Output<framework::LoDTensor>("Out");
......
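The collective kernels keep their `ToNCCLDataType` / `ToHCCLDataType` / `ToBKCLDataType` helpers working on the proto enum, so every call site wraps the tensor dtype as in the hunks above. A sketch for the NCCL case only, assuming an NCCL-enabled build; `NcclDtypeOf` is an illustrative name.

```cpp
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"

namespace paddle {
namespace operators {

// Illustrative only: map a tensor's runtime dtype to the corresponding
// ncclDataType_t via the proto enum, as the c_allreduce/c_broadcast kernels
// above now do.
inline ncclDataType_t NcclDtypeOf(const framework::Tensor& t) {
  return platform::ToNCCLDataType(framework::TransToProtoVarType(t.dtype()));
}

}  // namespace operators
}  // namespace paddle
#endif
```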
(The diffs for the remaining files in this commit are collapsed and not shown.)