Unverified commit 7e7e9404, authored by Aurelius84, committed by GitHub

[PTen]Migrate proto::VarType outside of Pten (#39411)

* #1 migrate dist-related type() -> dtype()

* move datatype function from pten -> fluid/framework

* change type() in imperative into convert(dtype())

* modify xx_tensor->type into xx_tensor->dtype

* change the set_type interface and the caller

* modify xx_tensor.type into xx_tensor.dtype

* fix mutable_data(place, dtype())

* change caller of mutable_data in pten and distributed

* change the caller of mutable_data in fluid/framework

* change the caller of mutable_data in imperative directory

* mutable_data: inference

* update the call of mutable_data

* transfer MakePenScalarArray MakePtenScalar ResetHolderWithType

* pass the compile; the next step is to remove VarType from Pten

* fix all and remove VarType from Pten; succeeds on Linux. Next task is the other platforms

* fix conflict with develop

* fix compile error

* Fix reset conversion

* fix conflict

* fix compile problem

* fix typo

* Fix << in tensor_utils.cc

* fix type->dtype

* fix unittest

* fix tensor init constructor

* fix DataTypeSize for BFloat16

* fix code style

* fix NPU compile error

* fix NPU

* compile NPU successfully

* fix conflict

* fix conflict
Co-authored-by: xiongkun <xiongkun03@baidu.com>
Parent: 9c2cee1c
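The diff below applies one recurring call-site pattern across fluid. The sketch that follows is illustrative only and is not part of the commit; the function Example, and the tensor/place parameters, are invented for the illustration. It shows the before/after shape of that pattern: type() (proto::VarType::Type) is replaced by dtype() (pten DataType), SizeOfType(type()) by DataTypeSize(dtype()), the proto enum is recovered via TransToProtoVarType() only where still required, and mutable_data now takes the pten DataType.

// Illustrative sketch of the migration pattern, assuming a LoDTensor* and a Place.
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/place.h"

void Example(paddle::framework::LoDTensor* tensor,
             const paddle::platform::Place& place) {
  // Before this PR:
  //   auto proto_type = tensor->type();
  //   auto len = tensor->numel() * paddle::framework::SizeOfType(tensor->type());
  //   tensor->mutable_data(place, proto_type);

  // After this PR: dtype() returns pten's DataType; convert back to the proto
  // enum only where that enum is still needed.
  auto proto_type = paddle::framework::TransToProtoVarType(tensor->dtype());
  auto len = tensor->numel() * paddle::framework::DataTypeSize(tensor->dtype());
  tensor->mutable_data(place, tensor->dtype());
  (void)proto_type;
  (void)len;
}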
@@ -562,7 +562,7 @@ bool DistModel::FetchResults(std::vector<DistModelTensor> *output_data,
     framework::FetchType &fetch_var =
         framework::GetFetchVariable(*scope, "fetch", idx);
     auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var);
-    auto type = fetch.type();
+    auto type = framework::TransToProtoVarType(fetch.dtype());
     auto output = &(output_data->at(i));
     output->name = idx_to_fetches_[idx];
     bool rst = false;
......
@@ -13,8 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/distributed/ps/service/brpc_utils.h"
 #include <arpa/inet.h>
 #include <netdb.h>
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/platform/enforce.h"
 namespace paddle {
@@ -98,25 +101,29 @@ void SerializeLodTensor(framework::Variable* var,
       }
     }
   }
-  var_msg->set_data_type(static_cast<VarMsg::Type>(tensor->type()));
+  var_msg->set_data_type(static_cast<VarMsg::Type>(
+      framework::TransToProtoVarType(tensor->dtype())));
   for (auto& dim : framework::vectorize(tensor->dims())) {
     var_msg->add_dims(dim);
   }
   // IO Buffer
   if (platform::is_cpu_place(tensor->place())) {
-    auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
+    auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
     iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
     iobuf->append(reinterpret_cast<const char*>(tensor->data()), data_len);
   } else {
 #ifdef PADDLE_WITH_CUDA
-    char* temp_ptr = new char[tensor->numel() *
-                              framework::SizeOfType(tensor->type())];  // NOLINT
+    char* temp_ptr =
+        new char[tensor->numel() *
+                 framework::DataTypeSize(tensor->dtype())];  // NOLINT
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     memory::Copy(
         platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(),
-        tensor->numel() * framework::SizeOfType(tensor->type()), stream);
-    auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
+        tensor->numel() * framework::SizeOfType(
+                              framework::TransToProtoVarType(tensor->dtype())),
+        stream);
+    auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
     iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
     iobuf->append(reinterpret_cast<const char*>(temp_ptr), data_len);
     delete[] temp_ptr;
@@ -139,25 +146,29 @@ void SerializeSelectedRows(framework::Variable* var,
   var_data->resize(rows->size() * sizeof(int64_t));
   char* data_ptr = const_cast<char*>(var_data->data());
   memcpy(data_ptr, &((*rows)[0]), rows->size() * sizeof(int64_t));
-  var_msg->set_data_type(static_cast<VarMsg::Type>(tensor->type()));
+  var_msg->set_data_type(static_cast<VarMsg::Type>(
+      framework::TransToProtoVarType(tensor->dtype())));
   for (auto& dim : framework::vectorize(tensor->dims())) {
     var_msg->add_dims(dim);
   }
   // IO Buffer
   if (platform::is_cpu_place(tensor->place())) {
-    auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
+    auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
     iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
     iobuf->append(reinterpret_cast<const char*>(tensor->data()), data_len);
   } else {
 #ifdef PADDLE_WITH_CUDA
-    char* temp_ptr = new char[tensor->numel() *
-                              framework::SizeOfType(tensor->type())];  // NOLINT
+    char* temp_ptr =
+        new char[tensor->numel() *
+                 framework::DataTypeSize(tensor->dtype())];  // NOLINT
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     memory::Copy(
         platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(),
-        tensor->numel() * framework::SizeOfType(tensor->type()), stream);
-    auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
+        tensor->numel() * framework::SizeOfType(
+                              framework::TransToProtoVarType(tensor->dtype())),
+        stream);
+    auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
     iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
     iobuf->append(reinterpret_cast<const char*>(temp_ptr), data_len);
     delete[] temp_ptr;
@@ -225,8 +236,9 @@ void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg,
   }
   tensor->set_lod(lod);
-  void* tensor_data =
-      tensor->mutable_data(place, VarMessageToVarType(msg.data_type()));
+  void* tensor_data = tensor->mutable_data(
+      place,
+      framework::TransToPtenDataType(VarMessageToVarType(msg.data_type())));
   // IO Buffer
   if (platform::is_cpu_place(place)) {
@@ -236,15 +248,16 @@ void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg,
   } else if (platform::is_gpu_place(place)) {
 #ifdef PADDLE_WITH_CUDA
     unsigned long data_len;  // NOLINT
-    char* temp_ptr = new char[tensor->numel() *
-                              framework::SizeOfType(tensor->type())];  // NOLINT
+    char* temp_ptr =
+        new char[tensor->numel() *
+                 framework::DataTypeSize(tensor->dtype())];  // NOLINT
     io_buffer_itr.copy_and_forward((void*)(&data_len), 8);      // NOLINT
     io_buffer_itr.copy_and_forward((void*)temp_ptr, data_len);  // NOLINT
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     memory::Copy(
         place, tensor_data, platform::CPUPlace(), (void*)temp_ptr,  // NOLINT
-        tensor->numel() * framework::SizeOfType(tensor->type()), stream);
+        tensor->numel() * framework::DataTypeSize(tensor->dtype()), stream);
     delete[] temp_ptr;
 #endif
   }
@@ -266,8 +279,9 @@ void DeserializeSelectedRows(
     vec_dim.push_back(x);
   }
   tensor->Resize(framework::make_ddim(vec_dim));
-  void* tensor_data =
-      tensor->mutable_data(place, VarMessageToVarType(msg.data_type()));
+  void* tensor_data = tensor->mutable_data(
+      place,
+      framework::TransToPtenDataType(VarMessageToVarType(msg.data_type())));
   // IO Buffer
   if (platform::is_cpu_place(place)) {
     unsigned long data_len;  // NOLINT
@@ -275,15 +289,16 @@ void DeserializeSelectedRows(
     io_buffer_itr.copy_and_forward(tensor_data, data_len);
   } else if (platform::is_gpu_place(place)) {
 #ifdef PADDLE_WITH_CUDA
-    char* temp_ptr = new char[tensor->numel() *
-                              framework::SizeOfType(tensor->type())];  // NOLINT
+    char* temp_ptr =
+        new char[tensor->numel() *
+                 framework::DataTypeSize(tensor->dtype())];  // NOLINT
     unsigned long data_len;  // NOLINT
     io_buffer_itr.copy_and_forward((void*)(&data_len), 8);  // NOLINT
     io_buffer_itr.copy_and_forward(temp_ptr, data_len);
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     memory::Copy(place, tensor_data, platform::CPUPlace(), temp_ptr,
-                 tensor->numel() * framework::SizeOfType(tensor->type()),
+                 tensor->numel() * framework::DataTypeSize(tensor->dtype()),
                  stream);
     delete[] temp_ptr;
 #endif
......
@@ -13,6 +13,8 @@
 // limitations under the License.
 #include "paddle/fluid/distributed/ps/service/heter_client.h"
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/platform/profiler.h"
 #include "paddle/fluid/string/split.h"
@@ -39,13 +41,13 @@ int GetMicroId(const platform::DeviceContext& ctx,
   } else {
 #ifdef PADDLE_WITH_CUDA
     std::vector<char> temp;
-    temp.resize(tensor->numel() * framework::SizeOfType(tensor->type()));
+    temp.resize(tensor->numel() * framework::DataTypeSize(tensor->dtype()));
     char* temp_ptr = temp.data();
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     memory::Copy(
         platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(),
-        tensor->numel() * framework::SizeOfType(tensor->type()), stream);
+        tensor->numel() * framework::DataTypeSize(tensor->dtype()), stream);
     float* temp_ptr_float = reinterpret_cast<float*>(temp_ptr);
     micro_id = static_cast<int>(temp_ptr_float[0]);
 #endif
......
@@ -15,6 +15,7 @@
 #include "paddle/fluid/eager/grad_tensor_holder.h"
 #include "paddle/fluid/imperative/gradient_accumulator.h"
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/var_type.h"
 #include "paddle/pten/kernels/funcs/math_function.h"
......
@@ -452,4 +452,10 @@ endif()
 cc_test(scope_guard_test SRCS scope_guard_test.cc)
 cc_test(pten_utils_test SRCS pten_utils_test.cc DEPS pten_utils)
+if(WITH_GPU OR WITH_ROCM)
+  cc_library(fluid_convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info)
+else()
+  cc_library(fluid_convert_utils SRCS convert_utils.cc DEPS data_type place)
+endif()
 cc_test(custom_kernel_test SRCS custom_kernel_test.cc DEPS custom_kernel pten_tensor)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/convert_utils.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
namespace paddle {
namespace framework {
paddle::experimental::DataType TransToPtenDataType(
const paddle::framework::proto::VarType::Type& dtype) {
// Set the order of case branches according to the frequency with
// which the data type is used
switch (dtype) {
case paddle::framework::proto::VarType::FP32:
return DataType::FLOAT32;
case paddle::framework::proto::VarType::FP64:
return DataType::FLOAT64;
case paddle::framework::proto::VarType::INT64:
return DataType::INT64;
case paddle::framework::proto::VarType::INT32:
return DataType::INT32;
case paddle::framework::proto::VarType::INT8:
return DataType::INT8;
case paddle::framework::proto::VarType::UINT8:
return DataType::UINT8;
case paddle::framework::proto::VarType::INT16:
return DataType::INT16;
case paddle::framework::proto::VarType::COMPLEX64:
return DataType::COMPLEX64;
case paddle::framework::proto::VarType::COMPLEX128:
return DataType::COMPLEX128;
case paddle::framework::proto::VarType::FP16:
return DataType::FLOAT16;
case paddle::framework::proto::VarType::BF16:
return DataType::BFLOAT16;
case paddle::framework::proto::VarType::BOOL:
return DataType::BOOL;
default:
return DataType::UNDEFINED;
}
}
paddle::framework::proto::VarType::Type TransToProtoVarType(
const paddle::experimental::DataType& dtype) {
// Set the order of case branches according to the frequency with
// which the data type is used
switch (dtype) {
case DataType::FLOAT32:
return paddle::framework::proto::VarType::FP32;
case DataType::FLOAT64:
return paddle::framework::proto::VarType::FP64;
case DataType::INT64:
return paddle::framework::proto::VarType::INT64;
case DataType::INT32:
return paddle::framework::proto::VarType::INT32;
case DataType::INT8:
return paddle::framework::proto::VarType::INT8;
case DataType::UINT8:
return paddle::framework::proto::VarType::UINT8;
case DataType::INT16:
return paddle::framework::proto::VarType::INT16;
case DataType::COMPLEX64:
return paddle::framework::proto::VarType::COMPLEX64;
case DataType::COMPLEX128:
return paddle::framework::proto::VarType::COMPLEX128;
case DataType::FLOAT16:
return paddle::framework::proto::VarType::FP16;
case DataType::BFLOAT16:
return paddle::framework::proto::VarType::BF16;
case DataType::BOOL:
return paddle::framework::proto::VarType::BOOL;
default:
PADDLE_THROW(paddle::platform::errors::Unimplemented(
"Unsupported data type `%s` when casting it into "
"paddle data type.",
dtype));
}
}
size_t DataTypeSize(DataType dtype) {
switch (dtype) {
case DataType::UNDEFINED:
return 0;
case DataType::BOOL:
return sizeof(bool);
case DataType::INT8:
return sizeof(int8_t);
case DataType::UINT8:
return sizeof(uint8_t);
case DataType::INT16:
return sizeof(int16_t);
case DataType::INT32:
return sizeof(int);
case DataType::INT64:
return sizeof(int64_t);
case DataType::BFLOAT16:
return sizeof(paddle::platform::bfloat16);
case DataType::FLOAT16:
return sizeof(paddle::platform::float16);
case DataType::FLOAT32:
return sizeof(float);
case DataType::FLOAT64:
return sizeof(double);
case DataType::COMPLEX64:
return sizeof(paddle::platform::complex<float>);
case DataType::COMPLEX128:
return sizeof(paddle::platform::complex<double>);
default:
return 0;
}
}
DataType String2DataType(const std::string& str) {
if (str == "bool") {
return DataType::BOOL;
} else if (str == "float16") {
return DataType::FLOAT16;
} else if (str == "float32") {
return DataType::FLOAT32;
} else if (str == "float64") {
return DataType::FLOAT64;
} else if (str == "int8") {
return DataType::INT8;
} else if (str == "int16") {
return DataType::INT16;
} else if (str == "int32") {
return DataType::INT32;
} else if (str == "int64") {
return DataType::INT64;
} else if (str == "uint8") {
return DataType::UINT8;
} else if (str == "complex64") {
return DataType::COMPLEX64;
} else if (str == "complex128") {
return DataType::COMPLEX128;
} else {
return DataType::UNDEFINED;
}
}
std::string DataType2String(DataType dtype) {
switch (dtype) {
case DataType::BOOL:
return "bool";
case DataType::INT8:
return "int8";
case DataType::UINT8:
return "uint8";
case DataType::INT16:
return "int16";
case DataType::INT32:
return "int32";
case DataType::INT64:
return "int64";
case DataType::FLOAT16:
return "float16";
case DataType::FLOAT32:
return "float32";
case DataType::FLOAT64:
return "float64";
case DataType::COMPLEX64:
return "complex64";
case DataType::COMPLEX128:
return "complex128";
default:
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"Unknow pten::DataType, the int value = %d.",
static_cast<int>(dtype)));
return "";
}
}
} // namespace framework
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/pten/common/backend.h"
#include "paddle/pten/common/data_type.h"
#include "paddle/pten/common/layout.h"
#include "paddle/pten/core/tensor_meta.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/place.h"
// TODO(chenweihang): this file may need to be removed
namespace paddle {
namespace framework {
using DataType = paddle::experimental::DataType;
using DataLayout = paddle::experimental::DataLayout;
DataType TransToPtenDataType(
const paddle::framework::proto::VarType::Type& dtype);
paddle::framework::proto::VarType::Type TransToProtoVarType(
const DataType& dtype);
size_t DataTypeSize(DataType dtype);
DataType String2DataType(const std::string& str);
std::string DataType2String(DataType dtype);
} // namespace framework
} // namespace paddle
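For orientation, here is a minimal usage sketch of the helpers declared above. It is not part of the patch; the function name ConvertUtilsExample is invented for the illustration.

// Round-trip a dtype through the new fluid-side conversion helpers.
#include <cassert>
#include <string>
#include "paddle/fluid/framework/convert_utils.h"

void ConvertUtilsExample() {
  namespace fw = paddle::framework;
  auto proto_type = fw::proto::VarType::FP16;
  // proto::VarType::Type -> pten DataType and back again.
  auto pten_type = fw::TransToPtenDataType(proto_type);
  assert(fw::TransToProtoVarType(pten_type) == proto_type);
  // Size and string helpers added alongside the conversions.
  assert(fw::DataTypeSize(pten_type) == 2u);
  assert(fw::DataType2String(pten_type) == std::string("float16"));
  assert(fw::String2DataType("float16") == pten_type);
}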
@@ -26,6 +26,7 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/attribute.h"
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/op_meta_info_helper.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
@@ -777,12 +778,14 @@ void RegisterOperatorWithMetaInfo(const std::vector<OpMetaInfo>& op_meta_infos,
         std::vector<DataType> vec_custom_dtype;
         for (size_t i = 0; i < ctx->InputSize(in_name); ++i) {
           auto dtype = ctx->GetInputDataType(in_name, i);
-          vec_custom_dtype.emplace_back(pten::TransToPtenDataType(dtype));
+          vec_custom_dtype.emplace_back(
+              paddle::framework::TransToPtenDataType(dtype));
         }
         vec_input_dtypes.emplace_back(vec_custom_dtype);
       } else {
         auto dtype = ctx->GetInputDataType(in_name);
-        input_dtypes.emplace_back(pten::TransToPtenDataType(dtype));
+        input_dtypes.emplace_back(
+            paddle::framework::TransToPtenDataType(dtype));
       }
     }
@@ -794,12 +797,14 @@ void RegisterOperatorWithMetaInfo(const std::vector<OpMetaInfo>& op_meta_infos,
       auto out_name = op_outputs[i];
       if (detail::IsDuplicableVar(out_name)) {
         for (size_t j = 0; j < output_dtypes.size(); ++j) {
-          auto dtype = pten::TransToProtoVarType(output_dtypes[i]);
+          auto dtype =
+              paddle::framework::TransToProtoVarType(output_dtypes[i]);
           ctx->SetOutputDataType(out_name, dtype, j);
         }
       } else {
-        ctx->SetOutputDataType(out_name,
-                               pten::TransToProtoVarType(output_dtypes[i]));
+        ctx->SetOutputDataType(
+            out_name,
+            paddle::framework::TransToProtoVarType(output_dtypes[i]));
       }
     }
   };
......
@@ -18,6 +18,7 @@
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_reuse.h"
 #endif
+#include "paddle/fluid/framework/convert_utils.h"
 namespace paddle {
 namespace framework {
@@ -79,10 +80,10 @@ void TransDataLayout(const OpKernelType& kernel_type_for_var,
   }
   out->Resize(make_ddim(dst_dim));
-  out->mutable_data(expected_kernel_type.place_, in.type());
+  out->mutable_data(expected_kernel_type.place_, in.dtype());
   framework::VisitDataType(
-      in.type(),
+      framework::TransToProtoVarType(in.dtype()),
       CastDataLayout(pool.Get(expected_kernel_type.place_), axis, in, out));
   out->set_layout(expected_kernel_type.data_layout_);
@@ -153,11 +154,13 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
   auto in_tz = paddle::framework::vectorize<int64_t>(in.dims());
   auto out_tz = in_tz;
-  memory::data_type in_type = ToMKLDNNDataType(in.type());
-  PADDLE_ENFORCE_NE(in_type, memory::data_type::undef,
-                    platform::errors::InvalidArgument(
-                        "Input tensor type (%s) is not supported.",
-                        DataTypeToString(in.type())));
+  memory::data_type in_type =
+      ToMKLDNNDataType(framework::TransToProtoVarType(in.dtype()));
+  PADDLE_ENFORCE_NE(
+      in_type, memory::data_type::undef,
+      platform::errors::InvalidArgument(
+          "Input tensor type (%s) is not supported.",
+          DataTypeToString(framework::TransToProtoVarType(in.dtype()))));
   auto in_format = platform::MKLDNNFormatForSize(in_tz.size(), in.format());
   auto out_format =
@@ -169,8 +172,8 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
   if ((in_format != out_format) || always_copy) {
     void* in_data = GetDataFromTensor(in, in_type);
-    platform::ReorderMKLDNNHandler handler(in_tz, in.type(), in_type,
-                                           cpu_engine);
+    platform::ReorderMKLDNNHandler handler(
+        in_tz, framework::TransToProtoVarType(in.dtype()), in_type, cpu_engine);
     auto reorder_src_memory_p = handler.AcquireSrcMemory(in_format, in_data);
     auto reorder_dst_memory_p =
......
@@ -16,6 +16,7 @@
 #include <string>
 #include "gtest/gtest.h"
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/tensor.h"
 TEST(DataType, float16) {
@@ -27,10 +28,11 @@ TEST(DataType, float16) {
   Tensor tensor;
   CPUPlace cpu;
-  tensor.mutable_data(cpu, dtype);
+  tensor.mutable_data(cpu, f::TransToPtenDataType(dtype));
   // test fp16 tensor
-  EXPECT_EQ(tensor.type(), f::ToDataType(typeid(float16)));
+  EXPECT_EQ(f::TransToProtoVarType(tensor.dtype()),
+            f::ToDataType(typeid(float16)));
   // test fp16 size
   EXPECT_EQ(f::SizeOfType(dtype), 2u);
@@ -49,10 +51,11 @@ TEST(DataType, bfloat16) {
   Tensor tensor;
   CPUPlace cpu;
-  tensor.mutable_data(cpu, dtype);
+  tensor.mutable_data(cpu, f::TransToPtenDataType(dtype));
   // test bf16 tensor
-  EXPECT_EQ(tensor.type(), f::ToDataType(typeid(bfloat16)));
+  EXPECT_EQ(f::TransToProtoVarType(tensor.dtype()),
+            f::ToDataType(typeid(bfloat16)));
   // test bf16 size
   EXPECT_EQ(f::SizeOfType(dtype), 2u);
......
@@ -14,6 +14,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_type_transform.h"
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/selected_rows_utils.h"
 #include "paddle/fluid/platform/transform.h"
@@ -65,12 +66,14 @@ struct CastDataType {
 void TransDataType(const OpKernelType& kernel_type_for_var,
                    const OpKernelType& expected_kernel_type, const Tensor& in,
                    Tensor* out) {
-  PADDLE_ENFORCE_EQ(in.type(), kernel_type_for_var.data_type_,
-                    platform::errors::InvalidArgument(
-                        "The src dtype(%s) of input tensor and kernel_type(%s) "
-                        "are not conststent.",
-                        DataTypeToString(in.type()),
-                        DataTypeToString(kernel_type_for_var.data_type_)));
+  PADDLE_ENFORCE_EQ(
+      framework::TransToProtoVarType(in.dtype()),
+      kernel_type_for_var.data_type_,
+      platform::errors::InvalidArgument(
+          "The src dtype(%s) of input tensor and kernel_type(%s) "
+          "are not conststent.",
+          DataTypeToString(framework::TransToProtoVarType(in.dtype())),
+          DataTypeToString(kernel_type_for_var.data_type_)));
   auto dst_type = expected_kernel_type.data_type_;
   TransDataType(in, dst_type, out);
 }
@@ -81,7 +84,7 @@ void TransDataType(const Tensor& in,
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
   out->Resize(in.dims());
-  auto src_type = in.type();
+  auto src_type = framework::TransToProtoVarType(in.dtype());
   auto dst_type = type;
   auto ctx = pool.Get(in.place());
......
@@ -13,6 +13,7 @@
 // limitations under the License.
 #include "paddle/fluid/framework/details/all_reduce_op_handle.h"
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/reduce_and_gather.h"
 #include "paddle/fluid/platform/place.h"
@@ -127,7 +128,7 @@ void AllReduceOpHandle::AllReduceImpl(
           platform::errors::PreconditionNotMet(
               "The numel of tensor %s should be > 0, but got numel is %d.",
               in_var_handles[i]->name(), numel));
-      dtype = lod_tensor.type();
+      dtype = framework::TransToProtoVarType(lod_tensor.dtype());
       is_gpu_place = platform::is_gpu_place(lod_tensor.place());
 #if defined(PADDLE_WITH_XPU_BKCL)
       is_xpu_place = platform::is_xpu_place(lod_tensor.place());
@@ -139,7 +140,7 @@ void AllReduceOpHandle::AllReduceImpl(
             "The size of tensors of the same variable in different local "
             "scopes should be equal."));
     PADDLE_ENFORCE_EQ(
-        dtype, lod_tensor.type(),
+        dtype, framework::TransToProtoVarType(lod_tensor.dtype()),
         platform::errors::PreconditionNotMet(
             "The dtype of tensors of the same variable in different local "
             "scopes should be equal."));
@@ -227,14 +228,15 @@ void AllReduceOpHandle::AllReduceFunc(
     // Reduce All Tensor to trg in CPU
     ReduceBufferData func(lod_tensor_data, trg.data(), numel);
-    VisitDataType(trg.type(), func);
+    VisitDataType(framework::TransToProtoVarType(trg.dtype()), func);
     for (size_t i = 1; i < local_exec_scopes_.size(); ++i) {
       auto &scope = local_exec_scopes_[i];
       auto &p = places[i];
       auto *var = scope->FindVar(out_var_names[i]);
-      size_t size = numel * SizeOfType(trg.type());
+      size_t size =
+          numel * SizeOfType(framework::TransToProtoVarType(trg.dtype()));
       RunAndRecordEvent(p, [&trg, var, p, size] {
         auto dst_ptr = var->GetMutable<framework::LoDTensor>()->data();
         platform::CPUPlace cpu_place;
......
@@ -14,6 +14,7 @@
 #include "paddle/fluid/framework/details/broadcast_op_handle.h"
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/variable_visitor.h"
 #include "paddle/fluid/platform/place.h"
@@ -87,7 +88,8 @@ void BroadcastOpHandle::BroadcastOneVar(
     int root_id = in_tensor.place().device;
     std::vector<std::function<void()>> broadcast_calls;
-    int type = platform::ToNCCLDataType(in_tensor.type());
+    int type = platform::ToNCCLDataType(
+        framework::TransToProtoVarType(in_tensor.dtype()));
     size_t numel = static_cast<size_t>(in_tensor.numel());
     for (auto out_var_handle : out_var_handles) {
@@ -147,7 +149,8 @@ void BroadcastOpHandle::BroadcastOneVar(
     int root_id = in_tensor.place().device;
     std::vector<std::function<void()>> broadcast_calls;
-    int type = platform::ToBKCLDataType(in_tensor.type());
+    int type = platform::ToBKCLDataType(
+        framework::TransToProtoVarType(in_tensor.dtype()));
     size_t numel = static_cast<size_t>(in_tensor.numel());
     for (auto out_var_handle : out_var_handles) {
@@ -239,7 +242,7 @@ void BroadcastOpHandle::InitOutputValue(
     }
     VariableVisitor::ShareDimsAndLoD(*in_var, out_var);
     VariableVisitor::GetMutableTensor(out_var).mutable_data(t_out_p,
-                                                            in_tensor.type());
+                                                            in_tensor.dtype());
   }
 }
......
@@ -16,6 +16,7 @@
 #include <string>
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/platform/profiler.h"
 namespace paddle {
@@ -49,7 +50,7 @@ static void CheckTensorAttrs(const LoDTensor *tensor,
   if (tensor->numel() && tensor->IsInitialized()) {
     // step1: check type
     PADDLE_ENFORCE_EQ(
-        type, tensor->type(),
+        type, framework::TransToProtoVarType(tensor->dtype()),
         platform::errors::InvalidArgument(
             "The data type of fetched Tensors or the items of fetched "
             "LoDTensorArray are different from each other on different "
@@ -57,7 +58,7 @@ static void CheckTensorAttrs(const LoDTensor *tensor,
             "(th) fetched variable. Please set the "
            "parameter `return_merged = False` when you "
            "call the `Executor.run()` method.",
-            DataTypeToString(type), DataTypeToString(tensor->type()), offset));
+            DataTypeToString(type), tensor->dtype(), offset));
     // step2: check layout
     PADDLE_ENFORCE_EQ(
@@ -139,7 +140,7 @@ void FetchAsyncOpHandle::FetchMergedLodTensor(
   for (auto *t : src_lodtensors) {
     if (t->numel() && t->IsInitialized()) {
       check_dim = t->dims();
-      new_type = t->type();
+      new_type = paddle::framework::TransToProtoVarType(t->dtype());
       new_layout = t->layout();
       break;
     }
@@ -169,10 +170,10 @@ void FetchAsyncOpHandle::FetchMergedLodTensor(
   dst_lodtensor->set_lod(src_lodtensors[0]->lod());
   if (platform::is_gpu_place(src_lodtensors[0]->place())) {
     dst_lodtensor->mutable_data(platform::CUDAPinnedPlace(),
-                                src_lodtensors[0]->type());
+                                src_lodtensors[0]->dtype());
   } else {
     dst_lodtensor->mutable_data(platform::CPUPlace(),
-                                src_lodtensors[0]->type());
+                                src_lodtensors[0]->dtype());
   }
   // slice and memcpy
......
@@ -13,6 +13,7 @@
 // limitations under the License.
 #include "paddle/fluid/framework/details/fused_all_reduce_op_handle.h"
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/variable_visitor.h"
 #include "paddle/fluid/platform/device_memory_aligment.h"
@@ -337,7 +338,8 @@ void FusedAllReduceOpHandle::GetDTypeAndNumel(
   size_t size_of_dtype = 0;
   for (size_t i = 0; i < grad_tensor.size(); ++i) {
     // Get dtype
-    auto ele_dtype = grad_tensor.at(i).second->type();
+    auto ele_dtype =
+        framework::TransToProtoVarType(grad_tensor.at(i).second->dtype());
     if (i == 0) {
       *dtype = ele_dtype;
       size_of_dtype = framework::SizeOfType(ele_dtype);
......
@@ -115,7 +115,7 @@ void GatherOpHandle::RunImpl() {
   DDim out_dim = pre_in_value.GetCompleteDims();
   out_dim[0] = static_cast<int64_t>(rows);
   out_value->mutable_value()->Resize(out_dim).mutable_data(
-      t_out_p, pre_in_value.value().type());
+      t_out_p, pre_in_value.value().dtype());
   Tensor *out_tensor = out_value->mutable_value();
   // copy
......
@@ -19,6 +19,7 @@
 #ifdef PADDLE_WITH_ASCEND_CL
 #include "paddle/fluid/platform/device/npu/npu_op_runner.h"
 #endif
+#include "paddle/fluid/framework/convert_utils.h"
 namespace paddle {
 namespace framework {
@@ -307,7 +308,7 @@ void tensor_check<platform::CPUDeviceContext>(const std::string& op_type,
                                               const platform::Place& place) {
   TensorCheckerVisitor<platform::CPUDeviceContext> vistor(op_type, var_name,
                                                           tensor, place);
-  VisitDataType(tensor.type(), vistor);
+  VisitDataType(framework::TransToProtoVarType(tensor.dtype()), vistor);
 }
 void CheckVarHasNanOrInf(const std::string& op_type,
@@ -348,7 +349,8 @@ void CheckVarHasNanOrInf(const std::string& op_type,
     return;
   } else if (platform::is_xpu_place(tensor->place())) {
 #ifdef PADDLE_WITH_XPU
-    if (tensor->type() != proto::VarType::FP32) {
+    if (framework::TransToProtoVarType(tensor->dtype()) !=
+        proto::VarType::FP32) {
       return;
     }
@@ -377,14 +379,15 @@ void CheckVarHasNanOrInf(const std::string& op_type,
     return;
   } else if (platform::is_npu_place(tensor->place())) {
 #ifdef PADDLE_WITH_ASCEND_CL
-    if (tensor->type() != proto::VarType::FP32) {
+    if (framework::TransToProtoVarType(tensor->dtype()) !=
+        proto::VarType::FP32) {
       return;
     }
     framework::LoDTensor cpu_tensor;
     cpu_tensor.Resize(tensor->dims());
     float* cpu_data = static_cast<float*>(
-        cpu_tensor.mutable_data(platform::CPUPlace(), tensor->type()));
+        cpu_tensor.mutable_data(platform::CPUPlace(), tensor->dtype()));
     framework::TensorCopySync(*tensor, platform::CPUPlace(), &cpu_tensor);
     bool flag = false;
@@ -475,8 +478,10 @@ void PrintNpuVarInfo(const std::string& op_type, const std::string& var_name,
     return;
   }
-  if ((tensor->type() != proto::VarType::FP32) &&
-      (tensor->type() != proto::VarType::FP16)) {
+  if ((framework::TransToProtoVarType(tensor->dtype()) !=
+       proto::VarType::FP32) &&
+      (framework::TransToProtoVarType(tensor->dtype()) !=
+       proto::VarType::FP16)) {
     return;
   }
@@ -490,16 +495,17 @@ void PrintNpuVarInfo(const std::string& op_type, const std::string& var_name,
   framework::Tensor cpu_tensor;
   cpu_tensor.Resize(tensor->dims());
-  cpu_tensor.mutable_data(platform::CPUPlace(), tensor->type());
+  cpu_tensor.mutable_data(platform::CPUPlace(), tensor->dtype());
   framework::TensorCopySync(*tensor, platform::CPUPlace(), &cpu_tensor);
   LOG(WARNING) << "print [" << var_name << "] tensor info:";
   // use env strategy control in future, -1=print_all.
   int print_num = 3;
-  if (tensor->type() == proto::VarType::FP32) {
+  if (framework::TransToProtoVarType(tensor->dtype()) == proto::VarType::FP32) {
     const float* value = cpu_tensor.data<float>();
     PrintNanInf(value, tensor->numel(), print_num, op_type, var_name, false);
-  } else if (tensor->type() == proto::VarType::FP16) {
+  } else if (framework::TransToProtoVarType(tensor->dtype()) ==
+             proto::VarType::FP16) {
     const paddle::platform::float16* value =
         cpu_tensor.data<paddle::platform::float16>();
     PrintNanInf(value, tensor->numel(), print_num, op_type, var_name, false);
......
@@ -19,6 +19,7 @@
 #include <unordered_map>
 #include <utility>
 #include <vector>
+#include "paddle/fluid/framework/convert_utils.h"
 namespace paddle {
 namespace framework {
@@ -208,7 +209,7 @@ void tensor_check<platform::CUDADeviceContext>(const std::string& op_type,
   TensorCheckerVisitor<platform::CUDADeviceContext> vistor(op_type, var_name,
                                                            tensor, place);
-  VisitDataType(tensor.type(), vistor);
+  VisitDataType(framework::TransToProtoVarType(tensor.dtype()), vistor);
 }
 }  // namespace details
......
@@ -130,7 +130,8 @@ struct GatherLocalSelectedRowsFunctor {
     DDim out_dim = pre_in->GetCompleteDims();
     out_dim[0] = static_cast<int64_t>(rows);
     dst_tensor.mutable_value()->Resize(out_dim);
-    dst_tensor.mutable_value()->mutable_data(out_place, pre_in->value().type());
+    dst_tensor.mutable_value()->mutable_data(out_place,
+                                             pre_in->value().dtype());
   }
   void operator()() {
......
@@ -14,6 +14,7 @@
 #include "paddle/fluid/framework/details/reduce_op_handle.h"
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/reduce_and_gather.h"
 #include "paddle/fluid/framework/details/variable_visitor.h"
@@ -150,7 +151,8 @@ void ReduceOpHandle::RunImpl() {
     if (!FLAGS_cpu_deterministic) {
       ReduceLoDTensor func(lod_tensors,
                            out_var->GetMutable<framework::LoDTensor>());
-      VisitDataType(lod_tensors[0]->type(), func);
+      VisitDataType(framework::TransToProtoVarType(lod_tensors[0]->dtype()),
+                    func);
     } else {
       // We sum lod_tensors to reduce_sum_trg which is in local_scopes_0
       // here, but it doesn't mean reduce_sum_trg must be in local_scopes_0.
@@ -158,7 +160,8 @@ void ReduceOpHandle::RunImpl() {
                                  ->FindVar(out_var_handle->name())
                                  ->GetMutable<framework::LoDTensor>();
       ReduceLoDTensor func(lod_tensors, &reduce_sum_trg);
-      VisitDataType(lod_tensors[0]->type(), func);
+      VisitDataType(framework::TransToProtoVarType(lod_tensors[0]->dtype()),
+                    func);
       auto trg = out_var->GetMutable<framework::LoDTensor>();
       if (reduce_sum_trg.data() != trg->data()) {
@@ -171,7 +174,7 @@ void ReduceOpHandle::RunImpl() {
     auto pre_in = pre_in_var->Get<framework::LoDTensor>();
     VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var);
     VariableVisitor::GetMutableTensor(out_var).mutable_data(
-        out_var_handle->place(), pre_in.type());
+        out_var_handle->place(), pre_in.dtype());
     auto out_p = out_var_handle->place();
     int root_id = out_p.device;
@@ -191,7 +194,8 @@ void ReduceOpHandle::RunImpl() {
                                       out_var_handle->place());
       }
-      int type = platform::ToNCCLDataType(lod_tensor.type());
+      int type = platform::ToNCCLDataType(
+          framework::TransToProtoVarType(lod_tensor.dtype()));
       size_t numel = static_cast<size_t>(lod_tensor.numel());
       all_reduce_calls.emplace_back(
           [buffer, recvbuffer, type, numel, root_id, &nccl_ctx] {
@@ -217,7 +221,7 @@ void ReduceOpHandle::RunImpl() {
     auto pre_in = pre_in_var->Get<framework::LoDTensor>();
     VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var);
     VariableVisitor::GetMutableTensor(out_var).mutable_data(
-        out_var_handle->place(), pre_in.type());
+        out_var_handle->place(), pre_in.dtype());
     auto out_p = out_var_handle->place();
     int root_id = out_p.device;
@@ -237,7 +241,8 @@ void ReduceOpHandle::RunImpl() {
                                       out_var_handle->place());
       }
-      int type = platform::ToBKCLDataType(lod_tensor.type());
+      int type = platform::ToBKCLDataType(
+          framework::TransToProtoVarType(lod_tensor.dtype()));
       size_t numel = static_cast<size_t>(lod_tensor.numel());
       all_reduce_calls.emplace_back([buffer, recvbuffer, type, numel, root_id,
                                      &bkcl_ctx] {
......
@@ -17,6 +17,7 @@
 #include <utility>
 #include "dgc/dgc.h"
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/reduce_and_gather.h"
 #include "paddle/fluid/framework/details/variable_visitor.h"
@@ -151,7 +152,9 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
     auto &out = *outs[i];
     float *out_tensor_buf = out.data<float>();
-    dtype = (dtype == -1) ? platform::ToNCCLDataType(in.type()) : dtype;
+    dtype = (dtype == -1) ? platform::ToNCCLDataType(
+                                framework::TransToProtoVarType(in.dtype()))
+                          : dtype;
     in_numel = (in_numel == 0) ? static_cast<size_t>(in.numel()) : in_numel;
     PADDLE_ENFORCE_EQ(in_numel % 2, 0,
                       platform::errors::InvalidArgument(
......
@@ -14,6 +14,7 @@
 #include "paddle/fluid/framework/details/variable_visitor.h"
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/selected_rows_utils.h"
 namespace pten {
@@ -115,7 +116,7 @@ struct EnforceShapeAndDTypeEQVisitor {
         "The place type of the two variables is not equal. The src place "
         "is %s, but the dst place is %s",
         src.place().DebugString(), tensor.place().DebugString()));
-    PADDLE_ENFORCE_EQ(src.type(), tensor.type(),
+    PADDLE_ENFORCE_EQ(src.dtype(), tensor.dtype(),
                       platform::errors::PreconditionNotMet(
                           "The dtype of the two variables is not equal."));
     PADDLE_ENFORCE_EQ(
......
@@ -14,6 +14,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/device_worker.h"
+#include "paddle/fluid/framework/convert_utils.h"
 namespace pten {
 class DenseTensor;
 }  // namespace pten
@@ -58,11 +60,13 @@ std::string PrintLodTensorIntType(Tensor* tensor, int64_t start, int64_t end) {
 std::string PrintLodTensor(Tensor* tensor, int64_t start, int64_t end) {
   std::string out_val;
-  if (tensor->type() == proto::VarType::FP32) {
+  if (framework::TransToProtoVarType(tensor->dtype()) == proto::VarType::FP32) {
     out_val = PrintLodTensorType<float>(tensor, start, end);
-  } else if (tensor->type() == proto::VarType::INT64) {
+  } else if (framework::TransToProtoVarType(tensor->dtype()) ==
+             proto::VarType::INT64) {
     out_val = PrintLodTensorIntType(tensor, start, end);
-  } else if (tensor->type() == proto::VarType::FP64) {
+  } else if (framework::TransToProtoVarType(tensor->dtype()) ==
+             proto::VarType::FP64) {
     out_val = PrintLodTensorType<double>(tensor, start, end);
   } else {
     out_val = "unsupported type";
......
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/device_worker_factory.h"
 #include "paddle/fluid/framework/trainer.h"
@@ -153,12 +154,13 @@ void DistMultiTrainer::Finalize() {
   }
 #define MergeCallback(cpp_type, proto_type)                                    \
   do {                                                                         \
-    if (root_tensor->type() == proto_type) {                                   \
-      if (thread_tensor->type() != proto_type) {                               \
+    if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) {  \
+      if (framework::TransToProtoVarType(thread_tensor->dtype()) !=            \
+          proto_type) {                                                        \
        VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \
                << "] " << need_merge_var_names_[i]                            \
-                << ", root tensor type=" << root_tensor->type()                \
-                << ", thread tensor type=" << thread_tensor->type();           \
+                << ", root tensor type=" << root_tensor->dtype()               \
+                << ", thread tensor type=" << thread_tensor->dtype();          \
         exit(-1);                                                              \
       }                                                                        \
       MergeToRootScope<cpp_type>(root_tensor, thread_tensor);                  \
......
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/framework/dlpack_tensor.h"
+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/data_type.h"
 namespace paddle {
@@ -134,7 +135,8 @@ DLPackTensor::DLPackTensor(const Tensor &tensor, LaneType lanes) {
   t_.device = paddle::platform::VisitPlace(place, internal::DLDeviceVisitor());
   // init dtype
-  t_.dtype = internal::GetDLDataTypeFromTypeIndex(tensor.type());
+  t_.dtype = internal::GetDLDataTypeFromTypeIndex(
+      framework::TransToProtoVarType(tensor.dtype()));
   t_.dtype.lanes = lanes;
   // init ndim, tensor rank
......
...@@ -20,6 +20,7 @@ limitations under the License. */ ...@@ -20,6 +20,7 @@ limitations under the License. */
#include "google/protobuf/text_format.h" #include "google/protobuf/text_format.h"
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/lod_rank_table.h"
...@@ -235,16 +236,16 @@ void print_lod_tensor(std::string var_name, const LoDTensor& lod_tensor) { ...@@ -235,16 +236,16 @@ void print_lod_tensor(std::string var_name, const LoDTensor& lod_tensor) {
static void print_fetch_var(Scope* scope, const std::string& var_name) { static void print_fetch_var(Scope* scope, const std::string& var_name) {
auto& tensor = scope->FindVar(var_name)->Get<LoDTensor>(); auto& tensor = scope->FindVar(var_name)->Get<LoDTensor>();
#define PrintLoDTensorCallback(cpp_type, proto_type) \ #define PrintLoDTensorCallback(cpp_type, proto_type) \
do { \ do { \
if (tensor.type() == proto_type) { \ if (framework::TransToProtoVarType(tensor.dtype()) == proto_type) { \
print_lod_tensor<cpp_type>(var_name, tensor); \ print_lod_tensor<cpp_type>(var_name, tensor); \
return; \ return; \
} \ } \
} while (0) } while (0)
_ForEachDataType_(PrintLoDTensorCallback); _ForEachDataType_(PrintLoDTensorCallback);
VLOG(1) << "print_fetch_var: unrecognized data type:" << tensor.type(); VLOG(1) << "print_fetch_var: unrecognized data type:" << tensor.dtype();
} }
void ExecutorThreadWorker::TrainFilesWithTimer() { void ExecutorThreadWorker::TrainFilesWithTimer() {
......
...@@ -22,6 +22,7 @@ limitations under the License. */ ...@@ -22,6 +22,7 @@ limitations under the License. */
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h"
...@@ -146,13 +147,16 @@ class AscendInstance { ...@@ -146,13 +147,16 @@ class AscendInstance {
// } // }
ge::Shape shape(vec_dim); ge::Shape shape(vec_dim);
ge::TensorDesc tensor_desc(shape, ge::Format::FORMAT_ND, ge::TensorDesc tensor_desc(
VarTypeToGeType(tensor->type())); shape, ge::Format::FORMAT_ND,
VarTypeToGeType(framework::TransToProtoVarType(tensor->dtype())));
tensor_desc.SetRealDimCnt(vec_dim.size()); tensor_desc.SetRealDimCnt(vec_dim.size());
const uint8_t *data = reinterpret_cast<const uint8_t *>(tensor->data()); const uint8_t *data = reinterpret_cast<const uint8_t *>(tensor->data());
std::vector<uint8_t> dst(numel * GeTypeSize(tensor->type())); std::vector<uint8_t> dst(
memcpy(dst.data(), data, GeTypeSize(tensor->type()) * numel); numel * GeTypeSize(framework::TransToProtoVarType(tensor->dtype())));
memcpy(dst.data(), data,
GeTypeSize(framework::TransToProtoVarType(tensor->dtype())) * numel);
ge::Tensor ge_tensor(tensor_desc, dst); ge::Tensor ge_tensor(tensor_desc, dst);
return ge_tensor; return ge_tensor;
} }
......
...@@ -28,6 +28,7 @@ limitations under the License. */ ...@@ -28,6 +28,7 @@ limitations under the License. */
#include "paddle/fluid/framework/fleet/heter_wrapper.h" #include "paddle/fluid/framework/fleet/heter_wrapper.h"
#if defined(PADDLE_WITH_PSLIB) && !defined(PADDLE_WITH_HETERPS) #if defined(PADDLE_WITH_PSLIB) && !defined(PADDLE_WITH_HETERPS)
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/device_worker.h" #include "paddle/fluid/framework/device_worker.h"
namespace paddle { namespace paddle {
...@@ -90,7 +91,8 @@ void HeterWrapper::SerializeToReq(const std::string& varname, Scope* scope, ...@@ -90,7 +91,8 @@ void HeterWrapper::SerializeToReq(const std::string& varname, Scope* scope,
LoDTensor* tensor = var->GetMutable<LoDTensor>(); LoDTensor* tensor = var->GetMutable<LoDTensor>();
req_var->set_varname(varname); req_var->set_varname(varname);
req_var->set_type(LOD_TENSOR); req_var->set_type(LOD_TENSOR);
req_var->set_data_type(static_cast<VariableMessage::Type>(tensor->type())); req_var->set_data_type(static_cast<VariableMessage::Type>(
framework::TransToProtoVarType(tensor->dtype())));
for (auto& dim : framework::vectorize(tensor->dims())) { for (auto& dim : framework::vectorize(tensor->dims())) {
req_var->add_dims(dim); req_var->add_dims(dim);
...@@ -108,21 +110,27 @@ void HeterWrapper::SerializeToReq(const std::string& varname, Scope* scope, ...@@ -108,21 +110,27 @@ void HeterWrapper::SerializeToReq(const std::string& varname, Scope* scope,
auto* req_data = req_var->mutable_data(); auto* req_data = req_var->mutable_data();
req_data->clear(); req_data->clear();
req_data->resize(tensor->numel() * SizeOfType(tensor->type())); req_data->resize(tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
char* data_ptr = const_cast<char*>(req_data->data()); char* data_ptr = const_cast<char*>(req_data->data());
if (platform::is_cpu_place(tensor->place())) { if (platform::is_cpu_place(tensor->place())) {
memcpy(data_ptr, tensor->data(), memcpy(data_ptr, tensor->data(),
tensor->numel() * SizeOfType(tensor->type())); tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
} else { } else {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
memory::Copy(platform::CPUPlace(), data_ptr, tensor->place(), memory::Copy(
tensor->data(), tensor->numel() * SizeOfType(tensor->type()), platform::CPUPlace(), data_ptr, tensor->place(), tensor->data(),
nullptr); tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())),
nullptr);
#endif #endif
#ifdef PADDLE_WITH_XPU #ifdef PADDLE_WITH_XPU
memory::Copy(platform::CPUPlace(), data_ptr, tensor->place(), memory::Copy(
tensor->data(), tensor->numel() * SizeOfType(tensor->type())); platform::CPUPlace(), data_ptr, tensor->place(), tensor->data(),
tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
#endif #endif
} }
} }
...@@ -152,15 +160,18 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope, ...@@ -152,15 +160,18 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope,
} }
tensor->set_lod(lod); tensor->set_lod(lod);
void* tensor_data = void* tensor_data = tensor->mutable_data(
tensor->mutable_data(place, ToVarType(req_var.data_type())); place, framework::TransToPtenDataType(ToVarType(req_var.data_type())));
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
memory::Copy(place, tensor_data, platform::CPUPlace(), req_var.data().data(), memory::Copy(place, tensor_data, platform::CPUPlace(), req_var.data().data(),
tensor->numel() * SizeOfType(tensor->type()), stream); tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())),
stream);
#else #else
memcpy(tensor_data, req_var.data().data(), memcpy(tensor_data, req_var.data().data(),
tensor->numel() * SizeOfType(tensor->type())); tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
#endif #endif
} }
#endif #endif
...@@ -190,15 +201,17 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope, ...@@ -190,15 +201,17 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope,
} }
tensor->set_lod(lod); tensor->set_lod(lod);
void* tensor_data = void* tensor_data = tensor->mutable_data(
tensor->mutable_data(place, ToVarType(req_var.data_type())); place, framework::TransToPtenDataType(ToVarType(req_var.data_type())));
#ifdef PADDLE_WITH_XPU #ifdef PADDLE_WITH_XPU
memory::Copy(place, tensor_data, platform::CPUPlace(), req_var.data().data(), memory::Copy(place, tensor_data, platform::CPUPlace(), req_var.data().data(),
tensor->numel() * SizeOfType(tensor->type())); tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
#else #else
memcpy(tensor_data, req_var.data().data(), memcpy(tensor_data, req_var.data().data(),
tensor->numel() * SizeOfType(tensor->type())); tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
#endif #endif
} }
......
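Byte counts in the serialization paths above are now derived from the dtype in one of two ways: keep SizeOfType over the converted proto enum, or call DataTypeSize on the pten dtype directly (as the DistModel and brpc paths do). A hedged sketch, assuming both helpers are reachable through convert_utils.h and data_type.h and that they agree for the supported dtypes:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"

size_t TensorBytes(const paddle::framework::Tensor& t) {
  namespace fw = paddle::framework;
  // Older formulation: round trip through the proto enum.
  size_t via_proto =
      t.numel() * fw::SizeOfType(fw::TransToProtoVarType(t.dtype()));
  (void)via_proto;  // kept only to show the pre-migration shape
  // Newer formulation: size straight from the pten dtype.
  return t.numel() * fw::DataTypeSize(t.dtype());
}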
...@@ -11,7 +11,9 @@ limitations under the License. */ ...@@ -11,7 +11,9 @@ limitations under the License. */
#if defined(PADDLE_WITH_PSCORE) #if defined(PADDLE_WITH_PSCORE)
#include <float.h> #include <float.h>
#include "paddle/fluid/distributed/ps/service/heter_server.h" #include "paddle/fluid/distributed/ps/service/heter_server.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/device_worker.h" #include "paddle/fluid/framework/device_worker.h"
#include "paddle/fluid/framework/executor_gc_helper.h" #include "paddle/fluid/framework/executor_gc_helper.h"
#include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/platform/cpu_helper.h"
...@@ -35,21 +37,23 @@ void SetMicroId(paddle::framework::Scope* scope, ...@@ -35,21 +37,23 @@ void SetMicroId(paddle::framework::Scope* scope,
auto* tensor = var->GetMutable<framework::LoDTensor>(); auto* tensor = var->GetMutable<framework::LoDTensor>();
std::vector<int> dims{1}; std::vector<int> dims{1};
tensor->Resize(framework::make_ddim(dims)); tensor->Resize(framework::make_ddim(dims));
void* tensor_data = void* tensor_data = tensor->mutable_data(
tensor->mutable_data(place, framework::proto::VarType::FP32); place, framework::TransToPtenDataType(framework::proto::VarType::FP32));
if (platform::is_gpu_place(place)) { if (platform::is_gpu_place(place)) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
std::vector<char> temp; std::vector<char> temp;
temp.resize(tensor->numel() * framework::SizeOfType(tensor->type())); temp.resize(tensor->numel() * framework::DataTypeSize(tensor->dtype()));
char* temp_ptr = temp.data(); char* temp_ptr = temp.data();
float* temp_ptr_float = reinterpret_cast<float*>(temp_ptr); float* temp_ptr_float = reinterpret_cast<float*>(temp_ptr);
temp_ptr_float[0] = micro_id; temp_ptr_float[0] = micro_id;
auto stream = auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(*dev_ctx).stream(); reinterpret_cast<const platform::CUDADeviceContext&>(*dev_ctx).stream();
memory::Copy(place, tensor_data, platform::CPUPlace(), memory::Copy(
reinterpret_cast<void*>(temp_ptr), place, tensor_data, platform::CPUPlace(),
tensor->numel() * framework::SizeOfType(tensor->type()), reinterpret_cast<void*>(temp_ptr),
stream); tensor->numel() * framework::SizeOfType(
framework::TransToProtoVarType(tensor->dtype())),
stream);
#endif #endif
} else { } else {
float* temp_ptr = reinterpret_cast<float*>(tensor_data); float* temp_ptr = reinterpret_cast<float*>(tensor_data);
......
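Allocation sites changed the same way: mutable_data now takes a pten::DataType, so a literal proto enum is wrapped in TransToPtenDataType, while code that already holds another tensor's dtype() passes it through unchanged. A short sketch of both forms (tensor, dst, src, and place stand in for the objects in the surrounding code):

namespace fw = paddle::framework;
// Allocate a one-element FP32 buffer from a legacy proto enum ...
tensor->Resize(fw::make_ddim({1}));
void* data = tensor->mutable_data(
    place, fw::TransToPtenDataType(fw::proto::VarType::FP32));
// ... or reuse an existing tensor's dtype directly.
dst->Resize(src.dims());
dst->mutable_data(place, src.dtype());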
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#include <string> #include <string>
#include <vector> #include <vector>
#include "io/fs.h" #include "io/fs.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_feed_factory.h" #include "paddle/fluid/framework/data_feed_factory.h"
#include "paddle/fluid/framework/data_set.h" #include "paddle/fluid/framework/data_set.h"
#include "paddle/fluid/framework/device_worker_factory.h" #include "paddle/fluid/framework/device_worker_factory.h"
...@@ -136,18 +137,18 @@ void HeterXpuTrainer::CreateThreadParam(const ProgramDesc& program, int num) { ...@@ -136,18 +137,18 @@ void HeterXpuTrainer::CreateThreadParam(const ProgramDesc& program, int num) {
InitializeVariable(ptr, proto::VarType::LOD_TENSOR); InitializeVariable(ptr, proto::VarType::LOD_TENSOR);
LoDTensor* thread_tensor = ptr->GetMutable<LoDTensor>(); LoDTensor* thread_tensor = ptr->GetMutable<LoDTensor>();
#define HeterMemcpyFunc(cpp_type, proto_type) \ #define HeterMemcpyFunc(cpp_type, proto_type) \
do { \ do { \
if (root_tensor->type() == proto_type) { \ if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
HeterMemCpy<cpp_type>(thread_tensor, root_tensor, place, stream); \ HeterMemCpy<cpp_type>(thread_tensor, root_tensor, place, stream); \
} \ } \
} while (0) } while (0)
#define HeterMemcpyXpuFunc(cpp_type, proto_type) \ #define HeterMemcpyXpuFunc(cpp_type, proto_type) \
do { \ do { \
if (root_tensor->type() == proto_type) { \ if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
HeterMemCpy<cpp_type>(thread_tensor, root_tensor, place); \ HeterMemCpy<cpp_type>(thread_tensor, root_tensor, place); \
} \ } \
} while (0) } while (0)
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
_ForEachDataType_(HeterMemcpyFunc); _ForEachDataType_(HeterMemcpyFunc);
...@@ -318,12 +319,13 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, ...@@ -318,12 +319,13 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
// } // }
#define MergeCallback(cpp_type, proto_type) \ #define MergeCallback(cpp_type, proto_type) \
do { \ do { \
if (root_tensor->type() == proto_type) { \ if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
if (thread_tensor->type() != proto_type) { \ if (framework::TransToProtoVarType(thread_tensor->dtype()) != \
proto_type) { \
VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \ VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \
<< "] " << need_merge_var_names_[i] \ << "] " << need_merge_var_names_[i] \
<< ", root tensor type=" << root_tensor->type() \ << ", root tensor type=" << root_tensor->dtype() \
<< ", thread tensor type=" << thread_tensor->type(); \ << ", thread tensor type=" << thread_tensor->dtype(); \
exit(-1); \ exit(-1); \
} \ } \
MergeToRootScope<cpp_type>(root_tensor, thread_tensor); \ MergeToRootScope<cpp_type>(root_tensor, thread_tensor); \
...@@ -334,8 +336,10 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, ...@@ -334,8 +336,10 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
auto dev_id = thread_tensor->place().device; auto dev_id = thread_tensor->place().device;
platform::CUDADeviceGuard guard(dev_id); platform::CUDADeviceGuard guard(dev_id);
cudaMemset(thread_tensor->data(), 0, cudaMemset(
thread_tensor->numel() * SizeOfType(thread_tensor->type())); thread_tensor->data(), 0,
thread_tensor->numel() * SizeOfType(framework::TransToProtoVarType(
thread_tensor->dtype())));
#endif #endif
#ifdef PADDLE_WITH_XPU #ifdef PADDLE_WITH_XPU
auto place = thread_tensor->place(); auto place = thread_tensor->place();
...@@ -346,12 +350,16 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, ...@@ -346,12 +350,16 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
platform::DeviceContext* dev_ctx = pool.Get(place); platform::DeviceContext* dev_ctx = pool.Get(place);
const platform::XPUDeviceContext* xpu_ctx = const platform::XPUDeviceContext* xpu_ctx =
reinterpret_cast<const platform::XPUDeviceContext*>(dev_ctx); reinterpret_cast<const platform::XPUDeviceContext*>(dev_ctx);
xpu::memset(xpu_ctx->x_context(), thread_tensor->data(), 0, xpu::memset(
thread_tensor->numel() * SizeOfType(thread_tensor->type())); xpu_ctx->x_context(), thread_tensor->data(), 0,
thread_tensor->numel() * SizeOfType(framework::TransToProtoVarType(
thread_tensor->dtype())));
#endif #endif
} else { } else {
memset(thread_tensor->data(), 0, memset(thread_tensor->data(), 0,
thread_tensor->numel() * SizeOfType(thread_tensor->type())); thread_tensor->numel() *
SizeOfType(
framework::TransToProtoVarType(thread_tensor->dtype())));
} }
} }
auto* merge_var = response->add_vars(); auto* merge_var = response->add_vars();
...@@ -361,8 +369,10 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, ...@@ -361,8 +369,10 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
auto dev_id = root_tensor->place().device; auto dev_id = root_tensor->place().device;
platform::CUDADeviceGuard guard(dev_id); platform::CUDADeviceGuard guard(dev_id);
cudaMemset(root_tensor->data(), 0, cudaMemset(
root_tensor->numel() * SizeOfType(root_tensor->type())); root_tensor->data(), 0,
root_tensor->numel() *
SizeOfType(framework::TransToProtoVarType(root_tensor->dtype())));
#endif #endif
#ifdef PADDLE_WITH_XPU #ifdef PADDLE_WITH_XPU
auto place = root_tensor->place(); auto place = root_tensor->place();
...@@ -373,12 +383,15 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, ...@@ -373,12 +383,15 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
platform::DeviceContext* dev_ctx = pool.Get(place); platform::DeviceContext* dev_ctx = pool.Get(place);
const platform::XPUDeviceContext* xpu_ctx = const platform::XPUDeviceContext* xpu_ctx =
reinterpret_cast<const platform::XPUDeviceContext*>(dev_ctx); reinterpret_cast<const platform::XPUDeviceContext*>(dev_ctx);
xpu::memset(xpu_ctx->x_context(), root_tensor->data(), 0, xpu::memset(
root_tensor->numel() * SizeOfType(root_tensor->type())); xpu_ctx->x_context(), root_tensor->data(), 0,
root_tensor->numel() *
SizeOfType(framework::TransToProtoVarType(root_tensor->dtype())));
#endif #endif
} else { } else {
memset(root_tensor->data(), 0, memset(root_tensor->data(), 0,
root_tensor->numel() * SizeOfType(root_tensor->type())); root_tensor->numel() * SizeOfType(framework::TransToProtoVarType(
root_tensor->dtype())));
} }
} }
return 0; return 0;
......
...@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <ctime> #include <ctime>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/device_worker.h" #include "paddle/fluid/framework/device_worker.h"
#include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h" #include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h"
...@@ -79,11 +81,11 @@ void HogwildWorker::CreateThreadScope(const ProgramDesc &program) { ...@@ -79,11 +81,11 @@ void HogwildWorker::CreateThreadScope(const ProgramDesc &program) {
LoDTensor *thread_tensor = ptr1->GetMutable<LoDTensor>(); LoDTensor *thread_tensor = ptr1->GetMutable<LoDTensor>();
LoDTensor *root_tensor = LoDTensor *root_tensor =
root_scope_->FindVar(var->Name())->GetMutable<LoDTensor>(); root_scope_->FindVar(var->Name())->GetMutable<LoDTensor>();
#define MemsetCallback(cpp_type, proto_type) \ #define MemsetCallback(cpp_type, proto_type) \
do { \ do { \
if (root_tensor->type() == proto_type) { \ if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
SetZero<cpp_type>(thread_tensor, root_tensor, tensor_dim); \ SetZero<cpp_type>(thread_tensor, root_tensor, tensor_dim); \
} \ } \
} while (0) } while (0)
_ForEachDataType_(MemsetCallback); _ForEachDataType_(MemsetCallback);
} }
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/pten_utils.h" #include "paddle/fluid/framework/pten_utils.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
...@@ -134,7 +135,7 @@ class CompatMetaTensor : public pten::MetaTensor { ...@@ -134,7 +135,7 @@ class CompatMetaTensor : public pten::MetaTensor {
} }
} else { } else {
auto* var = BOOST_GET_CONST(VarDesc*, var_); auto* var = BOOST_GET_CONST(VarDesc*, var_);
return pten::TransToPtenDataType(var->GetDataType()); return paddle::framework::TransToPtenDataType(var->GetDataType());
} }
} }
...@@ -183,7 +184,7 @@ class CompatMetaTensor : public pten::MetaTensor { ...@@ -183,7 +184,7 @@ class CompatMetaTensor : public pten::MetaTensor {
} }
} else { } else {
auto* var = BOOST_GET(VarDesc*, var_); auto* var = BOOST_GET(VarDesc*, var_);
var->SetDataType(pten::TransToProtoVarType(dtype)); var->SetDataType(paddle::framework::TransToProtoVarType(dtype));
} }
} }
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <cmath> #include <cmath>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/op_version_registry.h"
namespace pten { namespace pten {
...@@ -216,7 +217,8 @@ void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const { ...@@ -216,7 +217,8 @@ void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const {
patterns::PDNodeName(name_scope_, "eltwise_y_in")); patterns::PDNodeName(name_scope_, "eltwise_y_in"));
// Set shape && datatype manually // Set shape && datatype manually
eltwise_y_in_desc.SetShape(framework::vectorize(ac_bias_tensor->dims())); eltwise_y_in_desc.SetShape(framework::vectorize(ac_bias_tensor->dims()));
eltwise_y_in_desc.SetDataType(ac_bias_tensor->type()); eltwise_y_in_desc.SetDataType(
framework::TransToProtoVarType(ac_bias_tensor->dtype()));
eltwise_y_in_desc.SetLoDLevel(ac_bias->Var()->GetLoDLevel()); eltwise_y_in_desc.SetLoDLevel(ac_bias->Var()->GetLoDLevel());
eltwise_y_in_desc.SetPersistable(true); eltwise_y_in_desc.SetPersistable(true);
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <string> #include <string>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
...@@ -285,7 +286,8 @@ void ConvBNFusePass::ApplyImpl(ir::Graph* graph) const { ...@@ -285,7 +286,8 @@ void ConvBNFusePass::ApplyImpl(ir::Graph* graph) const {
VarDesc eltwise_y_in_desc( VarDesc eltwise_y_in_desc(
patterns::PDNodeName("fuse_conv_bn", conv_type() + "_eltwise_y_in")); patterns::PDNodeName("fuse_conv_bn", conv_type() + "_eltwise_y_in"));
eltwise_y_in_desc.SetShape(framework::vectorize(bn_bias_tensor->dims())); eltwise_y_in_desc.SetShape(framework::vectorize(bn_bias_tensor->dims()));
eltwise_y_in_desc.SetDataType(bn_bias_tensor->type()); eltwise_y_in_desc.SetDataType(
framework::TransToProtoVarType(bn_bias_tensor->dtype()));
eltwise_y_in_desc.SetLoDLevel(bn_bias->Var()->GetLoDLevel()); eltwise_y_in_desc.SetLoDLevel(bn_bias->Var()->GetLoDLevel());
eltwise_y_in_desc.SetPersistable(true); eltwise_y_in_desc.SetPersistable(true);
auto* eltwise_y_in_node = g->CreateVarNode(&eltwise_y_in_desc); auto* eltwise_y_in_node = g->CreateVarNode(&eltwise_y_in_desc);
...@@ -531,7 +533,8 @@ void ConvEltwiseAddBNFusePass::ApplyImpl(ir::Graph* graph) const { ...@@ -531,7 +533,8 @@ void ConvEltwiseAddBNFusePass::ApplyImpl(ir::Graph* graph) const {
name_scope_, "eltwise_y_in" + std::to_string(found_conv_bn_count))); name_scope_, "eltwise_y_in" + std::to_string(found_conv_bn_count)));
eltwise_y_in_desc.SetShape( eltwise_y_in_desc.SetShape(
framework::vectorize(eltwise_y_in_tensor->dims())); framework::vectorize(eltwise_y_in_tensor->dims()));
eltwise_y_in_desc.SetDataType(eltwise_y_in_tensor->type()); eltwise_y_in_desc.SetDataType(
framework::TransToProtoVarType(eltwise_y_in_tensor->dtype()));
eltwise_y_in_desc.SetLoDLevel(eltwise_y_in->Var()->GetLoDLevel()); eltwise_y_in_desc.SetLoDLevel(eltwise_y_in->Var()->GetLoDLevel());
eltwise_y_in_desc.SetPersistable(true); eltwise_y_in_desc.SetPersistable(true);
auto* eltwise_y_in_node = g->CreateVarNode(&eltwise_y_in_desc); auto* eltwise_y_in_node = g->CreateVarNode(&eltwise_y_in_desc);
......
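The fuse passes create new persistable variables whose VarDesc still stores the proto enum, so the source tensor's dtype is converted when it is copied into the descriptor. A sketch of that pattern; the variable name is made up for illustration and bias_tensor stands in for the tensor found in the scope:

namespace fw = paddle::framework;
fw::VarDesc bias_desc("fused_eltwise_bias");  // hypothetical name
bias_desc.SetShape(fw::vectorize(bias_tensor->dims()));
bias_desc.SetDataType(fw::TransToProtoVarType(bias_tensor->dtype()));
bias_desc.SetPersistable(true);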
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <vector> #include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/layer_norm_fuse_pass.h" #include "paddle/fluid/framework/ir/layer_norm_fuse_pass.h"
#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/op_version_registry.h"
...@@ -273,10 +274,11 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const { ...@@ -273,10 +274,11 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const {
"but has %s elements.", "but has %s elements.",
eps_tensor->numel())); eps_tensor->numel()));
CHECK_TRUE( CHECK_TRUE(
eps_tensor->type() == proto::VarType::FP32, framework::TransToProtoVarType(eps_tensor->dtype()) ==
proto::VarType::FP32,
::paddle::string::Sprintf("The LayerNorm divisor epsilon value " ::paddle::string::Sprintf("The LayerNorm divisor epsilon value "
"must be of FP32 data type, but is %s.", "must be of FP32 data type, but is %s.",
eps_tensor->type())); eps_tensor->dtype()));
CHECK_TRUE(validateReduceOpAttrs(x_mean, x_shape, "input mean"), CHECK_TRUE(validateReduceOpAttrs(x_mean, x_shape, "input mean"),
"Validation of input mean node failed."); "Validation of input mean node failed.");
...@@ -333,7 +335,8 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const { ...@@ -333,7 +335,8 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const {
auto* gamma_tensor = scope->FindVar(gamma->Name())->GetMutable<LoDTensor>(); auto* gamma_tensor = scope->FindVar(gamma->Name())->GetMutable<LoDTensor>();
VarDesc new_gamma_desc(patterns::PDNodeName("layer_norm_fuse", "Scale")); VarDesc new_gamma_desc(patterns::PDNodeName("layer_norm_fuse", "Scale"));
new_gamma_desc.SetShape({layer_norm_x_mat_dims[1]}); new_gamma_desc.SetShape({layer_norm_x_mat_dims[1]});
new_gamma_desc.SetDataType(gamma_tensor->type()); new_gamma_desc.SetDataType(
framework::TransToProtoVarType(gamma_tensor->dtype()));
new_gamma_desc.SetLoDLevel(gamma->Var()->GetLoDLevel()); new_gamma_desc.SetLoDLevel(gamma->Var()->GetLoDLevel());
new_gamma_desc.SetPersistable(true); new_gamma_desc.SetPersistable(true);
auto* new_gamma_node = g->CreateVarNode(&new_gamma_desc); auto* new_gamma_node = g->CreateVarNode(&new_gamma_desc);
...@@ -347,7 +350,8 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const { ...@@ -347,7 +350,8 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const {
auto* beta_tensor = scope->FindVar(beta->Name())->GetMutable<LoDTensor>(); auto* beta_tensor = scope->FindVar(beta->Name())->GetMutable<LoDTensor>();
VarDesc new_beta_desc(patterns::PDNodeName("layer_norm_fuse", "Bias")); VarDesc new_beta_desc(patterns::PDNodeName("layer_norm_fuse", "Bias"));
new_beta_desc.SetShape({layer_norm_x_mat_dims[1]}); new_beta_desc.SetShape({layer_norm_x_mat_dims[1]});
new_beta_desc.SetDataType(beta_tensor->type()); new_beta_desc.SetDataType(
framework::TransToProtoVarType(beta_tensor->dtype()));
new_beta_desc.SetLoDLevel(beta->Var()->GetLoDLevel()); new_beta_desc.SetLoDLevel(beta->Var()->GetLoDLevel());
new_beta_desc.SetPersistable(true); new_beta_desc.SetPersistable(true);
auto* new_beta_node = g->CreateVarNode(&new_beta_desc); auto* new_beta_node = g->CreateVarNode(&new_beta_desc);
......
...@@ -95,7 +95,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place, ...@@ -95,7 +95,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
const char* var_name) { const char* var_name) {
auto x = scope->Var(var_name); auto x = scope->Var(var_name);
auto tensor = x->GetMutable<LoDTensor>(); auto tensor = x->GetMutable<LoDTensor>();
tensor->mutable_data(place, proto::VarType::FP32, 1); tensor->mutable_data(place,
framework::TransToPtenDataType(proto::VarType::FP32), 1);
} }
void MainTest(bool convWithExistingBias) { void MainTest(bool convWithExistingBias) {
......
...@@ -125,7 +125,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place, ...@@ -125,7 +125,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
const char* var_name) { const char* var_name) {
auto x = scope->Var(var_name); auto x = scope->Var(var_name);
auto tensor = x->GetMutable<LoDTensor>(); auto tensor = x->GetMutable<LoDTensor>();
tensor->mutable_data(place, proto::VarType::FP32, 1); tensor->mutable_data(place,
framework::TransToPtenDataType(proto::VarType::FP32), 1);
} }
void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog, void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
......
...@@ -438,7 +438,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place, ...@@ -438,7 +438,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
const char* var_name) { const char* var_name) {
auto x = scope->Var(var_name); auto x = scope->Var(var_name);
auto tensor = x->GetMutable<LoDTensor>(); auto tensor = x->GetMutable<LoDTensor>();
tensor->mutable_data(place, proto::VarType::FP32, 1); tensor->mutable_data(place,
framework::TransToPtenDataType(proto::VarType::FP32), 1);
} }
void PrepareGraph(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog) { void PrepareGraph(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog) {
......
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <stdint.h> #include <stdint.h>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/version.h" #include "paddle/fluid/framework/version.h"
namespace paddle { namespace paddle {
...@@ -327,7 +328,7 @@ std::vector<LoDTensor> SplitLoDTensor( ...@@ -327,7 +328,7 @@ std::vector<LoDTensor> SplitLoDTensor(
for (size_t i = 0; i < places.size(); ++i) { for (size_t i = 0; i < places.size(); ++i) {
LoDTensor dst; LoDTensor dst;
dst.Resize(src.dims()); dst.Resize(src.dims());
dst.mutable_data(places[i], src.type()); dst.mutable_data(places[i], src.dtype());
if (!src.lod().empty()) { if (!src.lod().empty()) {
dst.set_lod(src.lod()); dst.set_lod(src.lod());
} }
...@@ -393,7 +394,7 @@ void MergeLoDTensor(LoDTensor *target, ...@@ -393,7 +394,7 @@ void MergeLoDTensor(LoDTensor *target,
for (auto *t : lod_tensors) { for (auto *t : lod_tensors) {
if (t->numel() && t->IsInitialized()) { if (t->numel() && t->IsInitialized()) {
new_dim = t->dims(); new_dim = t->dims();
new_type = t->type(); new_type = framework::TransToProtoVarType(t->dtype());
new_layout = t->layout(); new_layout = t->layout();
break; break;
} }
...@@ -405,11 +406,12 @@ void MergeLoDTensor(LoDTensor *target, ...@@ -405,11 +406,12 @@ void MergeLoDTensor(LoDTensor *target,
auto *t = lod_tensors[i]; auto *t = lod_tensors[i];
if (t->numel() && t->IsInitialized()) { if (t->numel() && t->IsInitialized()) {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
new_type, t->type(), new_type, framework::TransToProtoVarType(t->dtype()),
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"LoDTensor data type does not match, expected type is %s, actual " "LoDTensor data type does not match, expected type is %s, actual "
"type is %s.", "type is %s.",
DataTypeToString(new_type), DataTypeToString(t->type()))); DataTypeToString(new_type),
DataTypeToString(framework::TransToProtoVarType(t->dtype()))));
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
new_layout, t->layout(), new_layout, t->layout(),
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
...@@ -444,7 +446,8 @@ void MergeLoDTensor(LoDTensor *target, ...@@ -444,7 +446,8 @@ void MergeLoDTensor(LoDTensor *target,
target->Resize(new_dim); target->Resize(new_dim);
target->set_layout(new_layout); target->set_layout(new_layout);
target->set_lod(new_lod); target->set_lod(new_lod);
target->mutable_data(dst_place, new_type); target->mutable_data(dst_place,
paddle::framework::TransToPtenDataType(new_type));
int begin = 0; int begin = 0;
for (auto *src : lod_tensors) { for (auto *src : lod_tensors) {
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <string> #include <string>
#include "paddle/fluid/framework/device_worker_factory.h" #include "paddle/fluid/framework/device_worker_factory.h"
#include "paddle/fluid/framework/trainer.h" #include "paddle/fluid/framework/trainer.h"
#include "paddle/fluid/platform/lodtensor_printer.h" #include "paddle/fluid/platform/lodtensor_printer.h"
...@@ -250,12 +251,13 @@ void MultiTrainer::Finalize() { ...@@ -250,12 +251,13 @@ void MultiTrainer::Finalize() {
LoDTensor* thread_tensor = thread_var->GetMutable<LoDTensor>(); LoDTensor* thread_tensor = thread_var->GetMutable<LoDTensor>();
#define MergeCallback(cpp_type, proto_type) \ #define MergeCallback(cpp_type, proto_type) \
do { \ do { \
if (root_tensor->type() == proto_type) { \ if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
if (thread_tensor->type() != proto_type) { \ if (framework::TransToProtoVarType(thread_tensor->dtype()) != \
proto_type) { \
VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \ VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \
<< "] " << need_merge_var_names_[i] \ << "] " << need_merge_var_names_[i] \
<< ", root tensor type=" << root_tensor->type() \ << ", root tensor type=" << root_tensor->dtype() \
<< ", thread tensor type=" << thread_tensor->type(); \ << ", thread tensor type=" << thread_tensor->dtype(); \
exit(-1); \ exit(-1); \
} \ } \
MergeToRootScope<cpp_type>(root_tensor, thread_tensor); \ MergeToRootScope<cpp_type>(root_tensor, thread_tensor); \
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/framework/new_executor/data_transfer.h" #include "paddle/fluid/framework/new_executor/data_transfer.h"
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -366,7 +367,7 @@ void HandleComplexGradToRealGrad(const OpFuncNode& op_func_node, ...@@ -366,7 +367,7 @@ void HandleComplexGradToRealGrad(const OpFuncNode& op_func_node,
continue; continue;
} }
// only focus on complex dtype now // only focus on complex dtype now
auto src_type = grad_tensor->type(); auto src_type = framework::TransToProtoVarType(grad_tensor->dtype());
if (!framework::IsComplexType(src_type)) { if (!framework::IsComplexType(src_type)) {
VLOG(3) << "skip grad_tensor with not complexType"; VLOG(3) << "skip grad_tensor with not complexType";
continue; continue;
...@@ -390,7 +391,7 @@ void HandleComplexGradToRealGrad(const OpFuncNode& op_func_node, ...@@ -390,7 +391,7 @@ void HandleComplexGradToRealGrad(const OpFuncNode& op_func_node,
platform::errors::Unavailable( platform::errors::Unavailable(
"Forward tensor is nullptr when handle complex data to real.")); "Forward tensor is nullptr when handle complex data to real."));
// only need record type, the allocation may have been released // only need record type, the allocation may have been released
auto dst_type = tensor->saved_type(); auto dst_type = framework::TransToProtoVarType(tensor->dtype());
// only focus on real dtype and need casting // only focus on real dtype and need casting
if (framework::IsComplexType(dst_type)) { if (framework::IsComplexType(dst_type)) {
continue; continue;
......
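HandleComplexGradToRealGrad keeps reasoning in proto terms (IsComplexType takes the proto enum), so both the gradient's and the forward tensor's dtype are converted before the complex-to-real check. The gist, as a sketch with forward_tensor as an illustrative name for the saved forward value:

namespace fw = paddle::framework;
auto src_type = fw::TransToProtoVarType(grad_tensor->dtype());
if (!fw::IsComplexType(src_type)) {
  return;  // real-valued grad, nothing to cast
}
auto dst_type = fw::TransToProtoVarType(forward_tensor->dtype());
if (fw::IsComplexType(dst_type)) {
  return;  // forward value is complex too, keep the complex grad
}
// otherwise cast the complex gradient back to dst_type here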
...@@ -19,6 +19,7 @@ limitations under the License. */ ...@@ -19,6 +19,7 @@ limitations under the License. */
#include <string> #include <string>
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_transform.h" #include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/data_type_transform.h"
#include "paddle/fluid/framework/details/nan_inf_utils.h" #include "paddle/fluid/framework/details/nan_inf_utils.h"
...@@ -109,13 +110,13 @@ static std::string GetDtype(const ScopeBase& scope, const std::string& name) { ...@@ -109,13 +110,13 @@ static std::string GetDtype(const ScopeBase& scope, const std::string& name) {
if (UNLIKELY(!tensor.IsInitialized())) { if (UNLIKELY(!tensor.IsInitialized())) {
return ""; return "";
} }
return DataTypeToString(tensor.type()); return DataTypeToString(framework::TransToProtoVarType(tensor.dtype()));
} else if (var->IsType<pten::SelectedRows>()) { } else if (var->IsType<pten::SelectedRows>()) {
auto tensor = var->Get<pten::SelectedRows>().value(); auto tensor = var->Get<pten::SelectedRows>().value();
if (UNLIKELY(!tensor.IsInitialized())) { if (UNLIKELY(!tensor.IsInitialized())) {
return "uninited"; return "uninited";
} else { } else {
return DataTypeToString(tensor.type()); return DataTypeToString(framework::TransToProtoVarType(tensor.dtype()));
} }
} else if (var->IsType<Strings>()) { } else if (var->IsType<Strings>()) {
return "strings"; return "strings";
...@@ -1070,8 +1071,8 @@ static void CheckTensorNANOrInf(const std::string& op_type, ...@@ -1070,8 +1071,8 @@ static void CheckTensorNANOrInf(const std::string& op_type,
if (tensor.memory_size() == 0) { if (tensor.memory_size() == 0) {
return; return;
} }
if (tensor.type() != proto::VarType::FP32 && if (framework::TransToProtoVarType(tensor.dtype()) != proto::VarType::FP32 &&
tensor.type() != proto::VarType::FP64) { framework::TransToProtoVarType(tensor.dtype()) != proto::VarType::FP64) {
return; return;
} }
PADDLE_ENFORCE_NE( PADDLE_ENFORCE_NE(
...@@ -1536,7 +1537,7 @@ void OperatorWithKernel::HandleComplexGradToRealGrad( ...@@ -1536,7 +1537,7 @@ void OperatorWithKernel::HandleComplexGradToRealGrad(
continue; continue;
} }
// only focus on complex dtype now // only focus on complex dtype now
auto src_type = grad_tensor->type(); auto src_type = framework::TransToProtoVarType(grad_tensor->dtype());
if (!IsComplexType(src_type)) { if (!IsComplexType(src_type)) {
continue; continue;
} }
...@@ -1556,7 +1557,7 @@ void OperatorWithKernel::HandleComplexGradToRealGrad( ...@@ -1556,7 +1557,7 @@ void OperatorWithKernel::HandleComplexGradToRealGrad(
platform::errors::Unavailable( platform::errors::Unavailable(
"Forward tensor is nullptr when handle complex data to real.")); "Forward tensor is nullptr when handle complex data to real."));
// only need record type, the allocation may have been released // only need record type, the allocation may have been released
auto dst_type = tensor->saved_type(); auto dst_type = framework::TransToProtoVarType(tensor->dtype());
// only focus on real dtype and need casting // only focus on real dtype and need casting
if (IsComplexType(dst_type)) { if (IsComplexType(dst_type)) {
continue; continue;
...@@ -1770,7 +1771,8 @@ void OperatorWithKernel::ParseInputDataType( ...@@ -1770,7 +1771,8 @@ void OperatorWithKernel::ParseInputDataType(
platform::errors::InvalidArgument("The %s Op's Input Variable `%s` " platform::errors::InvalidArgument("The %s Op's Input Variable `%s` "
"contains uninitialized Tensor.", "contains uninitialized Tensor.",
Type(), name)); Type(), name));
proto::VarType::Type tmp = t->type(); proto::VarType::Type tmp =
paddle::framework::TransToProtoVarType(t->dtype());
PADDLE_ENFORCE(tmp == *data_type || *data_type == default_data_type, PADDLE_ENFORCE(tmp == *data_type || *data_type == default_data_type,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"The DataType of %s Op's duplicable or different " "The DataType of %s Op's duplicable or different "
...@@ -1869,8 +1871,8 @@ proto::VarType::Type OperatorWithKernel::IndicateOrPromoteVarDataTypes( ...@@ -1869,8 +1871,8 @@ proto::VarType::Type OperatorWithKernel::IndicateOrPromoteVarDataTypes(
auto* tensor_b = GetTensorFormInputSafely(ctx, name2); auto* tensor_b = GetTensorFormInputSafely(ctx, name2);
// 2. Get two input types // 2. Get two input types
auto type_a = tensor_a->type(); auto type_a = framework::TransToProtoVarType(tensor_a->dtype());
auto type_b = tensor_b->type(); auto type_b = framework::TransToProtoVarType(tensor_b->dtype());
// 3. Get first input type or promote complex types // 3. Get first input type or promote complex types
auto target_type = PromoteTypesIfComplexExists(type_a, type_b); auto target_type = PromoteTypesIfComplexExists(type_a, type_b);
...@@ -2168,7 +2170,7 @@ void OperatorWithKernel::BuildPtenKernelContext( ...@@ -2168,7 +2170,7 @@ void OperatorWithKernel::BuildPtenKernelContext(
pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr)); pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr));
} else if (attr_defs[i].type_index == } else if (attr_defs[i].type_index ==
std::type_index(typeid(pten::DataType))) { std::type_index(typeid(pten::DataType))) {
auto data_type = pten::TransToPtenDataType( auto data_type = paddle::framework::TransToPtenDataType(
static_cast<framework::proto::VarType::Type>( static_cast<framework::proto::VarType::Type>(
BOOST_GET_CONST(int, attr))); BOOST_GET_CONST(int, attr)));
pt_kernel_context->EmplaceBackAttr(data_type); pt_kernel_context->EmplaceBackAttr(data_type);
......
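Kernel data-type inference still produces a proto::VarType::Type, so each input dtype is converted before consistency checks or complex-type promotion. A condensed sketch of the promotion step shown above:

namespace fw = paddle::framework;
auto type_a = fw::TransToProtoVarType(tensor_a->dtype());
auto type_b = fw::TransToProtoVarType(tensor_b->dtype());
// Complex inputs win the promotion; otherwise the first input's type is used.
auto target_type = fw::PromoteTypesIfComplexExists(type_a, type_b);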
...@@ -40,6 +40,7 @@ limitations under the License. */ ...@@ -40,6 +40,7 @@ limitations under the License. */
#include "paddle/fluid/platform/variant.h" #include "paddle/fluid/platform/variant.h"
#include "paddle/utils/flat_hash_map.h" #include "paddle/utils/flat_hash_map.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/pten/core/compat/arg_map_context.h" #include "paddle/pten/core/compat/arg_map_context.h"
#include "paddle/pten/core/compat/op_utils.h" #include "paddle/pten/core/compat/op_utils.h"
#include "paddle/pten/core/kernel_context.h" #include "paddle/pten/core/kernel_context.h"
...@@ -422,8 +423,8 @@ class ExecutionContext { ...@@ -422,8 +423,8 @@ class ExecutionContext {
"size(%d).", "size(%d).",
allocation_ptr->size(), framework::product(dim) * sizeof(T))); allocation_ptr->size(), framework::product(dim) * sizeof(T)));
paddle::framework::Tensor temp_tensor( paddle::framework::Tensor temp_tensor(framework::TransToPtenDataType(
framework::ToDataType(std::type_index(typeid(T)))); framework::ToDataType(std::type_index(typeid(T)))));
temp_tensor.Resize(dim); temp_tensor.Resize(dim);
temp_tensor.ResetHolder(std::move(shared_allocation)); temp_tensor.ResetHolder(std::move(shared_allocation));
return temp_tensor; return temp_tensor;
......
...@@ -27,6 +27,7 @@ limitations under the License. */ ...@@ -27,6 +27,7 @@ limitations under the License. */
#include "cinn/frontend/op_mappers/use_op_mappers.h" #include "cinn/frontend/op_mappers/use_op_mappers.h"
#include "cinn/frontend/var_type_utils.h" #include "cinn/frontend/var_type_utils.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/errors.h"
...@@ -57,7 +58,7 @@ OpMapperContext::FeedInfo GetCinnFeedInfoFromTensor( ...@@ -57,7 +58,7 @@ OpMapperContext::FeedInfo GetCinnFeedInfoFromTensor(
// op // op
auto tensor_type = ::paddle::framework::proto::VarType::FP32; auto tensor_type = ::paddle::framework::proto::VarType::FP32;
if (!skip_trans_type) { if (!skip_trans_type) {
tensor_type = tensor.type(); tensor_type = framework::TransToProtoVarType(tensor.dtype());
} }
auto cinn_var_type = TransformVarDataTypeToCinn(tensor_type); auto cinn_var_type = TransformVarDataTypeToCinn(tensor_type);
info.type = ::cinn::frontend::utils::CppVarType2CommonType(cinn_var_type); info.type = ::cinn::frontend::utils::CppVarType2CommonType(cinn_var_type);
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.h" #include "paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.h"
namespace paddle { namespace paddle {
...@@ -206,7 +207,9 @@ class CinnGraphSymbolizationTest : public ::testing::Test { ...@@ -206,7 +207,9 @@ class CinnGraphSymbolizationTest : public ::testing::Test {
LoDTensor tensor; LoDTensor tensor;
DDim dims = {256, 1024}; DDim dims = {256, 1024};
tensor.Resize(dims); tensor.Resize(dims);
tensor.mutable_data(platform::CPUPlace(), proto::VarType::FP32); tensor.mutable_data(
platform::CPUPlace(),
framework::TransToPtenDataType(framework::proto::VarType::FP32));
return tensor; return tensor;
}; };
#define FillFeedList(Name) feed_targets[#Name] = create_tensor(); #define FillFeedList(Name) feed_targets[#Name] = create_tensor();
......
...@@ -21,6 +21,7 @@ limitations under the License. */ ...@@ -21,6 +21,7 @@ limitations under the License. */
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/details/async_ssa_graph_executor.h" #include "paddle/fluid/framework/details/async_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/bind_threaded_ssa_graph_executor.h" #include "paddle/fluid/framework/details/bind_threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h" #include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h"
...@@ -775,7 +776,8 @@ void ParallelExecutor::BCastParamsToDevices( ...@@ -775,7 +776,8 @@ void ParallelExecutor::BCastParamsToDevices(
std::vector<void *> buffers; std::vector<void *> buffers;
buffers.reserve(member_->places_.size()); buffers.reserve(member_->places_.size());
size_t numel = main_tensor.numel(); size_t numel = main_tensor.numel();
ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type()); ncclDataType_t data_type = platform::ToNCCLDataType(
framework::TransToProtoVarType(main_tensor.dtype()));
for (size_t i = 0; i < member_->places_.size(); ++i) { for (size_t i = 0; i < member_->places_.size(); ++i) {
auto place = member_->places_[i]; auto place = member_->places_[i];
void *buffer; void *buffer;
...@@ -786,7 +788,7 @@ void ParallelExecutor::BCastParamsToDevices( ...@@ -786,7 +788,7 @@ void ParallelExecutor::BCastParamsToDevices(
auto local_scope = member_->local_scopes_[i]; auto local_scope = member_->local_scopes_[i];
auto *t = local_scope->Var(var)->GetMutable<LoDTensor>(); auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
t->Resize(dims); t->Resize(dims);
buffer = t->mutable_data(place, main_tensor.type()); buffer = t->mutable_data(place, main_tensor.dtype());
} }
buffers.push_back(buffer); buffers.push_back(buffer);
} }
...@@ -818,7 +820,8 @@ void ParallelExecutor::BCastParamsToDevices( ...@@ -818,7 +820,8 @@ void ParallelExecutor::BCastParamsToDevices(
// but broadcast is equivalent to no type of operation, does not affect // but broadcast is equivalent to no type of operation, does not affect
// correctness. // correctness.
BKCLDataType data_type = BKCL_FLOAT; BKCLDataType data_type = BKCL_FLOAT;
// BKCLDataType data_type = platform::ToBKCLDataType(main_tensor.type()); // BKCLDataType data_type =
// platform::ToBKCLDataType(framework::TransToProtoVarType(main_tensor.dtype()));
for (size_t i = 0; i < member_->places_.size(); ++i) { for (size_t i = 0; i < member_->places_.size(); ++i) {
auto place = member_->places_[i]; auto place = member_->places_[i];
void *buffer; void *buffer;
...@@ -829,7 +832,7 @@ void ParallelExecutor::BCastParamsToDevices( ...@@ -829,7 +832,7 @@ void ParallelExecutor::BCastParamsToDevices(
auto local_scope = member_->local_scopes_[i]; auto local_scope = member_->local_scopes_[i];
auto *t = local_scope->Var(var)->GetMutable<LoDTensor>(); auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
t->Resize(dims); t->Resize(dims);
buffer = t->mutable_data(place, main_tensor.type()); buffer = t->mutable_data(place, main_tensor.dtype());
} }
buffers.push_back(buffer); buffers.push_back(buffer);
} }
...@@ -848,7 +851,8 @@ void ParallelExecutor::BCastParamsToDevices( ...@@ -848,7 +851,8 @@ void ParallelExecutor::BCastParamsToDevices(
for (size_t i = 0; i < member_->places_.size(); ++i) { for (size_t i = 0; i < member_->places_.size(); ++i) {
auto &bkcl_ctx = bkcl_ctxs->at(member_->places_[i]); auto &bkcl_ctx = bkcl_ctxs->at(member_->places_[i]);
auto broadcast_numel = numel; auto broadcast_numel = numel;
if (main_tensor.type() == framework::proto::VarType::INT64) { if (framework::TransToProtoVarType(main_tensor.dtype()) ==
framework::proto::VarType::INT64) {
broadcast_numel *= 2; broadcast_numel *= 2;
} }
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
...@@ -873,7 +877,7 @@ void ParallelExecutor::BCastParamsToDevices( ...@@ -873,7 +877,7 @@ void ParallelExecutor::BCastParamsToDevices(
auto copy_memory = [&] { auto copy_memory = [&] {
t->Resize(dims); t->Resize(dims);
t->mutable_data(cpu, main_tensor.type()); t->mutable_data(cpu, main_tensor.dtype());
paddle::framework::TensorCopy(main_tensor, cpu, t); paddle::framework::TensorCopy(main_tensor, cpu, t);
}; };
......
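Communication backends keep their own type enums, so the broadcast path converts the dtype once and hands the proto value to the existing helpers (ToNCCLDataType here; the commented BKCL line shows the same shape). A sketch, assuming an NCCL- or RCCL-enabled build:

#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
ncclDataType_t nccl_dtype = platform::ToNCCLDataType(
    framework::TransToProtoVarType(main_tensor.dtype()));
#endif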
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#include <cstdlib> #include <cstdlib>
#include <string> #include <string>
#include <vector> #include <vector>
#include "io/fs.h" #include "io/fs.h"
#include "paddle/fluid/framework/data_feed_factory.h" #include "paddle/fluid/framework/data_feed_factory.h"
#include "paddle/fluid/framework/data_set.h" #include "paddle/fluid/framework/data_set.h"
...@@ -232,12 +233,13 @@ void PSGPUTrainer::Finalize() { ...@@ -232,12 +233,13 @@ void PSGPUTrainer::Finalize() {
} }
#define MergeCallback(cpp_type, proto_type) \ #define MergeCallback(cpp_type, proto_type) \
do { \ do { \
if (root_tensor->type() == proto_type) { \ if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
if (thread_tensor->type() != proto_type) { \ if (framework::TransToProtoVarType(thread_tensor->dtype()) != \
proto_type) { \
VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \ VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \
<< "] " << need_merge_var_names_[i] \ << "] " << need_merge_var_names_[i] \
<< ", root tensor type=" << root_tensor->type() \ << ", root tensor type=" << root_tensor->dtype() \
<< ", thread tensor type=" << thread_tensor->type(); \ << ", thread tensor type=" << thread_tensor->dtype(); \
exit(-1); \ exit(-1); \
} \ } \
MergeToRootScope<cpp_type>(root_tensor, thread_tensor); \ MergeToRootScope<cpp_type>(root_tensor, thread_tensor); \
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include <sstream> #include <sstream>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/pten_utils.h" #include "paddle/fluid/framework/pten_utils.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
...@@ -59,7 +60,7 @@ class KernelArgsNameMakerByOpProto : public KernelArgsNameMaker { ...@@ -59,7 +60,7 @@ class KernelArgsNameMakerByOpProto : public KernelArgsNameMaker {
OpKernelType TransPtenKernelKeyToOpKernelType( OpKernelType TransPtenKernelKeyToOpKernelType(
const pten::KernelKey& kernel_key) { const pten::KernelKey& kernel_key) {
proto::VarType::Type data_type = proto::VarType::Type data_type =
pten::TransToProtoVarType(kernel_key.dtype()); paddle::framework::TransToProtoVarType(kernel_key.dtype());
// no need to set current device id here // no need to set current device id here
platform::Place place = pten::TransToPtenPlace(kernel_key.backend(), false); platform::Place place = pten::TransToPtenPlace(kernel_key.backend(), false);
DataLayout data_layout = kernel_key.layout(); DataLayout data_layout = kernel_key.layout();
...@@ -87,7 +88,7 @@ pten::KernelKey TransOpKernelTypeToPtenKernelKey( ...@@ -87,7 +88,7 @@ pten::KernelKey TransOpKernelTypeToPtenKernelKey(
} }
paddle::experimental::DataLayout layout = kernel_type.data_layout_; paddle::experimental::DataLayout layout = kernel_type.data_layout_;
paddle::experimental::DataType dtype = paddle::experimental::DataType dtype =
pten::TransToPtenDataType(kernel_type.data_type_); paddle::framework::TransToPtenDataType(kernel_type.data_type_);
return pten::KernelKey(backend, layout, dtype); return pten::KernelKey(backend, layout, dtype);
} }
......
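With the helpers moved out of pten, the kernel-key translation is a round trip through paddle::framework: proto enum to pten::DataType when building a pten::KernelKey, and back when reconstructing an OpKernelType. A small sketch of the two directions, under the assumption that the round trip is lossless for the supported types:

namespace fw = paddle::framework;
// proto -> pten dtype (used when building a pten::KernelKey) ...
auto pt_dtype = fw::TransToPtenDataType(fw::proto::VarType::FP32);
// ... and pten dtype -> proto (used when rebuilding an OpKernelType).
auto proto_type = fw::TransToProtoVarType(pt_dtype);
// Expected: proto_type == fw::proto::VarType::FP32.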
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <fstream> #include <fstream>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/layer.h"
namespace paddle { namespace paddle {
...@@ -282,7 +283,7 @@ bool SaveTensorToDisk(const std::string& file_name, ...@@ -282,7 +283,7 @@ bool SaveTensorToDisk(const std::string& file_name,
auto tensor = itera.second; auto tensor = itera.second;
proto::VarType::TensorDesc desc; proto::VarType::TensorDesc desc;
desc.set_data_type(tensor->type()); desc.set_data_type(framework::TransToProtoVarType(tensor->dtype()));
auto dims = framework::vectorize(tensor->dims()); auto dims = framework::vectorize(tensor->dims());
auto* pb_dims = desc.mutable_dims(); auto* pb_dims = desc.mutable_dims();
pb_dims->Resize(static_cast<int>(dims.size()), 0); pb_dims->Resize(static_cast<int>(dims.size()), 0);
...@@ -294,7 +295,7 @@ bool SaveTensorToDisk(const std::string& file_name, ...@@ -294,7 +295,7 @@ bool SaveTensorToDisk(const std::string& file_name,
// save tensor // save tensor
uint64_t data_size = uint64_t data_size =
tensor->numel() * framework::SizeOfType(tensor->type()); tensor->numel() * framework::DataTypeSize(tensor->dtype());
auto* data_ptr = tensor->data(); auto* data_ptr = tensor->data();
if (platform::is_gpu_place(tensor->place())) { if (platform::is_gpu_place(tensor->place())) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
......
...@@ -19,6 +19,7 @@ limitations under the License. */ ...@@ -19,6 +19,7 @@ limitations under the License. */
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/complex.h" #include "paddle/fluid/platform/complex.h"
...@@ -55,10 +56,10 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place, ...@@ -55,10 +56,10 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
// than numel()*size(type()) // than numel()*size(type())
auto dst_ptr = auto dst_ptr =
src.layout() == DataLayout::kMKLDNN src.layout() == DataLayout::kMKLDNN
? dst->mutable_data(dst_place, src.type(), src.memory_size()) ? dst->mutable_data(dst_place, src.dtype(), src.memory_size())
: dst->mutable_data(dst_place, src.type()); : dst->mutable_data(dst_place, src.dtype());
#else #else
auto dst_ptr = dst->mutable_data(dst_place, src.type()); auto dst_ptr = dst->mutable_data(dst_place, src.dtype());
#endif #endif
if (src_ptr == dst_ptr && src_place == dst_place) { if (src_ptr == dst_ptr && src_place == dst_place) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to " VLOG(3) << "Skip copy the same data async from " << src_place << " to "
...@@ -70,9 +71,9 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place, ...@@ -70,9 +71,9 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
auto size = src.layout() == DataLayout::kMKLDNN auto size = src.layout() == DataLayout::kMKLDNN
? src.memory_size() ? src.memory_size()
: src.numel() * SizeOfType(src.type()); : src.numel() * framework::DataTypeSize(src.dtype());
#else #else
auto size = src.numel() * SizeOfType(src.type()); auto size = src.numel() * framework::DataTypeSize(src.dtype());
#endif #endif
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
...@@ -126,7 +127,7 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place, ...@@ -126,7 +127,7 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
Tensor npu_pinned_tensor; Tensor npu_pinned_tensor;
npu_pinned_tensor.Resize(src.dims()); npu_pinned_tensor.Resize(src.dims());
auto npu_pinned_ptr = auto npu_pinned_ptr =
npu_pinned_tensor.mutable_data(npu_pinned_place, src.type()); npu_pinned_tensor.mutable_data(npu_pinned_place, src.dtype());
memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size); memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size);
// 2. async copy npu pinned tensor -> npu tensor // 2. async copy npu pinned tensor -> npu tensor
...@@ -410,7 +411,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, ...@@ -410,7 +411,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
#endif #endif
auto src_place = src.place(); auto src_place = src.place();
auto src_ptr = src.data(); auto src_ptr = src.data();
auto dst_ptr = dst->mutable_data(dst_place, src.type()); auto dst_ptr = dst->mutable_data(dst_place, src.dtype());
VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr;
if (src_ptr == dst_ptr && src_place == dst_place) { if (src_ptr == dst_ptr && src_place == dst_place) {
...@@ -419,7 +420,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, ...@@ -419,7 +420,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
return; return;
} }
auto size = src.numel() * SizeOfType(src.type()); auto size = src.numel() * framework::DataTypeSize(src.dtype());
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size); memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
} }
...@@ -582,8 +583,9 @@ struct AnyDTypeVisitor { ...@@ -582,8 +583,9 @@ struct AnyDTypeVisitor {
template <typename Predicate, typename DevCtx> template <typename Predicate, typename DevCtx>
inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor, inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor,
const DevCtx& ctx, framework::Tensor* out) { const DevCtx& ctx, framework::Tensor* out) {
VisitDataType(tensor.type(), AnyDTypeVisitor<Predicate, DevCtx>( VisitDataType(
predicate, tensor, ctx, out)); framework::TransToProtoVarType(tensor.dtype()),
AnyDTypeVisitor<Predicate, DevCtx>(predicate, tensor, ctx, out));
} }
template <typename Predicate> template <typename Predicate>
...@@ -722,8 +724,9 @@ struct AllDTypeVisitor { ...@@ -722,8 +724,9 @@ struct AllDTypeVisitor {
template <typename Predicate, typename DevCtx> template <typename Predicate, typename DevCtx>
inline void AllImpl(Predicate predicate, const framework::Tensor& tensor, inline void AllImpl(Predicate predicate, const framework::Tensor& tensor,
const DevCtx& ctx, framework::Tensor* out) { const DevCtx& ctx, framework::Tensor* out) {
VisitDataType(tensor.type(), AllDTypeVisitor<Predicate, DevCtx>( VisitDataType(
predicate, tensor, ctx, out)); framework::TransToProtoVarType(tensor.dtype()),
AllDTypeVisitor<Predicate, DevCtx>(predicate, tensor, ctx, out));
} }
template <typename Predicate> template <typename Predicate>
...@@ -930,7 +933,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor, ...@@ -930,7 +933,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor,
// int32_t size // int32_t size
// void* protobuf message // void* protobuf message
proto::VarType::TensorDesc desc; proto::VarType::TensorDesc desc;
desc.set_data_type(tensor.type()); desc.set_data_type(framework::TransToProtoVarType(tensor.dtype()));
auto dims = framework::vectorize(tensor.dims()); auto dims = framework::vectorize(tensor.dims());
auto* pb_dims = desc.mutable_dims(); auto* pb_dims = desc.mutable_dims();
pb_dims->Resize(static_cast<int>(dims.size()), 0); pb_dims->Resize(static_cast<int>(dims.size()), 0);
...@@ -941,7 +944,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor, ...@@ -941,7 +944,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor,
os.write(out.data(), size); os.write(out.data(), size);
} }
{ // the 3rd field, tensor data { // the 3rd field, tensor data
uint64_t size = tensor.numel() * framework::SizeOfType(tensor.type()); uint64_t size = tensor.numel() * framework::DataTypeSize(tensor.dtype());
auto* data_ptr = tensor.data(); auto* data_ptr = tensor.data();
PADDLE_ENFORCE_LT(size, (std::numeric_limits<std::streamsize>::max)(), PADDLE_ENFORCE_LT(size, (std::numeric_limits<std::streamsize>::max)(),
...@@ -1419,13 +1422,14 @@ std::ostream& operator<<(std::ostream& os, const pten::DenseTensor& t) { ...@@ -1419,13 +1422,14 @@ std::ostream& operator<<(std::ostream& os, const pten::DenseTensor& t) {
dev_ctx.Wait(); dev_ctx.Wait();
} }
#define PrintTensorCallback(cpp_type, proto_type) \ #define PrintTensorCallback(cpp_type, proto_type) \
do { \ do { \
if (tensor.type() == proto_type) { \ if (paddle::framework::TransToProtoVarType(tensor.dtype()) == \
os << " - dtype: " << proto_type << "\n"; \ proto_type) { \
paddle::framework::print_tensor<cpp_type>(os, tensor); \ os << " - dtype: " << proto_type << "\n"; \
return os; \ paddle::framework::print_tensor<cpp_type>(os, tensor); \
} \ return os; \
} \
} while (0) } while (0)
_ForEachDataType_(PrintTensorCallback); _ForEachDataType_(PrintTensorCallback);
......
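The tensor_util.cc hunks repeat the same idiom for type dispatch: AnyImpl/AllImpl hand VisitDataType the enum obtained from TransToProtoVarType(tensor.dtype()), and the PrintTensorCallback macro compares against that converted value. A hedged sketch of the "convert once, then dispatch on the proto enum" idiom; TensorElementBytes is an illustrative helper, not part of this commit:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace framework {

// Illustrative helper: branch on the legacy enum recovered from dtype().
inline size_t TensorElementBytes(const Tensor& tensor) {
  switch (TransToProtoVarType(tensor.dtype())) {
    case proto::VarType::FP32:
      return sizeof(float);
    case proto::VarType::FP64:
      return sizeof(double);
    case proto::VarType::INT64:
      return sizeof(int64_t);
    default:
      // DataTypeSize(dtype()) already covers every supported type generically.
      return DataTypeSize(tensor.dtype());
  }
}

}  // namespace framework
}  // namespace paddle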
...@@ -160,7 +160,7 @@ void TensorFromArray(const T* src, const size_t& array_size, ...@@ -160,7 +160,7 @@ void TensorFromArray(const T* src, const size_t& array_size,
Tensor npu_pinned_tensor; Tensor npu_pinned_tensor;
npu_pinned_tensor.Resize(dst->dims()); npu_pinned_tensor.Resize(dst->dims());
auto npu_pinned_ptr = auto npu_pinned_ptr =
npu_pinned_tensor.mutable_data(npu_pinned_place, dst->type()); npu_pinned_tensor.mutable_data(npu_pinned_place, dst->dtype());
memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size); memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size);
// 2. async copy npu pinned tensor -> npu tensor // 2. async copy npu pinned tensor -> npu tensor
...@@ -211,7 +211,7 @@ void TensorFromVector(const std::vector<T>& src, ...@@ -211,7 +211,7 @@ void TensorFromVector(const std::vector<T>& src,
// so pass nullptr as stream to memory::Copy(). // so pass nullptr as stream to memory::Copy().
else if (platform::is_npu_place(dst_place)) { // NOLINT else if (platform::is_npu_place(dst_place)) { // NOLINT
// 1. vector -> npu pinned tensor // 1. vector -> npu pinned tensor
Tensor npu_pinned_tensor(dst->type()); Tensor npu_pinned_tensor(dst->dtype());
platform::NPUPinnedPlace npu_pinned_place; platform::NPUPinnedPlace npu_pinned_place;
auto npu_pinned_ptr = auto npu_pinned_ptr =
npu_pinned_tensor.mutable_data<T>(dst->dims(), npu_pinned_place); npu_pinned_tensor.mutable_data<T>(dst->dims(), npu_pinned_place);
...@@ -280,7 +280,7 @@ inline void TensorFromVector(const std::vector<bool>& src, ...@@ -280,7 +280,7 @@ inline void TensorFromVector(const std::vector<bool>& src,
Tensor npu_pinned_tensor; Tensor npu_pinned_tensor;
npu_pinned_tensor.Resize(dst->dims()); npu_pinned_tensor.Resize(dst->dims());
auto npu_pinned_ptr = auto npu_pinned_ptr =
npu_pinned_tensor.mutable_data(npu_pinned_place, dst->type()); npu_pinned_tensor.mutable_data(npu_pinned_place, dst->dtype());
memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size); memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size);
// 2. async copy npu pinned tensor -> npu tensor // 2. async copy npu pinned tensor -> npu tensor
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/imperative/all_reduce.h" #include "paddle/fluid/imperative/all_reduce.h"
#include "paddle/fluid/framework/convert_utils.h"
#ifdef PADDLE_WITH_NCCL #ifdef PADDLE_WITH_NCCL
#include <nccl.h> #include <nccl.h>
...@@ -62,8 +63,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst, ...@@ -62,8 +63,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst,
const void *src_ptr = src.data(); const void *src_ptr = src.data();
dst->Resize(src.dims()); dst->Resize(src.dims());
auto *dst_ptr = dst->mutable_data(src.place(), src.type()); auto *dst_ptr = dst->mutable_data(src.place(), src.dtype());
auto nccl_dtype = platform::ToNCCLDataType(src.type()); auto nccl_dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(src.dtype()));
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce(
src_ptr, dst_ptr, src.numel(), nccl_dtype, ncclSum, comm->comm(), src_ptr, dst_ptr, src.numel(), nccl_dtype, ncclSum, comm->comm(),
stream)); stream));
...@@ -82,7 +84,7 @@ static void AllReduce(const pten::SelectedRows &src, pten::SelectedRows *dst, ...@@ -82,7 +84,7 @@ static void AllReduce(const pten::SelectedRows &src, pten::SelectedRows *dst,
platform::errors::Unimplemented( platform::errors::Unimplemented(
"Imperative mode does not support multi-CPU training yet.")); "Imperative mode does not support multi-CPU training yet."));
auto dtype = src_tensor.type(); auto dtype = framework::TransToProtoVarType(src_tensor.dtype());
auto nccl_dtype = platform::ToNCCLDataType(dtype); auto nccl_dtype = platform::ToNCCLDataType(dtype);
auto *dev_ctx = static_cast<platform::CUDADeviceContext *>( auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get(place)); platform::DeviceContextPool::Instance().Get(place));
...@@ -127,7 +129,7 @@ static void AllReduce(const pten::SelectedRows &src, pten::SelectedRows *dst, ...@@ -127,7 +129,7 @@ static void AllReduce(const pten::SelectedRows &src, pten::SelectedRows *dst,
dims[0] = rows_num; dims[0] = rows_num;
auto feature_size = framework::product(dims) / dims[0]; auto feature_size = framework::product(dims) / dims[0];
dst_tensor->Resize(dims); dst_tensor->Resize(dims);
auto *dst_tensor_ptr = dst_tensor->mutable_data(place, dtype); auto *dst_tensor_ptr = dst_tensor->mutable_data(place, src_tensor.dtype());
const auto *src_tensor_ptr = src_tensor.data(); const auto *src_tensor_ptr = src_tensor.data();
auto sizeof_dtype = framework::SizeOfType(dtype); auto sizeof_dtype = framework::SizeOfType(dtype);
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/op_base.h" #include "paddle/fluid/imperative/op_base.h"
...@@ -152,7 +153,8 @@ void BasicEngine::CheckBackwardInputs(const OpBase& op) { ...@@ -152,7 +153,8 @@ void BasicEngine::CheckBackwardInputs(const OpBase& op) {
// correct. var->DataType() returns the default dtype, which is float32. // correct. var->DataType() returns the default dtype, which is float32.
// Here, we use the type of the corresponding forward datatype. // Here, we use the type of the corresponding forward datatype.
tensor->mutable_data(op.place(), var->ForwardDataType()); tensor->mutable_data(
op.place(), framework::TransToPtenDataType(var->ForwardDataType()));
VLOG(6) << "Set ungenerated Grad: " << var->Name() VLOG(6) << "Set ungenerated Grad: " << var->Name()
<< " as zero with dtype " << " as zero with dtype "
<< framework::DataTypeToString(var->ForwardDataType()); << framework::DataTypeToString(var->ForwardDataType());
......
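basic_engine.cc (and the gradient_accumulator and partial_grad_engine hunks further down) go the other way: mutable_data(place, ...) now takes a pten DataType, so call sites that only hold the legacy proto enum, such as VariableWrapper::ForwardDataType(), wrap it in framework::TransToPtenDataType. A small sketch of allocating an ungenerated gradient under that assumption; ResizeAndAllocGrad is an illustrative name, not part of this commit:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/place.h"

namespace paddle {
namespace imperative {

// Illustrative helper: allocate grad storage when only the forward variable's
// legacy proto::VarType::Type is known.
inline void ResizeAndAllocGrad(const platform::Place& place,
                               framework::proto::VarType::Type forward_dtype,
                               const framework::DDim& dims,
                               framework::Tensor* grad) {
  grad->Resize(dims);
  // mutable_data expects a pten DataType now, so bridge from the proto enum;
  // the real code then zero-fills the buffer with pten::funcs::set_constant.
  grad->mutable_data(place, framework::TransToPtenDataType(forward_dtype));
}

}  // namespace imperative
}  // namespace paddle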
...@@ -13,13 +13,14 @@ ...@@ -13,13 +13,14 @@
// limitations under the License. // limitations under the License.
#if defined(PADDLE_WITH_XPU_BKCL) #if defined(PADDLE_WITH_XPU_BKCL)
#include "paddle/fluid/imperative/bkcl_context.h"
#include <string> #include <string>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/variable.h" #include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/imperative/bkcl_context.h"
#include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/xpu/bkcl_helper.h" #include "paddle/fluid/platform/device/xpu/bkcl_helper.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
...@@ -41,8 +42,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst, ...@@ -41,8 +42,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst,
const void *src_ptr = src.data(); const void *src_ptr = src.data();
dst->Resize(src.dims()); dst->Resize(src.dims());
auto *dst_ptr = dst->mutable_data(src.place(), src.type()); auto *dst_ptr = dst->mutable_data(src.place(), src.dtype());
auto bkcl_dtype = platform::ToBKCLDataType(src.type()); auto bkcl_dtype =
platform::ToBKCLDataType(framework::TransToProtoVarType(src.dtype()));
PADDLE_ENFORCE_EQ(bkcl_all_reduce(comm->comm(), src_ptr, dst_ptr, src.numel(), PADDLE_ENFORCE_EQ(bkcl_all_reduce(comm->comm(), src_ptr, dst_ptr, src.numel(),
bkcl_dtype, BKCL_ADD, stream), bkcl_dtype, BKCL_ADD, stream),
...@@ -159,7 +161,8 @@ void BKCLParallelContext::Broadcast(framework::Variable *src, int ring_id) { ...@@ -159,7 +161,8 @@ void BKCLParallelContext::Broadcast(framework::Variable *src, int ring_id) {
XPUStream stream = comm->stream(); XPUStream stream = comm->stream();
void *src_ptr = src_tensor->data(); void *src_ptr = src_tensor->data();
auto data_type = platform::ToBKCLDataType(src_tensor->type()); auto data_type = platform::ToBKCLDataType(
framework::TransToProtoVarType(src_tensor->dtype()));
PADDLE_ENFORCE_EQ(bkcl_broadcast(comm->comm(), src_ptr, src_ptr, PADDLE_ENFORCE_EQ(bkcl_broadcast(comm->comm(), src_ptr, src_ptr,
src_tensor->numel(), data_type, 0, stream), src_tensor->numel(), data_type, 0, stream),
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/imperative/gloo_context.h" #include "paddle/fluid/imperative/gloo_context.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/fleet/gloo_wrapper.h" #include "paddle/fluid/framework/fleet/gloo_wrapper.h"
#include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
...@@ -109,7 +110,7 @@ void GLOOParallelContext::AllReduce(const framework::Tensor &src_tensor, ...@@ -109,7 +110,7 @@ void GLOOParallelContext::AllReduce(const framework::Tensor &src_tensor,
framework::Tensor *dst_tensor) { framework::Tensor *dst_tensor) {
auto gloo_wrapper = framework::GlooWrapper::GetInstance(); auto gloo_wrapper = framework::GlooWrapper::GetInstance();
dst_tensor->Resize(src_tensor.dims()); dst_tensor->Resize(src_tensor.dims());
switch (src_tensor.type()) { switch (framework::TransToProtoVarType(src_tensor.dtype())) {
GLOO_CASE(framework::proto::VarType::FP32, float, gloo_wrapper); GLOO_CASE(framework::proto::VarType::FP32, float, gloo_wrapper);
GLOO_CASE(framework::proto::VarType::FP64, double, gloo_wrapper); GLOO_CASE(framework::proto::VarType::FP64, double, gloo_wrapper);
GLOO_CASE(framework::proto::VarType::INT32, int, gloo_wrapper); GLOO_CASE(framework::proto::VarType::INT32, int, gloo_wrapper);
...@@ -139,7 +140,7 @@ void GLOOParallelContext::AllReduce(const pten::SelectedRows &src, ...@@ -139,7 +140,7 @@ void GLOOParallelContext::AllReduce(const pten::SelectedRows &src,
VLOG(3) << "SelectedRows AllReduce start"; VLOG(3) << "SelectedRows AllReduce start";
const auto &src_tensor = src.value(); const auto &src_tensor = src.value();
const auto &place = src_tensor.place(); const auto &place = src_tensor.place();
auto dtype = src_tensor.type(); auto dtype = framework::TransToProtoVarType(src_tensor.dtype());
// 1. Gather rows number from all workers. Here use ncclAllGather to do this, // 1. Gather rows number from all workers. Here use ncclAllGather to do this,
// but we can use other ways to implement it in the future // but we can use other ways to implement it in the future
const auto &src_rows = src.rows(); const auto &src_rows = src.rows();
...@@ -169,7 +170,7 @@ void GLOOParallelContext::AllReduce(const pten::SelectedRows &src, ...@@ -169,7 +170,7 @@ void GLOOParallelContext::AllReduce(const pten::SelectedRows &src,
std::for_each(element_nums.begin(), element_nums.end(), std::for_each(element_nums.begin(), element_nums.end(),
[feature_size](size_t &x) { x = x * feature_size; }); [feature_size](size_t &x) { x = x * feature_size; });
auto *dst_tensor_ptr = dst_tensor->mutable_data(place, dtype); auto *dst_tensor_ptr = dst_tensor->mutable_data(place, src_tensor.dtype());
gloo_wrapper->AllGatherVector<int64_t>(const_cast<int64_t *>(src_rows_ptr), gloo_wrapper->AllGatherVector<int64_t>(const_cast<int64_t *>(src_rows_ptr),
static_cast<int64_t *>(dst_rows_ptr), static_cast<int64_t *>(dst_rows_ptr),
rows_num_vector); rows_num_vector);
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <memory> #include <memory>
#include <utility> #include <utility>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/selected_rows_utils.h" #include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/layer.h"
...@@ -263,10 +264,11 @@ void TensorAdd(const VarType& src, VarType* dst) { ...@@ -263,10 +264,11 @@ void TensorAdd(const VarType& src, VarType* dst) {
"%zu and the number of elements of destination tensor is %zu.", "%zu and the number of elements of destination tensor is %zu.",
numel, dst_tensor->numel())); numel, dst_tensor->numel()));
auto data_type = src_tensor.type(); auto data_type = framework::TransToProtoVarType(src_tensor.dtype());
auto place = src_tensor.place(); auto place = src_tensor.place();
PADDLE_ENFORCE_EQ(dst_tensor->type(), data_type, PADDLE_ENFORCE_EQ(framework::TransToProtoVarType(dst_tensor->dtype()),
data_type,
platform::errors::PreconditionNotMet( platform::errors::PreconditionNotMet(
"The data type of source tensor and destination tensor " "The data type of source tensor and destination tensor "
"should be equal, Otherwise, the calculation results " "should be equal, Otherwise, the calculation results "
...@@ -376,7 +378,8 @@ void SelectedRowsAddToTensor(const VarType& src, VarType* dst) { ...@@ -376,7 +378,8 @@ void SelectedRowsAddToTensor(const VarType& src, VarType* dst) {
const pten::SelectedRows& src_selected_rows = const pten::SelectedRows& src_selected_rows =
GetInnerTensor<pten::SelectedRows>(src); GetInnerTensor<pten::SelectedRows>(src);
auto place = dst_tensor->place(); auto place = dst_tensor->place();
auto data_type = src_selected_rows.value().type(); auto data_type =
framework::TransToProtoVarType(src_selected_rows.value().dtype());
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
#define PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(dev_ctx_type, cpp_type) \ #define PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(dev_ctx_type, cpp_type) \
...@@ -422,13 +425,14 @@ void SelectedRowsAddTensor(const VarType& src_selected_rows_var, ...@@ -422,13 +425,14 @@ void SelectedRowsAddTensor(const VarType& src_selected_rows_var,
const pten::DenseTensor& src_tensor = const pten::DenseTensor& src_tensor =
GetInnerTensor<pten::DenseTensor>(src_tensor_var); GetInnerTensor<pten::DenseTensor>(src_tensor_var);
const auto& place = src_tensor.place(); const auto& place = src_tensor.place();
auto data_type = src_tensor.type(); auto data_type = framework::TransToProtoVarType(src_tensor.dtype());
auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place); auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place);
pten::DenseTensor* dst_tensor = pten::DenseTensor* dst_tensor =
GetInnerMutableTensor<pten::DenseTensor>(dst_tensor_var); GetInnerMutableTensor<pten::DenseTensor>(dst_tensor_var);
dst_tensor->Resize(src_tensor.dims()); dst_tensor->Resize(src_tensor.dims());
dst_tensor->mutable_data(place, data_type); dst_tensor->mutable_data(place, src_tensor.dtype());
#define PADDLE_SELECTED_ROWS_ADD_TENSOR(dev_ctx_type, cpp_type) \ #define PADDLE_SELECTED_ROWS_ADD_TENSOR(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \ if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
paddle::operators::math::SelectedRowsAddTensor<dev_ctx_type, cpp_type> \ paddle::operators::math::SelectedRowsAddTensor<dev_ctx_type, cpp_type> \
...@@ -477,7 +481,8 @@ std::shared_ptr<VariableWrapper> SelectedRowsMerge( ...@@ -477,7 +481,8 @@ std::shared_ptr<VariableWrapper> SelectedRowsMerge(
auto& src_selected_rows1 = src1.Get<pten::SelectedRows>(); auto& src_selected_rows1 = src1.Get<pten::SelectedRows>();
auto& src_selected_rows2 = src2.Get<pten::SelectedRows>(); auto& src_selected_rows2 = src2.Get<pten::SelectedRows>();
auto place = src_selected_rows1.value().place(); auto place = src_selected_rows1.value().place();
auto data_type = src_selected_rows1.value().type(); auto data_type =
framework::TransToProtoVarType(src_selected_rows1.value().dtype());
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
std::vector<const pten::SelectedRows*> src_selected_rows; std::vector<const pten::SelectedRows*> src_selected_rows;
...@@ -702,12 +707,14 @@ void EagerGradientAccumulator::SumGrad(std::shared_ptr<VariableWrapper> var, ...@@ -702,12 +707,14 @@ void EagerGradientAccumulator::SumGrad(std::shared_ptr<VariableWrapper> var,
VLOG(6) << "Dims of " << dst_var->Name() << " is set as: " VLOG(6) << "Dims of " << dst_var->Name() << " is set as: "
<< var->Var().Get<framework::LoDTensor>().dims(); << var->Var().Get<framework::LoDTensor>().dims();
tensor->Resize(var->Var().Get<framework::LoDTensor>().dims()); tensor->Resize(var->Var().Get<framework::LoDTensor>().dims());
tensor->mutable_data(place, var->DataType()); tensor->mutable_data(place,
framework::TransToPtenDataType(var->DataType()));
pten::funcs::set_constant(*dev_ctx, tensor, 0.0); pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
} else { } else {
auto* tensor = auto* tensor =
dst_var->MutableVar()->GetMutable<framework::LoDTensor>(); dst_var->MutableVar()->GetMutable<framework::LoDTensor>();
tensor->mutable_data(place, var->DataType()); tensor->mutable_data(place,
framework::TransToPtenDataType(var->DataType()));
pten::funcs::set_constant(*dev_ctx, tensor, 0.0); pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
} }
} }
...@@ -834,12 +841,14 @@ void SortedGradientAccumulator::SumGrad(std::shared_ptr<VariableWrapper> var, ...@@ -834,12 +841,14 @@ void SortedGradientAccumulator::SumGrad(std::shared_ptr<VariableWrapper> var,
VLOG(6) << "Dims of " << dst_var->Name() << " is set as: " VLOG(6) << "Dims of " << dst_var->Name() << " is set as: "
<< var->Var().Get<framework::LoDTensor>().dims(); << var->Var().Get<framework::LoDTensor>().dims();
tensor->Resize(var->Var().Get<framework::LoDTensor>().dims()); tensor->Resize(var->Var().Get<framework::LoDTensor>().dims());
tensor->mutable_data(place, var->DataType()); tensor->mutable_data(place,
framework::TransToPtenDataType(var->DataType()));
pten::funcs::set_constant(*dev_ctx, tensor, 0.0); pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
} else { } else {
auto* tensor = auto* tensor =
dst_var->MutableVar()->GetMutable<framework::LoDTensor>(); dst_var->MutableVar()->GetMutable<framework::LoDTensor>();
tensor->mutable_data(place, var->DataType()); tensor->mutable_data(place,
framework::TransToPtenDataType(var->DataType()));
pten::funcs::set_constant(*dev_ctx, tensor, 0.0); pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
} }
} }
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/imperative/hccl_context.h" #include "paddle/fluid/imperative/hccl_context.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h" #include "paddle/fluid/framework/variable.h"
...@@ -44,8 +45,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst, ...@@ -44,8 +45,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst,
void *src_ptr = const_cast<void *>(src.data()); void *src_ptr = const_cast<void *>(src.data());
dst->Resize(src.dims()); dst->Resize(src.dims());
void *dst_ptr = dst->mutable_data(src.place(), src.type()); void *dst_ptr = dst->mutable_data(src.place(), src.dtype());
HcclDataType hccl_dtype = platform::ToHCCLDataType(src.type()); HcclDataType hccl_dtype =
platform::ToHCCLDataType(framework::TransToProtoVarType(src.dtype()));
PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::HcclAllReduce( PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::HcclAllReduce(
src_ptr, dst_ptr, src.numel(), hccl_dtype, HCCL_REDUCE_SUM, comm->comm(), src_ptr, dst_ptr, src.numel(), hccl_dtype, HCCL_REDUCE_SUM, comm->comm(),
...@@ -169,7 +171,8 @@ void HCCLParallelContext::Broadcast(framework::Variable *src, int ring_id) { ...@@ -169,7 +171,8 @@ void HCCLParallelContext::Broadcast(framework::Variable *src, int ring_id) {
void *src_ptr = void *src_ptr =
reinterpret_cast<void *>(const_cast<void *>(src_tensor->data())); reinterpret_cast<void *>(const_cast<void *>(src_tensor->data()));
auto hccl_dtype = platform::ToHCCLDataType(src_tensor->type()); auto hccl_dtype = platform::ToHCCLDataType(
framework::TransToProtoVarType(src_tensor->dtype()));
PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::HcclBroadcast( PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::HcclBroadcast(
src_ptr, src_tensor->numel(), hccl_dtype, 0, comm->comm(), src_ptr, src_tensor->numel(), hccl_dtype, 0, comm->comm(),
reinterpret_cast<void *>(stream))); reinterpret_cast<void *>(stream)));
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/imperative/jit/program_desc_tracer.h" #include "paddle/fluid/imperative/jit/program_desc_tracer.h"
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle { namespace paddle {
namespace imperative { namespace imperative {
...@@ -253,7 +254,7 @@ void ProgramDescTracer::InsertVarIfNotExist( ...@@ -253,7 +254,7 @@ void ProgramDescTracer::InsertVarIfNotExist(
new_var_desc->SetShape(framework::vectorize<int64_t>(tensor.dims())); new_var_desc->SetShape(framework::vectorize<int64_t>(tensor.dims()));
new_var_desc->SetLoDLevel(tensor.lod().size()); new_var_desc->SetLoDLevel(tensor.lod().size());
if (tensor.IsInitialized()) { if (tensor.IsInitialized()) {
new_var_desc->SetDataType(tensor.type()); new_var_desc->SetDataType(framework::TransToProtoVarType(tensor.dtype()));
} else { } else {
new_var_desc->SetDataType(framework::proto::VarType::FP32); new_var_desc->SetDataType(framework::proto::VarType::FP32);
} }
......
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
#include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/imperative/infer_var_type_context.h" #include "paddle/fluid/imperative/infer_var_type_context.h"
#include "paddle/fluid/imperative/op_base.h" #include "paddle/fluid/imperative/op_base.h"
...@@ -99,7 +101,9 @@ static std::string DebugString( ...@@ -99,7 +101,9 @@ static std::string DebugString(
auto& tensor = var.Get<framework::LoDTensor>(); auto& tensor = var.Get<framework::LoDTensor>();
ss << "LoDTensor<"; ss << "LoDTensor<";
if (tensor.IsInitialized()) { if (tensor.IsInitialized()) {
ss << framework::DataTypeToString(tensor.type()) << ", "; ss << framework::DataTypeToString(
framework::TransToProtoVarType(tensor.dtype()))
<< ", ";
ss << tensor.place() << ", "; ss << tensor.place() << ", ";
ss << "(" << tensor.dims() << ")"; ss << "(" << tensor.dims() << ")";
} else { } else {
...@@ -112,7 +116,9 @@ static std::string DebugString( ...@@ -112,7 +116,9 @@ static std::string DebugString(
auto& tensor = selected_rows.value(); auto& tensor = selected_rows.value();
auto& rows = selected_rows.rows(); auto& rows = selected_rows.rows();
if (tensor.IsInitialized()) { if (tensor.IsInitialized()) {
ss << framework::DataTypeToString(tensor.type()) << ", "; ss << framework::DataTypeToString(
framework::TransToProtoVarType(tensor.dtype()))
<< ", ";
ss << tensor.place() << ", "; ss << tensor.place() << ", ";
ss << "height(" << selected_rows.height() << "), rows("; ss << "height(" << selected_rows.height() << "), rows(";
std::for_each(rows.cbegin(), rows.cend(), std::for_each(rows.cbegin(), rows.cend(),
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "paddle/fluid/platform/dynload/nccl.h" #include "paddle/fluid/platform/dynload/nccl.h"
#endif #endif
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h" #include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h"
...@@ -143,7 +144,8 @@ void NCCLParallelContext::Broadcast(framework::Variable *src, int ring_id) { ...@@ -143,7 +144,8 @@ void NCCLParallelContext::Broadcast(framework::Variable *src, int ring_id) {
gpuStream_t stream = comm->stream(); gpuStream_t stream = comm->stream();
void *src_ptr = src_tensor->data(); void *src_ptr = src_tensor->data();
auto nccl_dtype = platform::ToNCCLDataType(src_tensor->type()); auto nccl_dtype = platform::ToNCCLDataType(
framework::TransToProtoVarType(src_tensor->dtype()));
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast( PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast(
src_ptr, src_tensor->numel(), nccl_dtype, 0, comm->comm(), stream)); src_ptr, src_tensor->numel(), nccl_dtype, 0, comm->comm(), stream));
} }
......
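The NCCL, BKCL and HCCL hunks all share one shape: the platform::To*DataType helpers still accept the proto enum, so the tensor's pten dtype is converted right at the call site. A sketch of a broadcast under that assumption (requires an NCCL/RCCL build of Paddle; BroadcastByDtype is an illustrative name, not part of this commit):

#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace imperative {

// Illustrative helper: broadcast a tensor in-place from rank 0.
inline void BroadcastByDtype(framework::Tensor* tensor, ncclComm_t comm,
                             gpuStream_t stream) {
  void* ptr = tensor->data();
  // ToNCCLDataType still speaks proto::VarType::Type, so convert the dtype.
  auto nccl_dtype = platform::ToNCCLDataType(
      framework::TransToProtoVarType(tensor->dtype()));
  PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast(
      ptr, tensor->numel(), nccl_dtype, /*root=*/0, comm, stream));
}

}  // namespace imperative
}  // namespace paddle
#endif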
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <unordered_set> #include <unordered_set>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/op_base.h" #include "paddle/fluid/imperative/op_base.h"
...@@ -312,9 +313,11 @@ static void FillConstantLike(const VariableWrapper &ref_var, ...@@ -312,9 +313,11 @@ static void FillConstantLike(const VariableWrapper &ref_var,
// we can't get data_type_ directly. We need to check if we can only use // we can't get data_type_ directly. We need to check if we can only use
// default data_type for now. // default data_type for now.
if (ref_var.ForwardDataType() != -1) { if (ref_var.ForwardDataType() != -1) {
dst_tensor->mutable_data(place, ref_var.ForwardDataType()); dst_tensor->mutable_data(
place, framework::TransToPtenDataType(ref_var.ForwardDataType()));
} else { } else {
dst_tensor->mutable_data(place, ref_var.DataType()); dst_tensor->mutable_data(
place, framework::TransToPtenDataType(ref_var.DataType()));
} }
pten::funcs::set_constant(*dev_ctx, dst_tensor, value); pten::funcs::set_constant(*dev_ctx, dst_tensor, value);
} }
...@@ -739,7 +742,8 @@ PartialGradTask::PartialGradTask( ...@@ -739,7 +742,8 @@ PartialGradTask::PartialGradTask(
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"The %d-th grad_output's shape does not match the %d-th output", "The %d-th grad_output's shape does not match the %d-th output",
i, i)); i, i));
PADDLE_ENFORCE_EQ(grad_tensor.type(), out_tensor.type(), PADDLE_ENFORCE_EQ(framework::TransToProtoVarType(grad_tensor.dtype()),
framework::TransToProtoVarType(out_tensor.dtype()),
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"The %d-th grad_output's data type does not " "The %d-th grad_output's data type does not "
"match the %d-th output", "match the %d-th output",
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "paddle/fluid/imperative/type_defs.h" #include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/imperative/var_helper.h" #include "paddle/fluid/imperative/var_helper.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/selected_rows.h" #include "paddle/pten/core/selected_rows.h"
...@@ -425,7 +426,7 @@ void BuildDygraphPtenKernelContext( ...@@ -425,7 +426,7 @@ void BuildDygraphPtenKernelContext(
kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr)); kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr));
} else if (attr_defs[i].type_index == } else if (attr_defs[i].type_index ==
std::type_index(typeid(pten::DataType))) { std::type_index(typeid(pten::DataType))) {
auto data_type = pten::TransToPtenDataType( auto data_type = framework::TransToPtenDataType(
static_cast<framework::proto::VarType::Type>( static_cast<framework::proto::VarType::Type>(
BOOST_GET_CONST(int, attr))); BOOST_GET_CONST(int, attr)));
kernel_ctx->EmplaceBackAttr(data_type); kernel_ctx->EmplaceBackAttr(data_type);
......
...@@ -446,7 +446,7 @@ void Reducer::InitializeGroups( ...@@ -446,7 +446,7 @@ void Reducer::InitializeGroups(
InitializeDenseGroups(variable_indices_, &group); InitializeDenseGroups(variable_indices_, &group);
auto tensor = group.dense_contents_.GetMutable<framework::LoDTensor>(); auto tensor = group.dense_contents_.GetMutable<framework::LoDTensor>();
tensor->Resize(framework::make_ddim({group.all_length_})) tensor->Resize(framework::make_ddim({group.all_length_}))
.mutable_data(place_, group.dtype_); .mutable_data(place_, framework::TransToPtenDataType(group.dtype_));
} }
// map variables to this group by VariableLocator // map variables to this group by VariableLocator
...@@ -737,7 +737,8 @@ void Reducer::MarkVarReady(const size_t var_index, const bool is_used_var) { ...@@ -737,7 +737,8 @@ void Reducer::MarkVarReady(const size_t var_index, const bool is_used_var) {
// by avoiding tensor construction // by avoiding tensor construction
if (!group_tensor.IsInitialized()) { if (!group_tensor.IsInitialized()) {
group_tensor.Resize({static_cast<int64_t>(length)}); group_tensor.Resize({static_cast<int64_t>(length)});
group_tensor.mutable_data(place_, group.dtype_); group_tensor.mutable_data(place_,
framework::TransToPtenDataType(group.dtype_));
} }
#ifdef PADDLE_WITH_XPU_BKCL #ifdef PADDLE_WITH_XPU_BKCL
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <vector> #include <vector>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/variable.h" #include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memcpy.h"
...@@ -224,8 +225,10 @@ static bool IsEqualVar(const framework::Variable& var1, ...@@ -224,8 +225,10 @@ static bool IsEqualVar(const framework::Variable& var1,
auto* t1_p = t1.data(); auto* t1_p = t1.data();
auto* t2_p = t2.data(); auto* t2_p = t2.data();
return std::memcmp(t1_p, t2_p, return std::memcmp(
t1.numel() * framework::SizeOfType(t1.type())) == 0; t1_p, t2_p,
t1.numel() * framework::SizeOfType(
framework::TransToProtoVarType(t1.dtype()))) == 0;
} }
template <typename T> template <typename T>
......
...@@ -86,7 +86,7 @@ void GroupConcatSplit(Place place, size_t size) { ...@@ -86,7 +86,7 @@ void GroupConcatSplit(Place place, size_t size) {
tmp.ShareDataWith(*tensor).Resize({static_cast<int64_t>(len)}); tmp.ShareDataWith(*tensor).Resize({static_cast<int64_t>(len)});
group.dense_tensors_.push_back(std::move(tmp)); group.dense_tensors_.push_back(std::move(tmp));
group.all_length_ += len; group.all_length_ += len;
group.dtype_ = tensor->type(); group.dtype_ = framework::TransToProtoVarType(tensor->dtype());
} }
paddle::platform::DeviceContextPool& pool = paddle::platform::DeviceContextPool& pool =
...@@ -96,7 +96,7 @@ void GroupConcatSplit(Place place, size_t size) { ...@@ -96,7 +96,7 @@ void GroupConcatSplit(Place place, size_t size) {
{ // concat { // concat
auto* tensor = group.dense_contents_.GetMutable<framework::LoDTensor>(); auto* tensor = group.dense_contents_.GetMutable<framework::LoDTensor>();
tensor->Resize(framework::make_ddim({group.all_length_})) tensor->Resize(framework::make_ddim({group.all_length_}))
.mutable_data(place, group.dtype_); .mutable_data(place, framework::TransToPtenDataType(group.dtype_));
group.ConcatTensors(*dev_ctx); group.ConcatTensors(*dev_ctx);
group.DivNRanks(*dev_ctx, 1); group.DivNRanks(*dev_ctx, 1);
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "paddle/fluid/imperative/var_helper.h" #include "paddle/fluid/imperative/var_helper.h"
#include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
...@@ -170,9 +171,11 @@ template <> ...@@ -170,9 +171,11 @@ template <>
framework::proto::VarType::Type GetDataType<egr::EagerTensor>( framework::proto::VarType::Type GetDataType<egr::EagerTensor>(
std::shared_ptr<egr::EagerTensor> var) { std::shared_ptr<egr::EagerTensor> var) {
if (var->Var().IsType<pten::SelectedRows>()) { if (var->Var().IsType<pten::SelectedRows>()) {
return var->Var().Get<pten::SelectedRows>().value().type(); return framework::TransToProtoVarType(
var->Var().Get<pten::SelectedRows>().value().type());
} else if (var->Var().IsType<framework::LoDTensor>()) { } else if (var->Var().IsType<framework::LoDTensor>()) {
return var->Var().Get<framework::LoDTensor>().type(); return framework::TransToProtoVarType(
var->Var().Get<framework::LoDTensor>().type());
} else { } else {
PADDLE_THROW(paddle::platform::errors::PermissionDenied( PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"We only support pten::SelectedRows and framework::LoDTensor in " "We only support pten::SelectedRows and framework::LoDTensor in "
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <string> #include <string>
#include <utility> #include <utility>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_kernel_type.h" #include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/fluid/framework/string_array.h" #include "paddle/fluid/framework/string_array.h"
#include "paddle/fluid/framework/variable.h" #include "paddle/fluid/framework/variable.h"
...@@ -169,7 +170,7 @@ class VariableWrapper { ...@@ -169,7 +170,7 @@ class VariableWrapper {
} }
} }
if (tensor && tensor->IsInitialized()) { if (tensor && tensor->IsInitialized()) {
return tensor->type(); return framework::TransToProtoVarType(tensor->dtype());
} else { } else {
VLOG(6) << "The tensor of variable " << name_ << " is not initialized"; VLOG(6) << "The tensor of variable " << name_ << " is not initialized";
......
...@@ -551,7 +551,7 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs, ...@@ -551,7 +551,7 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
framework::FetchType &fetch_var = framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx); framework::GetFetchVariable(*scope, "fetch", idx);
auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var); auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var);
auto type = fetch.type(); auto type = framework::TransToProtoVarType(fetch.dtype());
auto output = &(outputs->at(i)); auto output = &(outputs->at(i));
output->name = fetches_[idx]->Input("X")[0]; output->name = fetches_[idx]->Input("X")[0];
if (type == framework::proto::VarType::FP32) { if (type == framework::proto::VarType::FP32) {
......
...@@ -327,7 +327,7 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs, ...@@ -327,7 +327,7 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
framework::FetchType &fetch_var = framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx); framework::GetFetchVariable(*scope, "fetch", idx);
auto fetch = BOOST_GET_CONST(framework::LoDTensor, fetch_var); auto fetch = BOOST_GET_CONST(framework::LoDTensor, fetch_var);
auto type = fetch.type(); auto type = framework::TransToProtoVarType(fetch.dtype());
auto output = &(outputs->at(i)); auto output = &(outputs->at(i));
output->name = fetchs_[idx]->Input("X")[0]; output->name = fetchs_[idx]->Input("X")[0];
if (type == framework::DataTypeTrait<float>::DataType()) { if (type == framework::DataTypeTrait<float>::DataType()) {
......
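Both predictor GetFetch hunks keep their existing branching over framework::proto::VarType values; only the source of that enum changes, from fetch.type() to TransToProtoVarType(fetch.dtype()). A hedged sketch of mapping a fetched LoDTensor to the inference API dtype; FetchDType is an illustrative name, not part of this commit:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"

namespace paddle {

// Illustrative helper: recover the legacy enum from a fetched tensor and map
// it to PaddleDType.
inline PaddleDType FetchDType(const framework::LoDTensor& fetch) {
  auto type = framework::TransToProtoVarType(fetch.dtype());
  if (type == framework::proto::VarType::FP32) return PaddleDType::FLOAT32;
  if (type == framework::proto::VarType::INT64) return PaddleDType::INT64;
  if (type == framework::proto::VarType::INT32) return PaddleDType::INT32;
  // The real predictors report an error on unsupported types; default to FP32 here.
  return PaddleDType::FLOAT32;
}

}  // namespace paddle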
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include <thread> // NOLINT #include <thread> // NOLINT
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/inference/api/api_impl.h" #include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/tests/test_helper.h" #include "paddle/fluid/inference/tests/test_helper.h"
...@@ -36,13 +37,16 @@ namespace paddle { ...@@ -36,13 +37,16 @@ namespace paddle {
PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) { PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
PaddleTensor pt; PaddleTensor pt;
if (t->type() == framework::proto::VarType::INT64) { if (framework::TransToProtoVarType(t->dtype()) ==
framework::proto::VarType::INT64) {
pt.data.Reset(t->data(), t->numel() * sizeof(int64_t)); pt.data.Reset(t->data(), t->numel() * sizeof(int64_t));
pt.dtype = PaddleDType::INT64; pt.dtype = PaddleDType::INT64;
} else if (t->type() == framework::proto::VarType::FP32) { } else if (framework::TransToProtoVarType(t->dtype()) ==
framework::proto::VarType::FP32) {
pt.data.Reset(t->data(), t->numel() * sizeof(float)); pt.data.Reset(t->data(), t->numel() * sizeof(float));
pt.dtype = PaddleDType::FLOAT32; pt.dtype = PaddleDType::FLOAT32;
} else if (t->type() == framework::proto::VarType::INT32) { } else if (framework::TransToProtoVarType(t->dtype()) ==
framework::proto::VarType::INT32) {
pt.data.Reset(t->data(), t->numel() * sizeof(int32_t)); pt.data.Reset(t->data(), t->numel() * sizeof(int32_t));
pt.dtype = PaddleDType::INT32; pt.dtype = PaddleDType::INT32;
} else { } else {
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
...@@ -122,7 +123,7 @@ T *Tensor::data(PlaceType *place, int *size) const { ...@@ -122,7 +123,7 @@ T *Tensor::data(PlaceType *place, int *size) const {
DataType Tensor::type() const { DataType Tensor::type() const {
EAGER_GET_TENSOR(paddle::framework::LoDTensor); EAGER_GET_TENSOR(paddle::framework::LoDTensor);
auto type = tensor->type(); auto type = paddle::framework::TransToProtoVarType(tensor->dtype());
if (type == paddle::framework::proto::VarType::FP32) { if (type == paddle::framework::proto::VarType::FP32) {
return DataType::FLOAT32; return DataType::FLOAT32;
} else if (type == paddle::framework::proto::VarType::FP16) { } else if (type == paddle::framework::proto::VarType::FP16) {
......
...@@ -172,7 +172,8 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, ...@@ -172,7 +172,8 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
snprintf(output_i.name, output_names[i].length() + 1, "%s", snprintf(output_i.name, output_names[i].length() + 1, "%s",
output_names[i].c_str()); output_names[i].c_str());
auto output_t = predictor->GetOutputTensor(output_names[i]); auto output_t = predictor->GetOutputTensor(output_names[i]);
output_i.dtype = ConvertToPDDataType(output_t->type()); output_i.dtype =
ConvertToPDDataType(framework::TransToProtoVarType(output_t->dtype()));
std::vector<int> output_shape = output_t->shape(); std::vector<int> output_shape = output_t->shape();
output_i.shape = new int[output_shape.size()]; output_i.shape = new int[output_shape.size()];
memmove(output_i.shape, output_shape.data(), memmove(output_i.shape, output_shape.data(),
...@@ -256,7 +257,8 @@ void PD_SetZeroCopyInput(PD_Predictor* predictor, ...@@ -256,7 +257,8 @@ void PD_SetZeroCopyInput(PD_Predictor* predictor,
void PD_GetZeroCopyOutput(PD_Predictor* predictor, PD_ZeroCopyTensor* tensor) { void PD_GetZeroCopyOutput(PD_Predictor* predictor, PD_ZeroCopyTensor* tensor) {
auto output = predictor->predictor->GetOutputTensor(tensor->name); auto output = predictor->predictor->GetOutputTensor(tensor->name);
tensor->dtype = ConvertToPDDataType(output->type()); tensor->dtype =
ConvertToPDDataType(framework::TransToProtoVarType(output->dtype()));
auto shape = output->shape(); auto shape = output->shape();
size_t shape_size = shape.size(); size_t shape_size = shape.size();
if (tensor->shape.capacity < shape_size * sizeof(int)) { if (tensor->shape.capacity < shape_size * sizeof(int)) {
...@@ -271,7 +273,8 @@ void PD_GetZeroCopyOutput(PD_Predictor* predictor, PD_ZeroCopyTensor* tensor) { ...@@ -271,7 +273,8 @@ void PD_GetZeroCopyOutput(PD_Predictor* predictor, PD_ZeroCopyTensor* tensor) {
int n = int n =
std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>()); std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
size_t length = n * paddle::PaddleDtypeSize(output->type()); size_t length = n * paddle::PaddleDtypeSize(
framework::TransToProtoVarType(output->dtype()));
if (tensor->data.capacity < length) { if (tensor->data.capacity < length) {
if (tensor->data.data) { if (tensor->data.data) {
std::free(tensor->data.data); std::free(tensor->data.data);
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <functional> #include <functional>
#include <map> #include <map>
#include <memory> #include <memory>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/inference/lite/engine.h" #include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/allocation/allocator.h"
...@@ -185,9 +186,11 @@ void InitDstTensor(paddle::lite_api::Tensor* dst, ...@@ -185,9 +186,11 @@ void InitDstTensor(paddle::lite_api::Tensor* dst,
// the input tensor. // the input tensor.
constexpr int empty_size = 0; constexpr int empty_size = 0;
dst->Resize({empty_size}); dst->Resize({empty_size});
GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()), GetLiteTensorDataPtr(
GetLiteTargetType(src.place())); dst, GetLitePrecisionType(framework::TransToProtoVarType(src.dtype())),
dst->SetPrecision(GetLitePrecisionType(src.type())); GetLiteTargetType(src.place()));
dst->SetPrecision(
GetLitePrecisionType(framework::TransToProtoVarType(src.dtype())));
paddle::lite::LoD lite_lod; paddle::lite::LoD lite_lod;
SetLoD(&lite_lod, src.lod()); SetLoD(&lite_lod, src.lod());
dst->SetLoD(lite_lod); dst->SetLoD(lite_lod);
...@@ -195,8 +198,9 @@ void InitDstTensor(paddle::lite_api::Tensor* dst, ...@@ -195,8 +198,9 @@ void InitDstTensor(paddle::lite_api::Tensor* dst,
void InitDstTensor(framework::LoDTensor* dst, void InitDstTensor(framework::LoDTensor* dst,
const paddle::lite_api::Tensor& src) { const paddle::lite_api::Tensor& src) {
dst->mutable_data(inference::lite::utils::GetNativePlace(src.target()), dst->mutable_data(
GetNativePrecisionType(src.precision())); inference::lite::utils::GetNativePlace(src.target()),
framework::TransToPtenDataType(GetNativePrecisionType(src.precision())));
SetLoD(dst->mutable_lod(), src.lod()); SetLoD(dst->mutable_lod(), src.lod());
} }
...@@ -208,14 +212,16 @@ void TensorCopyAsync(paddle::lite_api::Tensor* dst, ...@@ -208,14 +212,16 @@ void TensorCopyAsync(paddle::lite_api::Tensor* dst,
const platform::Place& src_place = src.place(); const platform::Place& src_place = src.place();
const platform::Place& dst_place = GetNativePlace(dst->target()); const platform::Place& dst_place = GetNativePlace(dst->target());
const size_t bytes = const size_t bytes =
static_cast<size_t>(src.numel()) * framework::SizeOfType(src.type()); static_cast<size_t>(src.numel()) * framework::DataTypeSize(src.dtype());
dst->Resize(framework::vectorize(src.dims())); dst->Resize(framework::vectorize(src.dims()));
const void* src_data = src.data(); const void* src_data = src.data();
void* dst_data{nullptr}; void* dst_data{nullptr};
dst_data = GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()), dst_data = GetLiteTensorDataPtr(
GetLiteTargetType(src.place())); dst, GetLitePrecisionType(framework::TransToProtoVarType(src.dtype())),
GetLiteTargetType(src.place()));
VLOG(3) << "[CopyAsync fluid -> lite] Bytes = " << bytes << ", src = " << &src VLOG(3) << "[CopyAsync fluid -> lite] Bytes = " << bytes << ", src = " << &src
<< ", dst = " << dst << ", src_type = " << src.type(); << ", dst = " << dst
<< ", src_type = " << framework::TransToProtoVarType(src.dtype());
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx); MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
VLOG(3) << "[Lite memory size] Bytes = " << bytes; VLOG(3) << "[Lite memory size] Bytes = " << bytes;
} }
...@@ -229,12 +235,13 @@ void TensorCopyAsync(framework::LoDTensor* dst, ...@@ -229,12 +235,13 @@ void TensorCopyAsync(framework::LoDTensor* dst,
const platform::Place& src_place = GetNativePlace(src.target()); const platform::Place& src_place = GetNativePlace(src.target());
const platform::Place& dst_place = dst->place(); const platform::Place& dst_place = dst->place();
int64_t src_numel = GetLiteTensorNumel(src); int64_t src_numel = GetLiteTensorNumel(src);
const size_t bytes = src_numel * framework::SizeOfType(dst->type()); const size_t bytes = src_numel * framework::DataTypeSize(dst->dtype());
const void* src_data = src.data<void>(); const void* src_data = src.data<void>();
// When Lite is ready, the source type needs to be modified here. // When Lite is ready, the source type needs to be modified here.
void* dst_data = dst->mutable_data(dst_place, dst->type()); void* dst_data = dst->mutable_data(dst_place, dst->dtype());
VLOG(3) << "[CopyAsync lite -> fluid] Bytes = " << bytes << ", src = " << &src VLOG(3) << "[CopyAsync lite -> fluid] Bytes = " << bytes << ", src = " << &src
<< ", dst = " << dst << ", src_type = " << dst->type(); << ", dst = " << dst
<< ", src_type = " << framework::TransToProtoVarType(dst->dtype());
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx); MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
VLOG(3) << "[Lite memory size] Bytes = " << bytes; VLOG(3) << "[Lite memory size] Bytes = " << bytes;
} }
...@@ -244,7 +251,8 @@ void TensorDataShare(paddle::lite_api::Tensor* dst, framework::LoDTensor* src) { ...@@ -244,7 +251,8 @@ void TensorDataShare(paddle::lite_api::Tensor* dst, framework::LoDTensor* src) {
dst->Resize(framework::vectorize(src->dims())); dst->Resize(framework::vectorize(src->dims()));
dst->ShareExternalMemory(src->data(), src->memory_size(), dst->ShareExternalMemory(src->data(), src->memory_size(),
GetLiteTargetType(src->place())); GetLiteTargetType(src->place()));
dst->SetPrecision(GetLitePrecisionType(src->type())); dst->SetPrecision(
GetLitePrecisionType(framework::TransToProtoVarType(src->dtype())));
paddle::lite::LoD lite_lod; paddle::lite::LoD lite_lod;
SetLoD(&lite_lod, src->lod()); SetLoD(&lite_lod, src->lod());
dst->SetLoD(lite_lod); dst->SetLoD(lite_lod);
...@@ -261,7 +269,9 @@ void TensorDataShare(framework::LoDTensor* dst, paddle::lite_api::Tensor* src) { ...@@ -261,7 +269,9 @@ void TensorDataShare(framework::LoDTensor* dst, paddle::lite_api::Tensor* src) {
src_raw_data, memory_size, GetNativePlace(src->target()))); src_raw_data, memory_size, GetNativePlace(src->target())));
dst->Resize(paddle::framework::make_ddim(src->shape())); dst->Resize(paddle::framework::make_ddim(src->shape()));
SetLoD(dst->mutable_lod(), src->lod()); SetLoD(dst->mutable_lod(), src->lod());
dst->ResetHolderWithType(holder, GetNativePrecisionType(src->precision())); dst->ResetHolderWithType(
holder,
framework::TransToPtenDataType(GetNativePrecisionType(src->precision())));
} }
} // namespace utils } // namespace utils
......
...@@ -1020,7 +1020,7 @@ static bool CompareTensorData(const framework::LoDTensor &a, ...@@ -1020,7 +1020,7 @@ static bool CompareTensorData(const framework::LoDTensor &a,
} }
for (size_t i = 0; i < a_size; i++) { for (size_t i = 0; i < a_size; i++) {
if (a.type() == VarType::FP32) { if (framework::TransToProtoVarType(a.dtype()) == VarType::FP32) {
const auto *a_data = a.data<float>(); const auto *a_data = a.data<float>();
const auto *b_data = b.data<float>(); const auto *b_data = b.data<float>();
if (std::abs(a_data[i] - b_data[i]) > 1e-3) { if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
...@@ -1029,7 +1029,7 @@ static bool CompareTensorData(const framework::LoDTensor &a, ...@@ -1029,7 +1029,7 @@ static bool CompareTensorData(const framework::LoDTensor &a,
b_data[i]); b_data[i]);
return false; return false;
} }
} else if (a.type() == VarType::INT64) { } else if (framework::TransToProtoVarType(a.dtype()) == VarType::INT64) {
const auto *a_data = a.data<int64_t>(); const auto *a_data = a.data<int64_t>();
const auto *b_data = b.data<int64_t>(); const auto *b_data = b.data<int64_t>();
if (std::abs(a_data[i] - b_data[i]) > 1e-3) { if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
......
...@@ -140,8 +140,9 @@ class AbsDoubleGradOp : public framework::OperatorWithKernel { ...@@ -140,8 +140,9 @@ class AbsDoubleGradOp : public framework::OperatorWithKernel {
framework::OpKernelType GetKernelTypeForVar( framework::OpKernelType GetKernelTypeForVar(
const std::string& var_name, const framework::Tensor& tensor, const std::string& var_name, const framework::Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const { const framework::OpKernelType& expected_kernel_type) const {
return framework::OpKernelType(tensor.type(), tensor.place(), return framework::OpKernelType(
tensor.layout()); framework::TransToProtoVarType(tensor.dtype()), tensor.place(),
tensor.layout());
} }
}; };
......
...@@ -38,10 +38,12 @@ class ActivationMLUKernel : public framework::OpKernel<T> { ...@@ -38,10 +38,12 @@ class ActivationMLUKernel : public framework::OpKernel<T> {
output->mutable_data<T>(ctx.GetPlace()); output->mutable_data<T>(ctx.GetPlace());
MLUCnnlActivationDesc act_desc(act_mode, alpha); MLUCnnlActivationDesc act_desc(act_mode, alpha);
MLUCnnlTensorDesc input_desc(*input, CNNL_LAYOUT_ARRAY, MLUCnnlTensorDesc input_desc(
ToCnnlDataType(input->type())); *input, CNNL_LAYOUT_ARRAY,
MLUCnnlTensorDesc output_desc(*output, CNNL_LAYOUT_ARRAY, ToCnnlDataType(framework::TransToProtoVarType(input->dtype())));
ToCnnlDataType(output->type())); MLUCnnlTensorDesc output_desc(
*output, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(framework::TransToProtoVarType(output->dtype())));
MLUCnnl::Active(ctx, act_desc.get(), input_desc.get(), MLUCnnl::Active(ctx, act_desc.get(), input_desc.get(),
reinterpret_cast<const void*>(input->data<T>()), reinterpret_cast<const void*>(input->data<T>()),
...@@ -61,12 +63,15 @@ class ActivationGradMLUKernel : public framework::OpKernel<T> { ...@@ -61,12 +63,15 @@ class ActivationGradMLUKernel : public framework::OpKernel<T> {
dx->mutable_data<T>(ctx.GetPlace()); dx->mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc dout_desc(*dout, CNNL_LAYOUT_ARRAY, MLUCnnlTensorDesc dout_desc(
ToCnnlDataType(dout->type())); *dout, CNNL_LAYOUT_ARRAY,
MLUCnnlTensorDesc out_desc(*out, CNNL_LAYOUT_ARRAY, ToCnnlDataType(framework::TransToProtoVarType(dout->dtype())));
ToCnnlDataType(out->type())); MLUCnnlTensorDesc out_desc(
MLUCnnlTensorDesc dx_desc(*dx, CNNL_LAYOUT_ARRAY, *out, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(dx->type())); ToCnnlDataType(framework::TransToProtoVarType(out->dtype())));
MLUCnnlTensorDesc dx_desc(
*dx, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(framework::TransToProtoVarType(dx->dtype())));
MLUCnnlActivationDesc act_desc(act_mode, alpha); MLUCnnlActivationDesc act_desc(act_mode, alpha);
MLUCnnl::ActiveGrad( MLUCnnl::ActiveGrad(
ctx, act_desc.get(), nullptr, nullptr, nullptr, nullptr, ctx, act_desc.get(), nullptr, nullptr, nullptr, nullptr,
......
...@@ -76,13 +76,13 @@ class PowGradNPUKernel : public framework::OpKernel<T> { ...@@ -76,13 +76,13 @@ class PowGradNPUKernel : public framework::OpKernel<T> {
// Step 2: Construct a broadcast factor, which has the same shape with x. // Step 2: Construct a broadcast factor, which has the same shape with x.
// 2.1 Get a factor tensor with shape [1]. // 2.1 Get a factor tensor with shape [1].
Tensor factor_tensor(framework::proto::VarType::FP32); Tensor factor_tensor(experimental::DataType::FLOAT32);
factor_tensor.mutable_data<float>({1}, place); factor_tensor.mutable_data<float>({1}, place);
FillNpuTensorWithConstant<float>(&factor_tensor, factor); FillNpuTensorWithConstant<float>(&factor_tensor, factor);
// 2.2 Get the factor which has the shape with x and the same value with // 2.2 Get the factor which has the shape with x and the same value with
// factor. // factor.
Tensor factor_bc_tensor(framework::proto::VarType::FP32); Tensor factor_bc_tensor(experimental::DataType::FLOAT32);
factor_bc_tensor.mutable_data<float>(x_dims, place); factor_bc_tensor.mutable_data<float>(x_dims, place);
const auto& runner_bc = const auto& runner_bc =
NpuOpRunner("FillD", {factor_tensor}, {factor_bc_tensor}, NpuOpRunner("FillD", {factor_tensor}, {factor_bc_tensor},
...@@ -659,14 +659,15 @@ class HardSwishGradNPUKernel : public framework::OpKernel<T> { ...@@ -659,14 +659,15 @@ class HardSwishGradNPUKernel : public framework::OpKernel<T> {
{{"dims", framework::vectorize(x->dims())}}); {{"dims", framework::vectorize(x->dims())}});
runner_fill.Run(stream); runner_fill.Run(stream);
Tensor tmp_bool(framework::proto::VarType::BOOL); Tensor tmp_bool(experimental::DataType::BOOL);
tmp_bool.mutable_data<bool>(x->dims(), place); tmp_bool.mutable_data<bool>(x->dims(), place);
const auto& runner_less = const auto& runner_less =
NpuOpRunner("Less", {add_offset_val, tensor_threshold}, {tmp_bool}); NpuOpRunner("Less", {add_offset_val, tensor_threshold}, {tmp_bool});
runner_less.Run(stream); runner_less.Run(stream);
Tensor tmp4(x->type()); Tensor tmp4(x->type());
tmp4.mutable_data<T>(x->dims(), place); tmp4.mutable_data<T>(x->dims(), place);
auto dst_dtype = ConvertToNpuDtype(x->type()); auto dst_dtype =
ConvertToNpuDtype(framework::TransToProtoVarType(x->type()));
const auto& runner_cast = const auto& runner_cast =
NpuOpRunner("Cast", {tmp_bool}, {tmp4}, NpuOpRunner("Cast", {tmp_bool}, {tmp4},
{{"dst_type", static_cast<int>(dst_dtype)}}); {{"dst_type", static_cast<int>(dst_dtype)}});
......
...@@ -59,10 +59,13 @@ class AllcloseKernel : public framework::OpKernel<T> { ...@@ -59,10 +59,13 @@ class AllcloseKernel : public framework::OpKernel<T> {
rtol->numel(), 1, rtol->numel(), 1,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"Input(Rtol) size must be 1, but get %d.", rtol->numel())); "Input(Rtol) size must be 1, but get %d.", rtol->numel()));
PADDLE_ENFORCE_EQ(rtol->type(), framework::proto::VarType::FP64, PADDLE_ENFORCE_EQ(
platform::errors::InvalidArgument( framework::TransToProtoVarType(rtol->dtype()),
"Input(Rtol) type must be double, but get %s.", framework::proto::VarType::FP64,
framework::DataTypeToString(rtol->type()))); platform::errors::InvalidArgument(
"Input(Rtol) type must be double, but get %s.",
framework::DataTypeToString(
framework::TransToProtoVarType(rtol->dtype()))));
rtol_v = get_tensor_value(dev_ctx, *rtol); rtol_v = get_tensor_value(dev_ctx, *rtol);
} }
if (ctx.HasInput("Atol")) { if (ctx.HasInput("Atol")) {
...@@ -71,10 +74,13 @@ class AllcloseKernel : public framework::OpKernel<T> { ...@@ -71,10 +74,13 @@ class AllcloseKernel : public framework::OpKernel<T> {
atol->numel(), 1, atol->numel(), 1,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"Input(Atol) size must be 1, but get %d", atol->numel())); "Input(Atol) size must be 1, but get %d", atol->numel()));
PADDLE_ENFORCE_EQ(atol->type(), framework::proto::VarType::FP64, PADDLE_ENFORCE_EQ(
platform::errors::InvalidArgument( framework::TransToProtoVarType(atol->dtype()),
"Input(Atol) type must be double, but get %s", framework::proto::VarType::FP64,
framework::DataTypeToString(atol->type()))); platform::errors::InvalidArgument(
"Input(Atol) type must be double, but get %s",
framework::DataTypeToString(
framework::TransToProtoVarType(atol->dtype()))));
atol_v = get_tensor_value(dev_ctx, *atol); atol_v = get_tensor_value(dev_ctx, *atol);
} }
......
...@@ -42,7 +42,7 @@ void Update(const platform::NPUDeviceContext& ctx, ...@@ -42,7 +42,7 @@ void Update(const platform::NPUDeviceContext& ctx,
platform::NPUMemsetAsync(static_cast<void*>(g), 0, platform::NPUMemsetAsync(static_cast<void*>(g), 0,
good_out_tensor->numel() * sizeof(int), stream); good_out_tensor->numel() * sizeof(int), stream);
// bad_out_data = bad_in_data + 1 // bad_out_data = bad_in_data + 1
Tensor factor_tensor(bad_out_tensor->type()); Tensor factor_tensor(bad_out_tensor->dtype());
factor_tensor.mutable_data<int>({1}, place); factor_tensor.mutable_data<int>({1}, place);
FillNpuTensorWithConstant<int>(&factor_tensor, static_cast<int>(1)); FillNpuTensorWithConstant<int>(&factor_tensor, static_cast<int>(1));
const auto& runner_p2 = NpuOpRunner("Add", {*bad_in_tensor, factor_tensor}, const auto& runner_p2 = NpuOpRunner("Add", {*bad_in_tensor, factor_tensor},
...@@ -91,7 +91,7 @@ void Update(const platform::NPUDeviceContext& ctx, ...@@ -91,7 +91,7 @@ void Update(const platform::NPUDeviceContext& ctx,
bad_out_tensor->numel() * sizeof(int), stream); bad_out_tensor->numel() * sizeof(int), stream);
// good_out_data = good_in_data + 1 // good_out_data = good_in_data + 1
Tensor factor_tensor(good_out_tensor->type()); Tensor factor_tensor(good_out_tensor->dtype());
factor_tensor.mutable_data<int>({1}, place); factor_tensor.mutable_data<int>({1}, place);
FillNpuTensorWithConstant<int>(&factor_tensor, static_cast<int>(1)); FillNpuTensorWithConstant<int>(&factor_tensor, static_cast<int>(1));
const auto& runner_p2 = NpuOpRunner("Add", {*good_in_tensor, factor_tensor}, const auto& runner_p2 = NpuOpRunner("Add", {*good_in_tensor, factor_tensor},
...@@ -188,7 +188,7 @@ class LazyZerosNPU { ...@@ -188,7 +188,7 @@ class LazyZerosNPU {
if (!found_inf_vec[0]) { if (!found_inf_vec[0]) {
framework::TensorCopy(*x, place, dev_ctx, out); framework::TensorCopy(*x, place, dev_ctx, out);
} else if (zero_ptr != dst_ptr) { } else if (zero_ptr != dst_ptr) {
auto size = out->numel() * framework::SizeOfType(out->type()); auto size = out->numel() * framework::DataTypeSize(out->dtype());
memory::Copy(place, dst_ptr, place, zero_ptr, size, stream); memory::Copy(place, dst_ptr, place, zero_ptr, size, stream);
} }
} }
......
...@@ -75,15 +75,16 @@ class ArgsortNPUKernel : public framework::OpKernel<T> { ...@@ -75,15 +75,16 @@ class ArgsortNPUKernel : public framework::OpKernel<T> {
framework::NPUAttributeMap attr = {{"axis", -1}, framework::NPUAttributeMap attr = {{"axis", -1},
{"descending", descending}}; {"descending", descending}};
Tensor indices_tmp(framework::proto::VarType::INT32); Tensor indices_tmp(experimental::DataType::INT32);
indices_tmp.Resize(indices->dims()); indices_tmp.Resize(indices->dims());
if (input->type() == framework::proto::VarType::INT64) { if (framework::TransToProtoVarType(input->dtype()) ==
Tensor input_fp32(framework::proto::VarType::FP32); framework::proto::VarType::INT64) {
Tensor input_fp32(experimental::DataType::FLOAT32);
input_fp32.Resize(input->dims()); input_fp32.Resize(input->dims());
CastToFP32(ctx, stream, *input, &input_fp32); CastToFP32(ctx, stream, *input, &input_fp32);
Tensor output_fp32(framework::proto::VarType::FP32); Tensor output_fp32(experimental::DataType::FLOAT32);
output_fp32.Resize(output->dims()); output_fp32.Resize(output->dims());
if (axis == -1 || axis + 1 == in_dims.size()) { if (axis == -1 || axis + 1 == in_dims.size()) {
...@@ -112,7 +113,7 @@ class ArgsortNPUKernel : public framework::OpKernel<T> { ...@@ -112,7 +113,7 @@ class ArgsortNPUKernel : public framework::OpKernel<T> {
TranposeNPU<float>(ctx, stream, &perm, input_fp32, &trans_input); TranposeNPU<float>(ctx, stream, &perm, input_fp32, &trans_input);
Tensor trans_output(input_fp32.type()); Tensor trans_output(input_fp32.type());
Tensor trans_indices(framework::proto::VarType::INT32); Tensor trans_indices(experimental::DataType::INT32);
trans_output.mutable_data<float>(trans_dims, ctx.GetPlace()); trans_output.mutable_data<float>(trans_dims, ctx.GetPlace());
trans_indices.mutable_data<int32_t>(trans_dims, ctx.GetPlace()); trans_indices.mutable_data<int32_t>(trans_dims, ctx.GetPlace());
...@@ -150,7 +151,7 @@ class ArgsortNPUKernel : public framework::OpKernel<T> { ...@@ -150,7 +151,7 @@ class ArgsortNPUKernel : public framework::OpKernel<T> {
TranposeNPU<T>(ctx, stream, &perm, *input, &trans_input); TranposeNPU<T>(ctx, stream, &perm, *input, &trans_input);
Tensor trans_output(input->type()); Tensor trans_output(input->type());
Tensor trans_indices(framework::proto::VarType::INT32); Tensor trans_indices(experimental::DataType::INT32);
trans_output.mutable_data<T>(trans_dims, ctx.GetPlace()); trans_output.mutable_data<T>(trans_dims, ctx.GetPlace());
trans_indices.mutable_data<int32_t>(trans_dims, ctx.GetPlace()); trans_indices.mutable_data<int32_t>(trans_dims, ctx.GetPlace());
......
...@@ -66,7 +66,8 @@ struct ArrayToLoDFunctor : public boost::static_visitor<void> { ...@@ -66,7 +66,8 @@ struct ArrayToLoDFunctor : public boost::static_visitor<void> {
ArrayToLoDFunctorImpl<DeviceContext> functor; ArrayToLoDFunctorImpl<DeviceContext> functor;
functor.dev_ctx_ = dev_ctx; functor.dev_ctx_ = dev_ctx;
functor.prev_functor_ = this; functor.prev_functor_ = this;
framework::VisitDataType(out->type(), functor); framework::VisitDataType(framework::TransToProtoVarType(out->dtype()),
functor);
} }
}; };
...@@ -101,7 +102,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { ...@@ -101,7 +102,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
"There's no element in the input array.")); "There's no element in the input array."));
int rank = x[0].dims().size(); int rank = x[0].dims().size();
platform::Place place = x[0].place(); platform::Place place = x[0].place();
auto data_type = x[0].type(); auto data_type = x[0].dtype();
int64_t batch_size = x[0].dims()[0]; int64_t batch_size = x[0].dims()[0];
framework::DDim ins_dims = rank > 1 framework::DDim ins_dims = rank > 1
? framework::slice_ddim(x[0].dims(), 1, rank) ? framework::slice_ddim(x[0].dims(), 1, rank)
...@@ -124,12 +125,12 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { ...@@ -124,12 +125,12 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
"The current place is %d, and the previous place is %d.", "The current place is %d, and the previous place is %d.",
i, x[i].place(), place)); i, x[i].place(), place));
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
x[i].type(), data_type, x[i].dtype(), data_type,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"The date type of the %zu'th element in LoDTensorArray " "The date type of the %zu'th element in LoDTensorArray "
"differs from previous ones." "differs from previous ones."
"The current data type is %d, and the previous data type is %d.", "The current data type is %d, and the previous data type is %d.",
i, x[i].type(), data_type)); i, x[i].dtype(), data_type));
batch_size += x[i].dims()[0]; batch_size += x[i].dims()[0];
} }
auto ins_dim_vec = framework::vectorize(ins_dims); auto ins_dim_vec = framework::vectorize(ins_dims);
......
...@@ -151,15 +151,19 @@ framework::OpKernelType BatchNormOp::GetExpectedKernelType( ...@@ -151,15 +151,19 @@ framework::OpKernelType BatchNormOp::GetExpectedKernelType(
bn_param_type = framework::proto::VarType::FP64; bn_param_type = framework::proto::VarType::FP64;
} }
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
bn_param_type, ctx.Input<Tensor>("Scale")->type(), bn_param_type,
framework::TransToProtoVarType(ctx.Input<Tensor>("Scale")->dtype()),
platform::errors::InvalidArgument("Scale input should be of float type")); platform::errors::InvalidArgument("Scale input should be of float type"));
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
bn_param_type, ctx.Input<Tensor>("Bias")->type(), bn_param_type,
framework::TransToProtoVarType(ctx.Input<Tensor>("Bias")->dtype()),
platform::errors::InvalidArgument("Bias input should be of float type")); platform::errors::InvalidArgument("Bias input should be of float type"));
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
bn_param_type, ctx.Input<Tensor>("Mean")->type(), bn_param_type,
framework::TransToProtoVarType(ctx.Input<Tensor>("Mean")->dtype()),
platform::errors::InvalidArgument("Mean input should be of float type")); platform::errors::InvalidArgument("Mean input should be of float type"));
PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input<Tensor>("Variance")->type(), PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType(
ctx.Input<Tensor>("Variance")->dtype()),
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"Variance input should be of float type")); "Variance input should be of float type"));
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include <string> #include <string>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/operators/beam_search_decode_op.h" #include "paddle/fluid/operators/beam_search_decode_op.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
...@@ -192,7 +193,7 @@ class BeamSearchDecodeOp : public framework::OperatorBase { ...@@ -192,7 +193,7 @@ class BeamSearchDecodeOp : public framework::OperatorBase {
LoDTensor* sentenceScores = ctx.Output<LoDTensor>("SentenceScores"); LoDTensor* sentenceScores = ctx.Output<LoDTensor>("SentenceScores");
framework::VisitDataType( framework::VisitDataType(
scores->at(0).type(), framework::TransToProtoVarType(scores->at(0).dtype()),
BeamSearchDecodeFunctor(*ids, *scores, sentenceIds, sentenceScores, BeamSearchDecodeFunctor(*ids, *scores, sentenceIds, sentenceScores,
beam_size, end_id)); beam_size, end_id));
} }
......
...@@ -112,7 +112,7 @@ void BincountCUDAInner(const framework::ExecutionContext& context) { ...@@ -112,7 +112,7 @@ void BincountCUDAInner(const framework::ExecutionContext& context) {
PADDLE_CUDA_NUM_THREADS, 0, stream>>>( PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
input_data, input_numel, has_weights, weights_data, output_data); input_data, input_numel, has_weights, weights_data, output_data);
} else { } else {
const auto& weights_type = weights->type(); const auto& weights_type = framework::TransToProtoVarType(weights->dtype());
if (weights_type == framework::proto::VarType::FP32) { if (weights_type == framework::proto::VarType::FP32) {
float* output_data = output->mutable_data<float>(context.GetPlace()); float* output_data = output->mutable_data<float>(context.GetPlace());
...@@ -141,7 +141,7 @@ class BincountCUDAKernel : public framework::OpKernel<T> { ...@@ -141,7 +141,7 @@ class BincountCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<framework::Tensor>("X"); const Tensor* input = context.Input<framework::Tensor>("X");
const auto& input_type = input->type(); const auto& input_type = framework::TransToProtoVarType(input->dtype());
if (input_type == framework::proto::VarType::INT32) { if (input_type == framework::proto::VarType::INT32) {
BincountCUDAInner<DeviceContext, T, int>(context); BincountCUDAInner<DeviceContext, T, int>(context);
......
...@@ -61,7 +61,7 @@ void BincountInner(const framework::ExecutionContext& context) { ...@@ -61,7 +61,7 @@ void BincountInner(const framework::ExecutionContext& context) {
if (has_weights) { if (has_weights) {
const T* weights_data = weights->data<T>(); const T* weights_data = weights->data<T>();
const auto& weights_type = weights->type(); const auto& weights_type = framework::TransToProtoVarType(weights->dtype());
if (weights_type == framework::proto::VarType::FP32) { if (weights_type == framework::proto::VarType::FP32) {
float* output_data = output->mutable_data<float>(context.GetPlace()); float* output_data = output->mutable_data<float>(context.GetPlace());
pten::funcs::SetConstant<DeviceContext, float>()( pten::funcs::SetConstant<DeviceContext, float>()(
...@@ -95,7 +95,7 @@ class BincountKernel : public framework::OpKernel<T> { ...@@ -95,7 +95,7 @@ class BincountKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<framework::Tensor>("X"); const Tensor* input = context.Input<framework::Tensor>("X");
const auto& input_type = input->type(); const auto& input_type = framework::TransToProtoVarType(input->dtype());
if (input_type == framework::proto::VarType::INT32) { if (input_type == framework::proto::VarType::INT32) {
BincountInner<DeviceContext, T, int>(context); BincountInner<DeviceContext, T, int>(context);
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/cast_op.h" #include "paddle/fluid/operators/cast_op.h"
#include <memory> #include <memory>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/float16.h"
#ifdef PADDLE_WITH_MLU #ifdef PADDLE_WITH_MLU
...@@ -82,7 +83,9 @@ class CastOp : public framework::OperatorWithKernel { ...@@ -82,7 +83,9 @@ class CastOp : public framework::OperatorWithKernel {
auto &tensor_place = tensor->place(); auto &tensor_place = tensor->place();
// NOTE: cuda pinned tensor need to copy its data to target place // NOTE: cuda pinned tensor need to copy its data to target place
if (platform::is_cuda_pinned_place(tensor_place)) { if (platform::is_cuda_pinned_place(tensor_place)) {
return framework::OpKernelType(tensor->type(), ctx.device_context()); return framework::OpKernelType(
framework::TransToProtoVarType(tensor->dtype()),
ctx.device_context());
} }
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
...@@ -100,26 +103,32 @@ class CastOp : public framework::OperatorWithKernel { ...@@ -100,26 +103,32 @@ class CastOp : public framework::OperatorWithKernel {
return true; return true;
}; };
if (this->CanMKLDNNBeUsed(ctx, tensor->type()) && MKLDNNSupportsCast()) { if (this->CanMKLDNNBeUsed(
return framework::OpKernelType(tensor->type(), ctx.GetPlace(), ctx, framework::TransToProtoVarType(tensor->dtype())) &&
framework::DataLayout::kMKLDNN, MKLDNNSupportsCast()) {
framework::LibraryType::kMKLDNN); return framework::OpKernelType(
framework::TransToProtoVarType(tensor->dtype()), ctx.GetPlace(),
framework::DataLayout::kMKLDNN, framework::LibraryType::kMKLDNN);
} }
#endif #endif
#ifdef PADDLE_WITH_MLU #ifdef PADDLE_WITH_MLU
auto src_type = static_cast<VT::Type>(ctx.Attr<int>("in_dtype")); auto src_type = static_cast<VT::Type>(ctx.Attr<int>("in_dtype"));
auto dst_type = static_cast<VT::Type>(ctx.Attr<int>("out_dtype")); auto dst_type = static_cast<VT::Type>(ctx.Attr<int>("out_dtype"));
if (src_type == dst_type || MLUSupportsCast(src_type, dst_type)) { if (src_type == dst_type || MLUSupportsCast(src_type, dst_type)) {
return framework::OpKernelType(tensor->type(), tensor_place); return framework::OpKernelType(
framework::TransToProtoVarType(tensor->dtype()), tensor_place);
} else { } else {
VLOG(3) << "MLU not support cast type: " VLOG(3) << "MLU not support cast type: "
<< framework::DataTypeToString(src_type) << framework::DataTypeToString(src_type)
<< " to type: " << framework::DataTypeToString(dst_type) << " to type: " << framework::DataTypeToString(dst_type)
<< ", fallbacking to CPU one!"; << ", fallbacking to CPU one!";
return framework::OpKernelType(tensor->type(), platform::CPUPlace()); return framework::OpKernelType(
framework::TransToProtoVarType(tensor->dtype()),
platform::CPUPlace());
} }
#endif #endif
return framework::OpKernelType(tensor->type(), tensor_place); return framework::OpKernelType(
framework::TransToProtoVarType(tensor->dtype()), tensor_place);
} }
}; };
......
...@@ -63,7 +63,7 @@ class CastOpKernel : public framework::OpKernel<InT> { ...@@ -63,7 +63,7 @@ class CastOpKernel : public framework::OpKernel<InT> {
out->mutable_data(dev_ctx.GetPlace(), out->mutable_data(dev_ctx.GetPlace(),
static_cast<framework::proto::VarType::Type>(out_dtype)); static_cast<framework::proto::VarType::Type>(out_dtype));
auto pt_out_dtype = pten::TransToPtenDataType( auto pt_out_dtype = framework::TransToPtenDataType(
static_cast<framework::proto::VarType::Type>(out_dtype)); static_cast<framework::proto::VarType::Type>(out_dtype));
// call new kernel // call new kernel
......
...@@ -43,7 +43,7 @@ class CastNPUKernel : public framework::OpKernel<T> { ...@@ -43,7 +43,7 @@ class CastNPUKernel : public framework::OpKernel<T> {
auto* out = ctx.Output<Tensor>("Out"); auto* out = ctx.Output<Tensor>("Out");
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
if (x->type() == dtype) { if (framework::TransToProtoVarType(x->dtype()) == dtype) {
// NOTE(zhiqiu): NPU cast op may result in wrong value, so // NOTE(zhiqiu): NPU cast op may result in wrong value, so
// add special case here. // add special case here.
VLOG(4) << "cast to same dtype:" << dtype; VLOG(4) << "cast to same dtype:" << dtype;
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU #ifdef PADDLE_WITH_XPU
#include <memory> #include <memory>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/cast_op.h" #include "paddle/fluid/operators/cast_op.h"
#include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/float16.h"
...@@ -45,7 +46,7 @@ class CastXPUKernel : public framework::OpKernel<InT> { ...@@ -45,7 +46,7 @@ class CastXPUKernel : public framework::OpKernel<InT> {
out->mutable_data(dev_ctx.GetPlace(), out->mutable_data(dev_ctx.GetPlace(),
static_cast<framework::proto::VarType::Type>(out_dtype)); static_cast<framework::proto::VarType::Type>(out_dtype));
auto pt_out_dtype = pten::TransToPtenDataType( auto pt_out_dtype = framework::TransToPtenDataType(
static_cast<framework::proto::VarType::Type>(out_dtype)); static_cast<framework::proto::VarType::Type>(out_dtype));
// call pten kernel // call pten kernel
pten::CastKernel<InT>( pten::CastKernel<InT>(
......
...@@ -338,8 +338,9 @@ class ClassCenterSampleCUDAKernel : public framework::OpKernel<T> { ...@@ -338,8 +338,9 @@ class ClassCenterSampleCUDAKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce(
num_classes_per_device_ptr, num_classes_per_device_ptr, num_classes_per_device_ptr, num_classes_per_device_ptr,
num_classes_per_device.numel(), num_classes_per_device.numel(),
platform::ToNCCLDataType(num_classes_per_device.type()), ncclSum, platform::ToNCCLDataType(
comm->comm(), calcu_stream)); framework::TransToProtoVarType(num_classes_per_device.dtype())),
ncclSum, comm->comm(), calcu_stream));
} }
#endif #endif
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#ifdef PADDLE_WITH_ASCEND_CL #ifdef PADDLE_WITH_ASCEND_CL
#include "paddle/fluid/platform/device/npu/npu_op_runner.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#endif #endif
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -53,7 +54,7 @@ struct FillConstantVisitor { ...@@ -53,7 +54,7 @@ struct FillConstantVisitor {
* = nullptr) const { * = nullptr) const {
#ifdef PADDLE_WITH_ASCEND_CL #ifdef PADDLE_WITH_ASCEND_CL
if (platform::is_npu_place(dev_ctx_.GetPlace())) { if (platform::is_npu_place(dev_ctx_.GetPlace())) {
Tensor tensor_tmp(dtype_); Tensor tensor_tmp(framework::TransToPtenDataType(dtype_));
tensor_tmp.mutable_data<T>({1}, context_.GetPlace()); tensor_tmp.mutable_data<T>({1}, context_.GetPlace());
FillNpuTensorWithConstant<T>(&tensor_tmp, static_cast<T>(value_)); FillNpuTensorWithConstant<T>(&tensor_tmp, static_cast<T>(value_));
...@@ -193,7 +194,8 @@ class CoalesceTensorOpKernel : public framework::OpKernel<T> { ...@@ -193,7 +194,8 @@ class CoalesceTensorOpKernel : public framework::OpKernel<T> {
void *fused_tensor_ptr = void *fused_tensor_ptr =
fused_tensor fused_tensor
->Resize(framework::make_ddim({static_cast<int64_t>(numel)})) ->Resize(framework::make_ddim({static_cast<int64_t>(numel)}))
.mutable_data(context.GetPlace(), dtype); .mutable_data(context.GetPlace(),
framework::TransToPtenDataType(dtype));
VLOG(10) << "Fused tensor addr " << fused_tensor_ptr; VLOG(10) << "Fused tensor addr " << fused_tensor_ptr;
// Init the continuous space // Init the continuous space
......
...@@ -41,7 +41,8 @@ class AllReduceOpKernel : public framework::OpKernel<T> { ...@@ -41,7 +41,8 @@ class AllReduceOpKernel : public framework::OpKernel<T> {
auto in = ctx.Input<framework::Tensor>("X"); auto in = ctx.Input<framework::Tensor>("X");
auto out = ctx.Output<framework::Tensor>("Out"); auto out = ctx.Output<framework::Tensor>("Out");
int dtype = platform::ToNCCLDataType(in->type()); int dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype()));
int64_t numel = in->numel(); int64_t numel = in->numel();
auto* sendbuff = in->data(); auto* sendbuff = in->data();
out->Resize(in->dims()); out->Resize(in->dims());
......
...@@ -31,7 +31,8 @@ class AllToAllOpCUDAKernel : public framework::OpKernel<T> { ...@@ -31,7 +31,8 @@ class AllToAllOpCUDAKernel : public framework::OpKernel<T> {
auto x = ctx.Input<framework::LoDTensor>("X"); auto x = ctx.Input<framework::LoDTensor>("X");
auto out = ctx.Output<framework::LoDTensor>("Out"); auto out = ctx.Output<framework::LoDTensor>("Out");
int send_numel = x->numel(); int send_numel = x->numel();
ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); ncclDataType_t dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype()));
int ring_id = ctx.Attr<int>("ring_id"); int ring_id = ctx.Attr<int>("ring_id");
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(
......
...@@ -31,7 +31,8 @@ class BarrierOpCUDAKernel : public framework::OpKernel<T> { ...@@ -31,7 +31,8 @@ class BarrierOpCUDAKernel : public framework::OpKernel<T> {
auto out = ctx.Output<framework::Tensor>("Out"); auto out = ctx.Output<framework::Tensor>("Out");
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
ncclDataType_t dtype = platform::ToNCCLDataType(in->type()); ncclDataType_t dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype()));
int64_t numel = in->numel(); int64_t numel = in->numel();
const void* sendbuff = in->data(); const void* sendbuff = in->data();
void* recvbuff = out->mutable_data<T>(place); void* recvbuff = out->mutable_data<T>(place);
......
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/platform/device/gpu/nccl_helper.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h"
#endif #endif
#include "paddle/fluid/framework/convert_utils.h"
namespace ops = paddle::operators; namespace ops = paddle::operators;
namespace plat = paddle::platform; namespace plat = paddle::platform;
...@@ -56,7 +57,8 @@ class NCCLBroadcastOpKernel : public framework::OpKernel<T> { ...@@ -56,7 +57,8 @@ class NCCLBroadcastOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast( PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast(
send_recv_buffer, static_cast<size_t>(in->numel()), send_recv_buffer, static_cast<size_t>(in->numel()),
platform::ToNCCLDataType(in->type()), root_dev_id, comm, stream)); platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())),
root_dev_id, comm, stream));
VLOG(3) << "Bcast " << ctx.InputNames("X")[0] << ", (" << in->numel() << ")" VLOG(3) << "Bcast " << ctx.InputNames("X")[0] << ", (" << in->numel() << ")"
<< " From " << root_dev_id << " to " << dev_id; << " From " << root_dev_id << " to " << dev_id;
......
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h"
#endif #endif
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -29,7 +30,8 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel<T> { ...@@ -29,7 +30,8 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel<T> {
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
auto in = ctx.Input<framework::Tensor>("X"); auto in = ctx.Input<framework::Tensor>("X");
auto out = ctx.Output<framework::Tensor>("Out"); auto out = ctx.Output<framework::Tensor>("Out");
ncclDataType_t dtype = platform::ToNCCLDataType(in->type()); ncclDataType_t dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype()));
int nranks = ctx.Attr<int>("nranks"); int nranks = ctx.Attr<int>("nranks");
int rid = ctx.Attr<int>("ring_id"); int rid = ctx.Attr<int>("ring_id");
......
...@@ -31,7 +31,8 @@ class CAllGatherOpASCENDKernel : public framework::OpKernel<T> { ...@@ -31,7 +31,8 @@ class CAllGatherOpASCENDKernel : public framework::OpKernel<T> {
#if defined(PADDLE_WITH_ASCEND_CL) #if defined(PADDLE_WITH_ASCEND_CL)
auto in = ctx.Input<framework::Tensor>("X"); auto in = ctx.Input<framework::Tensor>("X");
auto out = ctx.Output<framework::Tensor>("Out"); auto out = ctx.Output<framework::Tensor>("Out");
HcclDataType dtype = platform::ToHCCLDataType(in->type()); HcclDataType dtype =
platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype()));
int ring_id = ctx.Attr<int>("ring_id"); int ring_id = ctx.Attr<int>("ring_id");
std::string group = std::string group =
......
...@@ -173,7 +173,8 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel<T> { ...@@ -173,7 +173,8 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel<T> {
auto in = ctx.Input<framework::Tensor>("X"); auto in = ctx.Input<framework::Tensor>("X");
auto out = ctx.Output<framework::Tensor>("Out"); auto out = ctx.Output<framework::Tensor>("Out");
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
HcclDataType dtype = platform::ToHCCLDataType(in->type()); HcclDataType dtype =
platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype()));
int64_t numel = in->numel(); int64_t numel = in->numel();
void* sendbuff = reinterpret_cast<void*>(const_cast<T*>(in->data<T>())); void* sendbuff = reinterpret_cast<void*>(const_cast<T*>(in->data<T>()));
...@@ -231,7 +232,7 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel<T> { ...@@ -231,7 +232,7 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel<T> {
bool found_nan = false; bool found_nan = false;
auto d_type = in->type(); auto d_type = framework::TransToProtoVarType(in->dtype());
switch (d_type) { switch (d_type) {
case framework::proto::VarType::FP16: { case framework::proto::VarType::FP16: {
break; break;
...@@ -284,7 +285,8 @@ class CAllReduceOpXPUKernel : public framework::OpKernel<T> { ...@@ -284,7 +285,8 @@ class CAllReduceOpXPUKernel : public framework::OpKernel<T> {
auto out = ctx.Output<framework::Tensor>("Out"); auto out = ctx.Output<framework::Tensor>("Out");
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
BKCLDataType dtype = platform::ToBKCLDataType(in->type()); BKCLDataType dtype =
platform::ToBKCLDataType(framework::TransToProtoVarType(in->dtype()));
int64_t numel = in->numel(); int64_t numel = in->numel();
const void* sendbuff = in->data<T>(); const void* sendbuff = in->data<T>();
out->Resize(in->dims()); out->Resize(in->dims());
...@@ -346,7 +348,8 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel<T> { ...@@ -346,7 +348,8 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel<T> {
auto out = ctx.Output<framework::Tensor>("Out"); auto out = ctx.Output<framework::Tensor>("Out");
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
ncclDataType_t dtype = platform::ToNCCLDataType(in->type()); ncclDataType_t dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype()));
int64_t numel = in->numel(); int64_t numel = in->numel();
const void* sendbuff = in->data<T>(); const void* sendbuff = in->data<T>();
out->Resize(in->dims()); out->Resize(in->dims());
......
...@@ -30,7 +30,8 @@ class CBroadcastOpCUDAKernel : public framework::OpKernel<T> { ...@@ -30,7 +30,8 @@ class CBroadcastOpCUDAKernel : public framework::OpKernel<T> {
auto x = ctx.Input<framework::LoDTensor>("X"); auto x = ctx.Input<framework::LoDTensor>("X");
auto out = ctx.Output<framework::LoDTensor>("Out"); auto out = ctx.Output<framework::LoDTensor>("Out");
int numel = x->numel(); int numel = x->numel();
ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); ncclDataType_t dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype()));
int rid = ctx.Attr<int>("ring_id"); int rid = ctx.Attr<int>("ring_id");
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
......
...@@ -30,7 +30,8 @@ class CBroadcastOpASCENDKernel : public framework::OpKernel<T> { ...@@ -30,7 +30,8 @@ class CBroadcastOpASCENDKernel : public framework::OpKernel<T> {
auto x = ctx.Input<framework::LoDTensor>("X"); auto x = ctx.Input<framework::LoDTensor>("X");
void* ptr = reinterpret_cast<void*>(const_cast<T*>(x->data<T>())); void* ptr = reinterpret_cast<void*>(const_cast<T*>(x->data<T>()));
int numel = x->numel(); int numel = x->numel();
HcclDataType dtype = platform::ToHCCLDataType(x->type()); HcclDataType dtype =
platform::ToHCCLDataType(framework::TransToProtoVarType(x->dtype()));
auto out = ctx.Output<framework::LoDTensor>("Out"); auto out = ctx.Output<framework::LoDTensor>("Out");
......
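Taken together, the hunks above apply one mechanical rewrite at every call site: a tensor's element type is now queried as dtype() (an experimental::DataType) instead of type() (a framework::proto::VarType::Type), and code that still needs the proto enum or a byte size converts explicitly through framework::TransToProtoVarType / framework::DataTypeSize from convert_utils.h. Below is a minimal sketch of that pattern; ProtoTypeOf and ByteSizeOf are illustrative helpers written for this summary only, not functions added by the commit.

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"

namespace example {  // illustrative namespace, not part of the commit

// Before this commit: the proto enum came straight from the tensor,
// e.g. tensor.type() or framework::SizeOfType(tensor.type()).
// After this commit: read dtype() and convert only where the proto enum
// is still required (kernel keys, NCCL/HCCL/BKCL dtype mapping, etc.).
inline paddle::framework::proto::VarType::Type ProtoTypeOf(
    const paddle::framework::Tensor& tensor) {
  return paddle::framework::TransToProtoVarType(tensor.dtype());
}

// Byte-size queries now go through DataTypeSize(dtype()) instead of
// SizeOfType(type()).
inline size_t ByteSizeOf(const paddle::framework::Tensor& tensor) {
  return tensor.numel() * paddle::framework::DataTypeSize(tensor.dtype());
}

}  // namespace example

The same conversion appears in the kernel-key construction, the NCCL/HCCL/BKCL dtype mappings, and the MLU/NPU descriptor creation shown above; temporary tensors are likewise constructed from experimental::DataType values (e.g. experimental::DataType::FLOAT32) rather than framework::proto::VarType::FP32.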
[32 additional file diffs in this commit are collapsed and not shown here.]