Unverified Commit 2052f1e3 authored by Zhanlue Yang, committed by GitHub

[Unify Tensors PR #8] Merged Tensor into DenseTensor, test=allcases (#38914)

* Merged LoDTensor with Tensor,test=allcases

* Patched python level LoDTensor

* Patched python level LoDTensor

* Merge Tensor into DenseTensor

* Fixed namespace issues,test=allcases

* Fixed merge issues

* Fixed inference issues

* Fixed NPU test issues

* Fixed merge issues
Parent bfacd706
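The change in a nutshell: paddle::framework::Tensor and paddle::framework::LoDTensor become aliases of pten::DenseTensor, and the LoD-specific member functions are replaced by free functions declared in lod_tensor.h, so call sites switch from member calls to free-function calls. A minimal before/after sketch of a call site, assuming the SplitLoDTensor/MergeLoDTensor declarations introduced in this diff (variable names here are illustrative only):

    // Before this PR: member functions on framework::LoDTensor.
    //   merged.MergeLoDTensor(tensor_ptrs, platform::CPUPlace());
    //   auto pieces = merged.SplitLoDTensor(places);
    //
    // After this PR: LoDTensor is pten::DenseTensor, and the helpers are free
    // functions in paddle::framework.
    paddle::framework::LoDTensor merged;
    paddle::framework::MergeLoDTensor(&merged, tensor_ptrs, platform::CPUPlace());
    std::vector<paddle::framework::LoDTensor> pieces =
        paddle::framework::SplitLoDTensor(merged, places);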
@@ -36,7 +36,6 @@ limitations under the License. */
namespace paddle {
namespace framework {
-class Tensor;
class Scope;
class SelectedRows;
class Variable;
......
@@ -145,8 +145,8 @@ bool DistModel::LoadParameters() {
  return true;
}
-void DistModel::Run(const std::vector<framework::Tensor> &input_data,
-                    std::vector<framework::Tensor> *output_data) {
+void DistModel::Run(const std::vector<paddle::framework::Tensor> &input_data,
+                    std::vector<paddle::framework::Tensor> *output_data) {
  /* TODO(fleet exe dev): implement this funct */
}
......
@@ -18,6 +18,7 @@
#include <vector>
#include "paddle/fluid/distributed/fleet_executor/fleet_executor_desc.pb.h"
+#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/place.h"
@@ -25,7 +26,6 @@ namespace paddle {
namespace framework {
class ProgramDesc;
class Scope;
-class Tensor;
}
namespace distributed {
@@ -45,8 +45,8 @@ class DistModel {
 public:
  explicit DistModel(const DistModelConfig& config) : config_(config) {}
  bool Init();
-  void Run(const std::vector<framework::Tensor>& input_data,
-           std::vector<framework::Tensor>* output_data);
+  void Run(const std::vector<paddle::framework::Tensor>& input_data,
+           std::vector<paddle::framework::Tensor>* output_data);
  ~DistModel() = default;
 private:
......
@@ -20,10 +20,13 @@ limitations under the License. */
namespace paddle {
namespace framework {
class Variable;
-class Tensor;
} // namespace framework
} // namespace paddle
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace distributed {
......
@@ -31,11 +31,14 @@ class PSClient;
class PSServer;
} // namespace distributed
namespace framework {
-class Tensor;
class Variable;
} // namespace framework
} // namespace paddle
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace framework = paddle::framework;
namespace platform = paddle::platform;
namespace operators = paddle::operators;
......
@@ -32,11 +32,14 @@ class PSClient;
class PSServer;
} // namespace distributed
namespace framework {
-class Tensor;
class Variable;
} // namespace framework
} // namespace paddle
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace framework = paddle::framework;
namespace platform = paddle::platform;
namespace operators = paddle::operators;
......
@@ -29,7 +29,6 @@ DECLARE_bool(use_mkldnn);
namespace paddle {
namespace framework {
-class Tensor;
class Variable;
} // namespace framework
namespace platform {
@@ -37,6 +36,10 @@ class DeviceContext;
} // namespace platform
} // namespace paddle
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace egr {
namespace legacy {
......
@@ -68,7 +68,7 @@ static bool CopySameTensorTestMain(const DDim &dims,
    if (sync_copy) {
      TensorCopySync(src_tensor, dst_place, &src_tensor);
    } else {
-      TensorCopy(src_tensor, dst_place, &src_tensor);
+      paddle::framework::TensorCopy(src_tensor, dst_place, &src_tensor);
      platform::DeviceContextPool::Instance().Get(src_place)->Wait();
      platform::DeviceContextPool::Instance().Get(dst_place)->Wait();
    }
......
@@ -28,8 +28,9 @@ void TransDataDevice(const Tensor &in, const platform::Place &dst_place,
  // NOTE(zhiqiu): Special case for CPU->NPU, avoid stream sync.
  if (platform::is_cpu_place(in.place()) && platform::is_npu_place(dst_place)) {
-    TensorCopy(in, dst_place,
-               *platform::DeviceContextPool::Instance().Get(dst_place), out);
+    paddle::framework::TensorCopy(
+        in, dst_place, *platform::DeviceContextPool::Instance().Get(dst_place),
+        out);
    return;
  }
......
@@ -21,8 +21,6 @@ limitations under the License. */
namespace paddle {
namespace framework {
-class Tensor;
void TransDataDevice(const Tensor& in, const platform::Place& dst_place,
                     Tensor* out);
......
@@ -50,12 +50,15 @@ DECLARE_bool(enable_slotrecord_reset_shrink);
namespace paddle {
namespace framework {
class DataFeedDesc;
-class Tensor;
class Scope;
class Variable;
} // namespace framework
} // namespace paddle
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
......
@@ -25,7 +25,6 @@
namespace paddle {
namespace framework {
class OpKernelType;
-class Tensor;
} // namespace framework
} // namespace paddle
......
@@ -31,7 +31,6 @@ namespace paddle {
namespace framework {
class OpKernelType;
-class Tensor;
class Variable;
void TransformData(const OpKernelType &expected_kernel_type,
......
@@ -25,7 +25,6 @@ namespace paddle {
namespace framework {
class OpKernelType;
-class Tensor;
using KernelTypePair = std::pair<OpKernelType, OpKernelType>;
......
@@ -169,7 +169,7 @@ FetchResultType AsyncSSAGraphExecutor::Run(
      std::vector<const LoDTensor *> lodtensor_ptrs;
      lodtensor_ptrs.push_back(&(BOOST_GET(LoDTensor, val.at(fetch_idx))));
      LoDTensor var;
-      var.MergeLoDTensor(lodtensor_ptrs, platform::CPUPlace());
+      MergeLoDTensor(&var, lodtensor_ptrs, platform::CPUPlace());
      ret.emplace_back(var);
    } else {
      auto array = BOOST_GET(LoDTensorArray, val.at(fetch_idx));
@@ -179,7 +179,8 @@ FetchResultType AsyncSSAGraphExecutor::Run(
        std::vector<const LoDTensor *> lodtensor_ptrs;
        lodtensor_ptrs.push_back(&array[i]);
        item_array.emplace_back();
-        item_array.back().MergeLoDTensor(lodtensor_ptrs, platform::CPUPlace());
+        MergeLoDTensor(&(item_array.back()), lodtensor_ptrs,
+                       platform::CPUPlace());
      }
      ret.emplace_back(item_array);
    }
......
@@ -22,14 +22,18 @@
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
-class Tensor;
namespace ir {
class Node;
} // namespace ir
} // namespace framework
namespace platform {
class DeviceContext;
} // namespace platform
......
@@ -81,7 +81,7 @@ void FetchOpHandle::WaitAndMergeCPUFetchVars() const {
    }
    auto &val = BOOST_GET(FetchList, *data_);
    LoDTensor var;
-    var.MergeLoDTensor(tensors_ptr, platform::CPUPlace());
+    MergeLoDTensor(&var, tensors_ptr, platform::CPUPlace());
    val.at(offset_) = std::move(var);
  } else {
    auto &array = BOOST_GET_CONST(LoDTensorArray, tensors_[0]);
@@ -99,7 +99,7 @@ void FetchOpHandle::WaitAndMergeCPUFetchVars() const {
        tensors_ptr.push_back(&element[i]);
      }
      tmp_array.emplace_back();
-      tmp_array.back().MergeLoDTensor(tensors_ptr, platform::CPUPlace());
+      MergeLoDTensor(&(tmp_array.back()), tensors_ptr, platform::CPUPlace());
    }
    auto &val = BOOST_GET(FetchList, *data_);
    val.at(offset_) = std::move(tmp_array);
......
@@ -16,11 +16,10 @@
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
-namespace paddle {
-namespace framework {
-class Tensor;
-} // namespace framework
-} // namespace paddle
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
......
@@ -19,11 +19,9 @@
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/place.h"
-namespace paddle {
-namespace framework {
-class Tensor;
-} // namespace framework
-} // namespace paddle
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
......
@@ -275,7 +275,7 @@ FetchResultType ParallelSSAGraphExecutor::Run(
    }
    if (lodtensor_ptrs.size() != 0) {
      LoDTensor var;
-      var.MergeLoDTensor(lodtensor_ptrs, platform::CPUPlace());
+      MergeLoDTensor(&var, lodtensor_ptrs, platform::CPUPlace());
      ret.emplace_back(var);
    } else {
      LoDTensorArray var_array(lodtensorarray_ptrs[0]->size());
@@ -285,7 +285,7 @@ FetchResultType ParallelSSAGraphExecutor::Run(
        for (size_t j = 0; j < lodtensorarray_ptrs.size(); ++j) {
          ptrs.push_back(&(lodtensorarray_ptrs[j]->at(i)));
        }
-        var.MergeLoDTensor(ptrs, platform::CPUPlace());
+        MergeLoDTensor(&var, ptrs, platform::CPUPlace());
        var_array[i] = std::move(var);
      }
      ret.emplace_back(var_array);
......
@@ -18,11 +18,9 @@
#include "paddle/fluid/platform/profiler.h"
-namespace paddle {
-namespace framework {
-class Tensor;
-} // namespace framework
-} // namespace paddle
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
......
@@ -22,7 +22,6 @@
namespace paddle {
namespace framework {
class Scope;
-class Tensor;
class Variable;
namespace ir {
@@ -31,6 +30,10 @@ class MemOptVarInfo;
} // namespace framework
} // namespace paddle
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
namespace details {
......
@@ -16,9 +16,12 @@
#include "paddle/fluid/framework/selected_rows.h"
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
-class Tensor;
class Variable;
} // namespace framework
} // namespace paddle
......
@@ -19,7 +19,6 @@
namespace paddle {
namespace framework {
-class Tensor;
class Variable;
} // namespace framework
} // namespace paddle
......
@@ -14,10 +14,13 @@ limitations under the License. */
#include "paddle/fluid/framework/device_worker.h"
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
-class Tensor;
class Scope;
void DeviceWorker::SetRootScope(Scope* root_scope) { root_scope_ = root_scope; }
......
@@ -43,7 +43,6 @@ limitations under the License. */
namespace paddle {
namespace framework {
-class Tensor;
class ProgramDesc;
class Scope;
} // namespace framework
......
@@ -21,8 +21,6 @@
namespace paddle {
namespace framework {
-class Tensor;
class DLPackTensor {
 public:
  using LaneType = decltype(::DLTensor::dtype.lanes);  // uint16_t
......
@@ -15,9 +15,12 @@ limitations under the License. */
#include "paddle/fluid/framework/device_worker.h"
#include "paddle/fluid/platform/cpu_helper.h"
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
-class Tensor;
class Variable;
} // namespace framework
} // namespace paddle
......
@@ -19,10 +19,13 @@ limitations under the License. */
#include <boost/variant.hpp>
#include "glog/logging.h"
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
-class Tensor;
class Variable;
void SetFeedVariable(Scope* scope, const LoDTensor& input,
......
@@ -20,10 +20,13 @@ limitations under the License. */
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/string_array.h"
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
-class Tensor;
class Scope;
void SetFeedVariable(Scope* scope, const LoDTensor& input,
......
@@ -18,9 +18,12 @@
#include "paddle/fluid/framework/op_version_registry.h"
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
-class Tensor;
class Scope;
} // namespace framework
} // namespace paddle
......
@@ -19,9 +19,12 @@
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/enforce.h"
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
-class Tensor;
class Scope;
} // namespace framework
} // namespace paddle
......
@@ -15,11 +15,9 @@
#include "paddle/fluid/framework/ir/delete_dropout_op_pass.h"
-namespace paddle {
-namespace framework {
-class Tensor;
-} // namespace framework
-} // namespace paddle
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
......
@@ -16,11 +16,9 @@
#include <string>
-namespace paddle {
-namespace framework {
-class Tensor;
-} // namespace framework
-} // namespace paddle
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
......
@@ -22,11 +22,9 @@ limitations under the License. */
#include "paddle/fluid/platform/device_code.h"
#include "paddle/fluid/platform/float16.h"
-namespace paddle {
-namespace framework {
-class Tensor;
-} // namespace framework
-} // namespace paddle
+namespace pten {
+class DenseTensor;
+} // namespace pten
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
@@ -206,9 +204,11 @@ void TestMainImpl(std::string func_name, std::string code_str,
        for (int64_t i = 0; i < cpu_tensors[id].numel(); ++i) {
          tmp_cpu_ptr[i] = paddle::platform::float16(cpu_ptr[i]);
        }
-        TensorCopySync(tmp_cpu_tensors[id], place, &gpu_tensors[id]);
+        paddle::framework::TensorCopySync(tmp_cpu_tensors[id], place,
+                                          &gpu_tensors[id]);
      } else {
-        TensorCopySync(cpu_tensors[id], place, &gpu_tensors[id]);
+        paddle::framework::TensorCopySync(cpu_tensors[id], place,
+                                          &gpu_tensors[id]);
      }
      args.push_back(&gpu_ptrs[id]);
    }
@@ -234,8 +234,8 @@ void TestMainImpl(std::string func_name, std::string code_str,
      paddle::platform::float16* tmp_cpu_ptr =
          tmp_cpu_tensors[id].mutable_data<paddle::platform::float16>(
              cpu_tensors[id].dims(), paddle::platform::CPUPlace());
-      TensorCopySync(gpu_tensors[id], paddle::platform::CPUPlace(),
-                     &tmp_cpu_tensors[id]);
+      paddle::framework::TensorCopySync(
+          gpu_tensors[id], paddle::platform::CPUPlace(), &tmp_cpu_tensors[id]);
      float* cpu_ptr = cpu_tensors[id].mutable_data<float>(
          cpu_tensors[id].dims(), paddle::platform::CPUPlace());
@@ -243,8 +243,8 @@ void TestMainImpl(std::string func_name, std::string code_str,
        cpu_ptr[i] = static_cast<float>(tmp_cpu_ptr[i]);
      }
    } else {
-      TensorCopySync(gpu_tensors[id], paddle::platform::CPUPlace(),
-                     &cpu_tensors[id]);
+      paddle::framework::TensorCopySync(
+          gpu_tensors[id], paddle::platform::CPUPlace(), &cpu_tensors[id]);
    }
  }
}
......
@@ -319,14 +319,47 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor,
  TensorFromStream(is, static_cast<Tensor *>(tensor), dev_ctx);
}
-std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
-    const std::vector<platform::Place> places) const {
+LoD ConvertToLengthBasedLoD(const LoD &offset_lod) {
+  LoD length_lod;
+  length_lod.reserve(offset_lod.size());
+  for (size_t lvl = 0; lvl < offset_lod.size(); ++lvl) {
+    std::vector<size_t> level;
+    if (offset_lod[lvl].size() > 0) {
+      level.reserve(offset_lod[lvl].size() - 1);
+    }
+    for (size_t idx = 0; idx < offset_lod[lvl].size() - 1; ++idx) {
+      level.push_back(offset_lod[lvl][idx + 1] - offset_lod[lvl][idx]);
+    }
+    length_lod.push_back(level);
+  }
+  return length_lod;
+}
+
+LoD ConvertToOffsetBasedLoD(const LoD &length_lod) {
+  LoD offset_lod;
+  offset_lod.reserve(length_lod.size());
+  for (size_t lvl = 0; lvl < length_lod.size(); ++lvl) {
+    std::vector<size_t> level;
+    level.reserve(length_lod[lvl].size() + 1);
+    size_t tmp = 0;
+    level.push_back(tmp);
+    for (size_t idx = 0; idx < length_lod[lvl].size(); ++idx) {
+      tmp += length_lod[lvl][idx];
+      level.push_back(tmp);
+    }
+    offset_lod.push_back(level);
+  }
+  return offset_lod;
+}
+
+std::vector<LoDTensor> SplitLoDTensor(
+    const LoDTensor &src, const std::vector<platform::Place> places) {
  PADDLE_ENFORCE_GT(places.size(), 0,
                    platform::errors::InvalidArgument(
                        "Place number cannot be empty when splitting."));
-  check_memory_size();
-  size_t batch_size =
-      lod().empty() ? static_cast<size_t>(dims()[0]) : lod()[0].size() - 1;
+  src.check_memory_size();
+  size_t batch_size = src.lod().empty() ? static_cast<size_t>(src.dims()[0])
+                                        : src.lod()[0].size() - 1;
  // if batch_size is 0, just return #places.size() copys of empty
  // tensors.
@@ -335,10 +368,10 @@ std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
    empty_results.reserve(places.size());
    for (size_t i = 0; i < places.size(); ++i) {
      LoDTensor dst;
-      dst.Resize(dims());
-      dst.mutable_data(places[i], type());
-      if (!lod().empty()) {
-        dst.set_lod(lod());
+      dst.Resize(src.dims());
+      dst.mutable_data(places[i], src.type());
+      if (!src.lod().empty()) {
+        dst.set_lod(src.lod());
      }
      empty_results.emplace_back(std::move(dst));
    }
@@ -360,17 +393,18 @@ std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
                          begin, end));
    LoDTensor dst;
-    if (lod().empty()) {
-      auto src = Slice(begin, end);
+    if (src.lod().empty()) {
+      auto sliced_src = src.Slice(begin, end);
      auto &dst_place = places[i];
-      framework::TensorCopy(src, dst_place, &dst);
+      framework::TensorCopy(sliced_src, dst_place, &dst);
    } else {
-      auto lod_and_offset = GetSubLoDAndAbsoluteOffset(lod(), begin, end, 0);
+      auto lod_and_offset =
+          GetSubLoDAndAbsoluteOffset(src.lod(), begin, end, 0);
      auto &offset = lod_and_offset.second;
-      auto src = Slice(offset.first, offset.second);
+      auto sliced_src = src.Slice(offset.first, offset.second);
      auto &dst_place = places[i];
-      framework::TensorCopy(src, dst_place, &dst);
+      framework::TensorCopy(sliced_src, dst_place, &dst);
      LoD my_lod;
      for (auto &l : lod_and_offset.first) {
@@ -388,9 +422,9 @@ std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
  return results;
}
-void LoDTensor::MergeLoDTensor(
-    const std::vector<const LoDTensor *> &lod_tensors,
-    platform::Place dst_place) {
+void MergeLoDTensor(LoDTensor *target,
+                    const std::vector<const LoDTensor *> &lod_tensors,
+                    platform::Place dst_place) {
  PADDLE_ENFORCE_EQ(lod_tensors.empty(), false,
                    platform::errors::InvalidArgument(
                        "The LoDTensors to be merged are empty."));
@@ -449,10 +483,10 @@ void LoDTensor::MergeLoDTensor(
      }
    }
  }
-  Resize(new_dim);
-  set_layout(new_layout);
-  set_lod(new_lod);
-  mutable_data(dst_place, new_type);
+  target->Resize(new_dim);
+  target->set_layout(new_layout);
+  target->set_lod(new_lod);
+  target->mutable_data(dst_place, new_type);
  int begin = 0;
  for (auto *src : lod_tensors) {
@@ -460,44 +494,11 @@ void LoDTensor::MergeLoDTensor(
    if (end == begin) {
      continue;
    }
-    auto dst = Slice(begin, end);
+    auto dst = target->Slice(begin, end);
    framework::TensorCopy(*src, dst_place, &dst);
    begin = end;
  }
}
-
-LoD ConvertToLengthBasedLoD(const LoD &offset_lod) {
-  LoD length_lod;
-  length_lod.reserve(offset_lod.size());
-  for (size_t lvl = 0; lvl < offset_lod.size(); ++lvl) {
-    std::vector<size_t> level;
-    if (offset_lod[lvl].size() > 0) {
-      level.reserve(offset_lod[lvl].size() - 1);
-    }
-    for (size_t idx = 0; idx < offset_lod[lvl].size() - 1; ++idx) {
-      level.push_back(offset_lod[lvl][idx + 1] - offset_lod[lvl][idx]);
-    }
-    length_lod.push_back(level);
-  }
-  return length_lod;
-}
-
-LoD ConvertToOffsetBasedLoD(const LoD &length_lod) {
-  LoD offset_lod;
-  offset_lod.reserve(length_lod.size());
-  for (size_t lvl = 0; lvl < length_lod.size(); ++lvl) {
-    std::vector<size_t> level;
-    level.reserve(length_lod[lvl].size() + 1);
-    size_t tmp = 0;
-    level.push_back(tmp);
-    for (size_t idx = 0; idx < length_lod[lvl].size(); ++idx) {
-      tmp += length_lod[lvl][idx];
-      level.push_back(tmp);
-    }
-    offset_lod.push_back(level);
-  }
-  return offset_lod;
-}
} // namespace framework
} // namespace paddle
@@ -36,7 +36,15 @@ class DeviceContext;
namespace paddle {
namespace framework {
-using LoDTensor = paddle::framework::Tensor;
+using LoDTensor = pten::DenseTensor;
+
+// Split Tensor and copy to each place specified in places.
+std::vector<LoDTensor> SplitLoDTensor(
+    const LoDTensor& src, const std::vector<platform::Place> places);
+
+void MergeLoDTensor(LoDTensor* target,
+                    const std::vector<const LoDTensor*>& lod_tensors,
+                    platform::Place dst_place);
/*
 * LoD is short for Level of Details.
......
@@ -147,7 +147,7 @@ TEST(LoD, SplitLoDTensor) {
  lod1.push_back(std::vector<size_t>({0, 1, 2}));
  lod1.push_back(std::vector<size_t>({0, 2, 7}));
-  auto lods = lod_tensor.SplitLoDTensor(places);
+  auto lods = SplitLoDTensor(lod_tensor, places);
  EXPECT_EQ(lods[0].lod(), lod0);
  EXPECT_EQ(lods[1].lod(), lod1);
}
@@ -167,7 +167,7 @@ TEST(LoD, SplitLoDTensorWithZeroBatchSize) {
  LoD lod_res;
  lod_res.push_back(std::vector<size_t>({0}));
-  auto lods = lod_tensor.SplitLoDTensor(places);
+  auto lods = SplitLoDTensor(lod_tensor, places);
  EXPECT_EQ(lods[0].lod(), lod_res);
  EXPECT_EQ(lods[1].lod(), lod_res);
}
@@ -213,7 +213,7 @@ TEST(LoD, MergeLoDTensor) {
  std::vector<const LoDTensor*> lods{&lod_tensor0, &lod_tensor1, &lod_tensor2};
  LoDTensor lod_tensor;
-  lod_tensor.MergeLoDTensor(lods, place);
+  MergeLoDTensor(&lod_tensor, lods, place);
  EXPECT_EQ(lod_tensor.lod(), lod);
}
......
@@ -24,6 +24,10 @@
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
@@ -31,7 +35,6 @@ namespace framework {
 * Simple, intuitive and effective. Only single thread is supported, and
 * currently designed for inference.
 */
-class Tensor;
class ProgramDesc;
class Scope;
......
@@ -32,11 +32,10 @@ limitations under the License. */
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/common/scalar_array.h"
-namespace paddle {
-namespace framework {
-class Tensor;
-} // namespace framework
-} // namespace paddle
+namespace pten {
+class DenseTensor;
+} // namespace pten
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/platform/device/xpu/xpu_info.h"
#include "paddle/fluid/platform/device/xpu/xpu_op_list.h"
......
@@ -1048,7 +1048,7 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes(
    VLOG(3) << "Split " << (is_persistable ? "persistable" : "no persistable")
            << " data (" << pair.first << "), dim:" << pair.second.dims()
            << ", place: " << pair.second.place();
-    auto lod_tensors = pair.second.SplitLoDTensor(member_->places_);
+    auto lod_tensors = SplitLoDTensor(pair.second, member_->places_);
    bool is_cpu_place = platform::is_cpu_place(member_->places_.front());
    if (!is_persistable && num_places != lod_tensors.size() &&
        !allow_partial_feed) {
......
@@ -14,10 +14,13 @@ limitations under the License. */
#include <time.h>
#include "paddle/fluid/framework/device_worker.h"
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
-class Tensor;
class Scope;
class Variable;
......
@@ -30,8 +30,6 @@ limitations under the License. */
namespace paddle {
namespace framework {
-class Tensor;
class SelectedRows {
  /*
   * @brief We can use the SelectedRows structure to reproduce a sparse table.
......
@@ -18,105 +18,13 @@ limitations under the License. */
DECLARE_bool(use_stream_safe_cuda_allocator);
namespace paddle {
-namespace framework {
-
-Tensor Tensor::Slice(int64_t begin_idx, int64_t end_idx) const {
-  check_memory_size();
-  PADDLE_ENFORCE_GE(begin_idx, 0,
-                    paddle::platform::errors::OutOfRange(
-                        "The start row index must be greater than 0."
-                        "But received the start index is d%.",
-                        begin_idx));
-  PADDLE_ENFORCE_LE(end_idx, meta_.dims[0],
-                    paddle::platform::errors::OutOfRange(
-                        "The end row index is out of bound."));
-  PADDLE_ENFORCE_LT(
-      begin_idx, end_idx,
-      paddle::platform::errors::InvalidArgument(
-          "The start row index must be less than the end row index."
-          "But received the start index = %d, the end index = %d.",
-          begin_idx, end_idx));
-
-  if (meta_.dims[0] == 1) {
-    return *this;
-  } else {
-    size_t base = numel() / meta_.dims[0];
-    Tensor dst;
-    dst.storage_ = pten::make_intrusive<paddle::experimental::SharedStorage>(
-        storage_->data_shared());
-    dst.meta_.layout = meta_.layout;
-    dst.meta_.dtype = meta_.dtype;
-    DDim dst_dims = meta_.dims;
-    dst_dims[0] = end_idx - begin_idx;
-    dst.Resize(dst_dims);
-    dst.meta_.offset = meta_.offset + begin_idx * base * SizeOf(dtype());
-    return dst;
-  }
-}
-
-std::vector<Tensor> Tensor::Split(int64_t split_size, int64_t axis) const {
-  check_memory_size();
-  PADDLE_ENFORCE_GE(meta_.dims.size(), 0,
-                    paddle::platform::errors::OutOfRange(
-                        "split expects at least a 1-dimensional tensor"));
-  PADDLE_ENFORCE_GE(
-      split_size, 0,
-      paddle::platform::errors::OutOfRange(
-          "split expects split_size be non-negative, but got split_size is %d",
-          split_size));
-  int64_t numel_size = meta_.dims[axis];
-
-  int64_t num_splits = 1;
-  if (split_size != 0) {
-    num_splits =
-        std::max<int64_t>((numel_size + split_size - 1) / split_size, 1);
-  }
-
-  std::vector<Tensor> splits(num_splits);
-  int64_t last_split_size = split_size - (split_size * num_splits - numel_size);
-
-  for (int64_t i = 0; i < num_splits; ++i) {
-    int64_t length = i < num_splits - 1 ? split_size : last_split_size;
-    splits[i] = Slice(i * split_size, i * split_size + length);
-  }
-  return splits;
-}
-
-std::vector<Tensor> Tensor::Chunk(int64_t chunks, int64_t axis) const {
-  check_memory_size();
-  PADDLE_ENFORCE_GE(meta_.dims.size(), 0,
-                    paddle::platform::errors::OutOfRange(
-                        "split expects at least a 1-dimensional tensor"));
-  PADDLE_ENFORCE_GE(
-      chunks, 0,
-      paddle::platform::errors::OutOfRange(
-          "chunks expects to be greater than 0, but got chunks is %d", chunks));
-
-  int64_t numel_size = meta_.dims[axis];
-  int64_t split_size = (numel_size + chunks - 1) / chunks;
-  return Split(split_size, axis);
-}
-
-Tensor& Tensor::ShareDataWith(const Tensor& src) {
-  src.check_memory_size();
-  // Preserve LoD
-  auto lod = meta_.lod;
-  *this = src;
-  meta_.lod = lod;
-  return *this;
-}
-
-Tensor& Tensor::ShareInplaceVersionCounterWith(const Tensor& src) {
-  PADDLE_ENFORCE_NOT_NULL(
-      inplace_version_counter_,
-      platform::errors::PreconditionNotMet(
-          "Tensor does not hold inplace_version_counter_."));
-
-  inplace_version_counter_ = src.inplace_version_counter_;
-  return *this;
-}
-
-} // namespace framework
+namespace memory {
+namespace allocation {
+class Allocation;
+} // namespace allocation
+} // namespace memory
+} // namespace paddle
+
+namespace paddle {
+namespace framework {} // namespace framework
} // namespace paddle
@@ -69,35 +69,7 @@ using LoD = std::vector<paddle::framework::Vector<size_t>>;
   Variable object but not a pointer.
*/
-class Tensor : public pten::DenseTensor {
- public:
-  using DenseTensor = pten::DenseTensor;
-  using DenseTensor::DenseTensor;
-
-  // Split Tensor and copy to each place specified in places.
-  std::vector<Tensor> SplitLoDTensor(
-      const std::vector<platform::Place> places) const;
-
-  void MergeLoDTensor(const std::vector<const Tensor*>& lod_tensors,
-                      platform::Place place);
-
-  /*! The internal of two tensors share the same memory block. */
-  Tensor& ShareDataWith(const Tensor& src);
-
-  /*! The internal of two tensors share the same inplace version counter. */
-  Tensor& ShareInplaceVersionCounterWith(const Tensor& src);
-
-  Tensor Slice(int64_t begin_idx, int64_t end_idx) const;
-
-  std::vector<Tensor> Split(int64_t split_size, int64_t axis) const;
-
-  std::vector<Tensor> Chunk(int64_t chunks, int64_t axis) const;
-
-  Tensor& Resize(const DDim& dims) {
-    meta_.dims = dims;
-    return *this;
-  }
-};
+using Tensor = pten::DenseTensor;
} // namespace framework
} // namespace paddle
......
@@ -387,18 +387,10 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                Tensor* dst) {
  TensorCopyImpl<Tensor>(src, dst_place, dst);
}
-void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
-                pten::DenseTensor* dst) {
-  TensorCopyImpl<pten::DenseTensor>(src, dst_place, dst);
-}
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                const platform::DeviceContext& ctx, Tensor* dst) {
  TensorCopyImpl<Tensor>(src, dst_place, ctx, dst);
}
-void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
-                const platform::DeviceContext& ctx, pten::DenseTensor* dst) {
-  TensorCopyImpl<pten::DenseTensor>(src, dst_place, ctx, dst);
-}
void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
                    Tensor* dst) {
@@ -1394,45 +1386,50 @@ std::ostream& operator<<(std::ostream& os, const LoD& lod) {
  return os;
}
-std::ostream& operator<<(std::ostream& os, const Tensor& t) {
+} // namespace framework
+} // namespace paddle
+
+namespace pten {
+std::ostream& operator<<(std::ostream& os, const pten::DenseTensor& t) {
  if (t.lod().size() > 0) {
    os << " - lod: " << t.lod() << "\n";
  }
  os << " - place: " << t.place() << "\n";
  os << " - shape: [" << t.dims() << "]\n";
-  os << " - layout: " << DataLayoutToString(t.layout()) << "\n";
+  os << " - layout: " << paddle::framework::DataLayoutToString(t.layout())
+     << "\n";
#ifdef PADDLE_WITH_MKLDNN
  os << " - format: "
     << dnnl_fmt_tag2str(static_cast<dnnl_format_tag_t>(t.format())) << "\n";
#endif
-  Tensor tensor;
+  DenseTensor tensor;
  tensor.Resize(t.dims());
-  if (platform::is_cpu_place(t.place())) {
+  if (paddle::platform::is_cpu_place(t.place())) {
    tensor.ShareDataWith(t);
  } else {
-    platform::CPUPlace place;
-    framework::TensorCopy(t, place, &tensor);
-    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+    paddle::platform::CPUPlace place;
+    paddle::framework::TensorCopy(t, place, &tensor);
+    paddle::platform::DeviceContextPool& pool =
+        paddle::platform::DeviceContextPool::Instance();
    auto& dev_ctx = *pool.Get(t.place());
    dev_ctx.Wait();
  }
#define PrintTensorCallback(cpp_type, proto_type)          \
  do {                                                     \
    if (tensor.type() == proto_type) {                     \
      os << " - dtype: " << proto_type << "\n";            \
-      print_tensor<cpp_type>(os, tensor);                  \
+      paddle::framework::print_tensor<cpp_type>(os, tensor); \
      return os;                                           \
    }                                                      \
  } while (0)
  _ForEachDataType_(PrintTensorCallback);
  VLOG(1) << "PrintVar: unrecognized data type:" << t.type();
  return os;
}
+}
-} // namespace framework
} // namespace paddle
@@ -39,9 +39,6 @@ limitations under the License. */
namespace paddle {
namespace framework {
-std::ostream& operator<<(std::ostream& os, const LoD& lod);
-std::ostream& operator<<(std::ostream& os, const Tensor& t);
class PrintOptions {
 public:
  static PrintOptions& Instance() {
@@ -76,12 +73,8 @@ void TensorFromStream(std::istream& is, Tensor* tensor,
// If ctx_place and src_place are the same, src_ctx.Wait() is added
// after memory::Copy; if ctx_place and dst_place are the same,
// src_ctx.Wait() is added before memory::Copy.
-class Tensor;
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                const platform::DeviceContext& ctx, Tensor* dst);
-void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
-                const platform::DeviceContext& ctx, pten::DenseTensor* dst);
// NOTE(zcd): If the src.place() and dst_place are two different GPU,
// the copy operation is carried out on the dst_place's stream. This is
@@ -92,8 +85,6 @@ void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
// not completed.
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                Tensor* dst);
-void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
-                pten::DenseTensor* dst);
void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
                    Tensor* dst);
@@ -469,5 +460,11 @@ inline void TensorToVector(const Tensor& src, std::vector<bool>* dst) {
  delete[] array;
}
+std::ostream& operator<<(std::ostream& os, const LoD& lod);
} // namespace framework
} // namespace paddle
+namespace pten {
+std::ostream& operator<<(std::ostream& os, const DenseTensor& t);
+}
@@ -40,7 +40,6 @@ namespace paddle {
namespace framework {
class Dataset;
-class Tensor;
class ProgramDesc;
class PullDenseWorker;
class Scope;
......
@@ -47,6 +47,10 @@
#include "xpu/bkcl.h"
#endif
+namespace pten {
+class DenseTensor;
+} // namespace pten
// Users should add forward declarations here
namespace paddle {
@@ -70,7 +74,6 @@ class BKCLCommunicator;
namespace framework {
class LoDRankTable;
class ScopeBase;
-class Tensor;
class ReaderHolder;
class Scope;
class SelectedRows;
......
@@ -29,9 +29,12 @@
DECLARE_bool(use_mkldnn);
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
-class Tensor;
class Variable;
} // namespace framework
namespace platform {
......
@@ -16,6 +16,7 @@
#include <iostream>
+#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/string/string_helper.h"
@@ -24,6 +25,7 @@
#include "paddle/fluid/imperative/parallel_context.h"
+#include "paddle/pten/core/dense_tensor.h"
namespace paddle {
namespace imperative {
@@ -975,7 +977,8 @@ void Reducer::ProcessUnusedDenseVars() {
      auto *dest_grad_tensor =
          grad_var_base_tmp->MutableVar()->GetMutable<framework::LoDTensor>();
      const auto *dev_ctx = platform::DeviceContextPool::Instance().Get(place_);
-      TensorCopy(src_tensor, place_, *dev_ctx, dest_grad_tensor);
+      paddle::framework::TensorCopy(src_tensor, place_, *dev_ctx,
+                                    dest_grad_tensor);
      dest_grad_tensor->Resize(dest_dims);
    }
  }
......
@@ -90,12 +90,12 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
        temp_tensor.mutable_data<float>(cpu_place);
        // Copy the parameter data to a tmp tensor.
-        TensorCopySync(*t, cpu_place, &temp_tensor);
+        paddle::framework::TensorCopySync(*t, cpu_place, &temp_tensor);
        // Reallocation the space on GPU
        t->clear();
        // Copy parameter data to newly allocated GPU space.
-        TensorCopySync(temp_tensor, place, t);
+        paddle::framework::TensorCopySync(temp_tensor, place, t);
      }
    }
  }
......
@@ -35,7 +35,6 @@ limitations under the License. */
namespace paddle {
namespace framework {
-class Tensor;
class Scope;
} // namespace framework
......
@@ -21,9 +21,12 @@
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h"
+namespace pten {
+class DenseTensor;
+} // namespace pten
namespace paddle {
namespace framework {
-class Tensor;
class Scope;
class SelectedRows;
} // namespace framework
......
@@ -122,7 +122,7 @@ void test_tensor_copy(const platform::DeviceContext& ctx) {
  }
#endif
  std::vector<float> result;
-  TensorToVector(lod_tensor_n, ctx, &result);
+  paddle::framework::TensorToVector(lod_tensor_n, ctx, &result);
  ASSERT_EQ(result, vector);
  ASSERT_EQ(lod_tensor_n.lod(), lod_tensor.lod());
}
@@ -142,7 +142,7 @@ void test_tensor_share(const platform::DeviceContext& ctx) {
  framework::LoDTensor lod_tensor_n;
  TensorCopyAsync(&lod_tensor_n, lite_api_tensor, ctx);
  std::vector<float> result;
-  TensorToVector(lod_tensor_n, ctx, &result);
+  paddle::framework::TensorToVector(lod_tensor_n, ctx, &result);
  ASSERT_EQ(result, vector);
  ASSERT_EQ(lod_tensor_n.lod(), lod_tensor.lod());
}
......
@@ -82,10 +82,11 @@ class BatchNormOpConverter : public OpConverter {
    platform::CPUPlace cpu_place;
    // copy data from gpu to cpu
-    TensorCopySync((*Bias_t), cpu_place, &bias_tensor);
-    TensorCopySync((*Mean_t), cpu_place, &mean_tensor);
-    TensorCopySync((*Scale_t), cpu_place, &scale_tensor);
-    TensorCopySync((*Variance_t), cpu_place, &variance_tensor);
+    paddle::framework::TensorCopySync((*Bias_t), cpu_place, &bias_tensor);
+    paddle::framework::TensorCopySync((*Mean_t), cpu_place, &mean_tensor);
+    paddle::framework::TensorCopySync((*Scale_t), cpu_place, &scale_tensor);
+    paddle::framework::TensorCopySync((*Variance_t), cpu_place,
+                                      &variance_tensor);
    auto* bias_data = bias_tensor.mutable_data<float>(platform::CPUPlace());
    auto* mean_data = mean_tensor.mutable_data<float>(platform::CPUPlace());
......
@@ -55,8 +55,8 @@ class LayerNormOpConverter : public OpConverter {
    scale_tensor->Resize(Scale_t->dims());
    platform::CPUPlace cpu_place;
-    TensorCopySync((*Bias_t), cpu_place, &(*bias_tensor));
-    TensorCopySync((*Scale_t), cpu_place, &(*scale_tensor));
+    paddle::framework::TensorCopySync((*Bias_t), cpu_place, &(*bias_tensor));
+    paddle::framework::TensorCopySync((*Scale_t), cpu_place, &(*scale_tensor));
    auto* bias_data = bias_tensor->mutable_data<float>(platform::CPUPlace());
    auto* scale_data = scale_tensor->mutable_data<float>(platform::CPUPlace());
......
@@ -46,7 +46,8 @@ class PReluOpConverter : public OpConverter {
    std::unique_ptr<framework::LoDTensor> alpha_tensor_temp(
        new framework::LoDTensor());
    alpha_tensor_temp->Resize(alpha_tensor->dims());
-    TensorCopySync(*alpha_tensor, cpu_place, alpha_tensor_temp.get());
+    paddle::framework::TensorCopySync(*alpha_tensor, cpu_place,
+                                      alpha_tensor_temp.get());
    float* alpha_data = alpha_tensor_temp->mutable_data<float>(cpu_place);
    nvinfer1::ILayer* layer = nullptr;
......
@@ -63,7 +63,7 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place,
    *(temp_data + i) = random(0., 1.);
  }
-  TensorCopySync(temp_tensor, place, tensor);
+  paddle::framework::TensorCopySync(temp_tensor, place, tensor);
}
/*
......
@@ -370,7 +370,8 @@ float *TensorRTEngine::GetWeightCPUData(const std::string &name,
                        name_with_suffix));
  weight_map[name_with_suffix].reset(new framework::Tensor());
  weight_map[name_with_suffix]->Resize(weight_tensor->dims());
-  TensorCopySync(*weight_tensor, cpu_place, weight_map[name_with_suffix].get());
+  paddle::framework::TensorCopySync(*weight_tensor, cpu_place,
+                                    weight_map[name_with_suffix].get());
  float *weight_data =
      weight_map[name_with_suffix]->mutable_data<float>(cpu_place);
  name_suffix_counter += 1;
......
@@ -35,12 +35,6 @@ limitations under the License. */
#include "paddle/fluid/platform/enforce.h"
#include "paddle/utils/any.h"
-namespace paddle {
-namespace framework {
-class Tensor;
-} // namespace framework
-} // namespace paddle
namespace paddle {
namespace inference {
namespace tensorrt {
......
@@ -41,12 +41,12 @@ class TensorRTEngineTest : public ::testing::Test {
  void PrepareInputOutput(const std::vector<float> &input,
                          std::vector<int> output_shape) {
-    TensorFromVector(input, *ctx_, &input_);
+    paddle::framework::TensorFromVector(input, *ctx_, &input_);
    output_.Resize(framework::make_ddim(output_shape));
  }
  void GetOutput(std::vector<float> *output) {
-    TensorToVector(output_, *ctx_, output);
+    paddle::framework::TensorToVector(output_, *ctx_, output);
  }
 protected:
......
@@ -50,9 +50,9 @@ class AbsKernel<platform::CUDADeviceContext, T>
    std::vector<const framework::Tensor*> ins = {x};
    std::vector<framework::Tensor*> outs = {out};
    auto functor = CudaAbsFunctor<T>();
-    LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kUnary, T,
-                                        math::Real<T>>(dev_ctx, ins, &outs,
-                                                       functor);
+    paddle::operators::LaunchSameDimsElementwiseCudaKernel<
+        ElementwiseType::kUnary, T, math::Real<T>>(dev_ctx, ins, &outs,
+                                                   functor);
  }
};
......
@@ -1368,14 +1368,14 @@ class ELUGradCudaKernel : public framework::OpKernel<T> {
    if (alpha > 0) {
      CudaELUGradFunctor<T> functor;
      functor.alpha = alpha;
-      LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
-          dev_ctx, ins, &outs, functor);
+      paddle::operators::LaunchSameDimsElementwiseCudaKernel<
+          ElementwiseType::kBinary, T, T>(dev_ctx, ins, &outs, functor);
    } else {
      CudaELUGradNegativeAlphaFunctor<T> functor;
      functor.alpha = alpha;
      ins.push_back(x);
-      LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
-          dev_ctx, ins, &outs, functor);
+      paddle::operators::LaunchSameDimsElementwiseCudaKernel<
+          ElementwiseType::kBinary, T, T>(dev_ctx, ins, &outs, functor);
    }
  }
};
@@ -1451,8 +1451,8 @@ class ActivationCudaKernel
    for (auto& attr : attrs) {
      *attr.second = ctx.Attr<float>(attr.first);
    }
-    LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kUnary, T, T>(
-        dev_ctx, ins, &outs, functor);
+    paddle::operators::LaunchSameDimsElementwiseCudaKernel<
+        ElementwiseType::kUnary, T, T>(dev_ctx, ins, &outs, functor);
  }
};
@@ -1481,17 +1481,17 @@ class ActivationGradCudaKernel
    if (static_cast<int>(Functor::FwdDeps()) == static_cast<int>(kDepOut)) {
      // Only need forward output Out
      ins.push_back(out);
-      LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
-          dev_ctx, ins, &outs, functor);
+      paddle::operators::LaunchSameDimsElementwiseCudaKernel<
+          ElementwiseType::kBinary, T, T>(dev_ctx, ins, &outs, functor);
    } else if (static_cast<int>(Functor::FwdDeps()) ==
               static_cast<int>(kDepX)) {
      // Only need forward input X
      ins.push_back(x);
-      LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
-          dev_ctx, ins, &outs, functor);
+      paddle::operators::LaunchSameDimsElementwiseCudaKernel<
+          ElementwiseType::kBinary, T, T>(dev_ctx, ins, &outs, functor);
    } else {
-      LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kUnary, T, T>(
-          dev_ctx, ins, &outs, functor);
+      paddle::operators::LaunchSameDimsElementwiseCudaKernel<
+          ElementwiseType::kUnary, T, T>(dev_ctx, ins, &outs, functor);
    }
  }
};
......
...@@ -2696,8 +2696,8 @@ class PowKernel : public framework::OpKernel<typename Functor::ELEMENT_TYPE> { ...@@ -2696,8 +2696,8 @@ class PowKernel : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
auto* factor_data = factor_tensor->data<float>(); auto* factor_data = factor_tensor->data<float>();
framework::Tensor cpu_factor_tensor; framework::Tensor cpu_factor_tensor;
if (platform::is_gpu_place(factor_tensor->place())) { if (platform::is_gpu_place(factor_tensor->place())) {
TensorCopySync(*factor_tensor, platform::CPUPlace(), framework::TensorCopySync(*factor_tensor, platform::CPUPlace(),
&cpu_factor_tensor); &cpu_factor_tensor);
factor_data = cpu_factor_tensor.data<float>(); factor_data = cpu_factor_tensor.data<float>();
} }
auto factor = auto factor =
...@@ -2751,8 +2751,8 @@ class PowGradKernel ...@@ -2751,8 +2751,8 @@ class PowGradKernel
auto* factor_data = factor_tensor->data<float>(); auto* factor_data = factor_tensor->data<float>();
framework::Tensor cpu_factor_tensor; framework::Tensor cpu_factor_tensor;
if (platform::is_gpu_place(factor_tensor->place())) { if (platform::is_gpu_place(factor_tensor->place())) {
TensorCopySync(*factor_tensor, platform::CPUPlace(), framework::TensorCopySync(*factor_tensor, platform::CPUPlace(),
&cpu_factor_tensor); &cpu_factor_tensor);
factor_data = cpu_factor_tensor.data<float>(); factor_data = cpu_factor_tensor.data<float>();
} }
auto factor = auto factor =
......
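The Pow hunks above, like the Clip and BatchNorm ones further down, follow the same recipe for reading a scalar out of a tensor that may live on the GPU: synchronously copy it to the CPU first, then dereference. A sketch of that recipe with the helper fully qualified as in this patch (ReadScalar is a made-up name, not a Paddle API):

#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/place.h"

float ReadScalar(const paddle::framework::Tensor& t) {
  const float* data = t.data<float>();
  paddle::framework::Tensor cpu_copy;
  if (paddle::platform::is_gpu_place(t.place())) {
    // Device memory cannot be dereferenced on the host; stage it on CPU first.
    paddle::framework::TensorCopySync(t, paddle::platform::CPUPlace(),
                                      &cpu_copy);
    data = cpu_copy.data<float>();
  }
  return data[0];
}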
...@@ -50,7 +50,7 @@ void Update(const platform::NPUDeviceContext& ctx, ...@@ -50,7 +50,7 @@ void Update(const platform::NPUDeviceContext& ctx,
runner_p2.Run(stream); runner_p2.Run(stream);
std::vector<int> bad_out_data; std::vector<int> bad_out_data;
TensorToVector(*bad_out_tensor, ctx, &bad_out_data); paddle::framework::TensorToVector(*bad_out_tensor, ctx, &bad_out_data);
if (bad_out_data[0] >= decr_every_n_nan_or_inf) { if (bad_out_data[0] >= decr_every_n_nan_or_inf) {
const auto& runner_p3 = NpuOpRunner("Power", {*pre_loss_scaling_tensor}, const auto& runner_p3 = NpuOpRunner("Power", {*pre_loss_scaling_tensor},
{*updated_loss_scaling_tensor}, {*updated_loss_scaling_tensor},
...@@ -61,7 +61,8 @@ void Update(const platform::NPUDeviceContext& ctx, ...@@ -61,7 +61,8 @@ void Update(const platform::NPUDeviceContext& ctx,
runner_p3.Run(stream); runner_p3.Run(stream);
std::vector<T> new_loss_scaling; std::vector<T> new_loss_scaling;
TensorToVector(*updated_loss_scaling_tensor, ctx, &new_loss_scaling); paddle::framework::TensorToVector(*updated_loss_scaling_tensor, ctx,
&new_loss_scaling);
float min_value = 1.0; float min_value = 1.0;
if (FLAGS_min_loss_scaling > 1) { if (FLAGS_min_loss_scaling > 1) {
min_value = static_cast<float>(FLAGS_min_loss_scaling); min_value = static_cast<float>(FLAGS_min_loss_scaling);
...@@ -98,7 +99,7 @@ void Update(const platform::NPUDeviceContext& ctx, ...@@ -98,7 +99,7 @@ void Update(const platform::NPUDeviceContext& ctx,
runner_p2.Run(stream); runner_p2.Run(stream);
std::vector<int> good_out_data; std::vector<int> good_out_data;
TensorToVector(*good_out_tensor, ctx, &good_out_data); paddle::framework::TensorToVector(*good_out_tensor, ctx, &good_out_data);
if (good_out_data[0] >= incr_every_n_steps) { if (good_out_data[0] >= incr_every_n_steps) {
const auto& runner_p3 = NpuOpRunner("Power", {*pre_loss_scaling_tensor}, const auto& runner_p3 = NpuOpRunner("Power", {*pre_loss_scaling_tensor},
...@@ -109,7 +110,8 @@ void Update(const platform::NPUDeviceContext& ctx, ...@@ -109,7 +110,8 @@ void Update(const platform::NPUDeviceContext& ctx,
runner_p3.Run(stream); runner_p3.Run(stream);
std::vector<T> new_loss_scaling; std::vector<T> new_loss_scaling;
TensorToVector(*updated_loss_scaling_tensor, ctx, &new_loss_scaling); paddle::framework::TensorToVector(*updated_loss_scaling_tensor, ctx,
&new_loss_scaling);
if (!std::isfinite(new_loss_scaling[0])) { if (!std::isfinite(new_loss_scaling[0])) {
// updated_loss_scaling_data = pre_loss_scaling_data // updated_loss_scaling_data = pre_loss_scaling_data
const auto& runner_p4 = NpuOpRunner("Power", {*pre_loss_scaling_tensor}, const auto& runner_p4 = NpuOpRunner("Power", {*pre_loss_scaling_tensor},
...@@ -209,7 +211,8 @@ class UpdateLossScalingNPUKernel : public framework::OpKernel<T> { ...@@ -209,7 +211,8 @@ class UpdateLossScalingNPUKernel : public framework::OpKernel<T> {
"FoundInfinite must has only one element.")); "FoundInfinite must has only one element."));
std::vector<bool> found_inf_vec; std::vector<bool> found_inf_vec;
TensorToVector(*found_inf, ctx.device_context(), &found_inf_vec); paddle::framework::TensorToVector(*found_inf, ctx.device_context(),
&found_inf_vec);
LazyZerosNPU<T>{}(dev_ctx, found_inf_vec, xs, outs); LazyZerosNPU<T>{}(dev_ctx, found_inf_vec, xs, outs);
const bool stop_update = ctx.Attr<bool>("stop_update"); const bool stop_update = ctx.Attr<bool>("stop_update");
......
...@@ -16,10 +16,13 @@ ...@@ -16,10 +16,13 @@
#include "paddle/fluid/operators/controlflow/while_op_helper.h" #include "paddle/fluid/operators/controlflow/while_op_helper.h"
#include "paddle/fluid/operators/tensor_formatter.h" #include "paddle/fluid/operators/tensor_formatter.h"
namespace pten {
class DenseTensor;
} // namespace pten
namespace paddle { namespace paddle {
namespace framework { namespace framework {
class InferShapeContext; class InferShapeContext;
class Tensor;
class OpDesc; class OpDesc;
class Scope; class Scope;
class Variable; class Variable;
......
...@@ -25,9 +25,12 @@ class DeviceContext; ...@@ -25,9 +25,12 @@ class DeviceContext;
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
namespace pten {
class DenseTensor;
} // namespace pten
namespace paddle { namespace paddle {
namespace framework { namespace framework {
class Tensor;
class Variable; class Variable;
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -76,7 +79,7 @@ class AssignFunctor { ...@@ -76,7 +79,7 @@ class AssignFunctor {
framework::LoDTensor *out) const { framework::LoDTensor *out) const {
if (lod_tensor.numel() == 0) return; if (lod_tensor.numel() == 0) return;
auto &out_tensor = *out; auto &out_tensor = *out;
TensorCopy(lod_tensor, lod_tensor.place(), &out_tensor); paddle::framework::TensorCopy(lod_tensor, lod_tensor.place(), &out_tensor);
out_tensor.set_lod(lod_tensor.lod()); out_tensor.set_lod(lod_tensor.lod());
} }
......
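The assign functor above keeps its behaviour; only the TensorCopy spelling changes. Roughly, the pattern is the one sketched below (assuming the fluid LoDTensor headers; an illustration, not the operator's actual code path): copy the dense data on the source's place, then carry the LoD over explicitly, since TensorCopy does not propagate it.

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/tensor_util.h"

void AssignLoDTensor(const paddle::framework::LoDTensor& src,
                     paddle::framework::LoDTensor* dst) {
  if (src.numel() == 0) return;  // nothing to copy for an empty tensor
  paddle::framework::TensorCopy(src, src.place(), dst);
  dst->set_lod(src.lod());       // LoD must be forwarded by hand
}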
...@@ -47,7 +47,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx, ...@@ -47,7 +47,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
init.push_back(static_cast<T>(3.0)); init.push_back(static_cast<T>(3.0));
init.push_back(static_cast<T>(4.0)); init.push_back(static_cast<T>(4.0));
TensorFromVector(init, ctx, tensor_x); paddle::framework::TensorFromVector(init, ctx, tensor_x);
tensor_x->Resize({4}); tensor_x->Resize({4});
ctx.Wait(); ctx.Wait();
...@@ -62,7 +62,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx, ...@@ -62,7 +62,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx,
op->Run(*scope, place); op->Run(*scope, place);
std::vector<T> out_vec; std::vector<T> out_vec;
TensorToVector(*tensor_out, ctx, &out_vec); paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
ctx.Wait(); ctx.Wait();
......
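Most of the NPU and collective unit tests touched below share the shape of the Compare helper above: build the input with TensorFromVector, run the op, and read the result back with TensorToVector. A CPU-only sketch of that round trip with the qualified helpers (an illustrative function, not one of the tests; the real tests pass the op's device context instead):

#include <vector>
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/device_context.h"

std::vector<float> RoundTrip(const std::vector<float>& src) {
  paddle::platform::CPUDeviceContext ctx;
  paddle::framework::Tensor t;
  paddle::framework::TensorFromVector(src, ctx, &t);  // host vector -> tensor
  std::vector<float> dst;
  paddle::framework::TensorToVector(t, ctx, &dst);    // tensor -> host vector
  return dst;
}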
...@@ -382,7 +382,8 @@ class BatchNormKernel<platform::CUDADeviceContext, T> ...@@ -382,7 +382,8 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
if (ctx.HasInput("MomentumTensor")) { if (ctx.HasInput("MomentumTensor")) {
const auto *mom_tensor = ctx.Input<Tensor>("MomentumTensor"); const auto *mom_tensor = ctx.Input<Tensor>("MomentumTensor");
Tensor mom_cpu; Tensor mom_cpu;
TensorCopySync(*mom_tensor, platform::CPUPlace(), &mom_cpu); paddle::framework::TensorCopySync(*mom_tensor, platform::CPUPlace(),
&mom_cpu);
momentum = mom_cpu.data<float>()[0]; momentum = mom_cpu.data<float>()[0];
} }
......
...@@ -86,7 +86,8 @@ class NPUBatchNormOpKernel : public framework::OpKernel<T> { ...@@ -86,7 +86,8 @@ class NPUBatchNormOpKernel : public framework::OpKernel<T> {
if (ctx.HasInput("MomentumTensor")) { if (ctx.HasInput("MomentumTensor")) {
const auto *mom_tensor = ctx.Input<Tensor>("MomentumTensor"); const auto *mom_tensor = ctx.Input<Tensor>("MomentumTensor");
Tensor mom_cpu; Tensor mom_cpu;
TensorCopySync(*mom_tensor, platform::CPUPlace(), &mom_cpu); paddle::framework::TensorCopySync(*mom_tensor, platform::CPUPlace(),
&mom_cpu);
momentum = mom_cpu.data<float>()[0]; momentum = mom_cpu.data<float>()[0];
} }
......
...@@ -87,7 +87,8 @@ class BatchNormXPUKernel : public framework::OpKernel<T> { ...@@ -87,7 +87,8 @@ class BatchNormXPUKernel : public framework::OpKernel<T> {
if (ctx.HasInput("MomentumTensor")) { if (ctx.HasInput("MomentumTensor")) {
const auto *mom_tensor = ctx.Input<Tensor>("MomentumTensor"); const auto *mom_tensor = ctx.Input<Tensor>("MomentumTensor");
Tensor mom_cpu; Tensor mom_cpu;
TensorCopySync(*mom_tensor, platform::CPUPlace(), &mom_cpu); paddle::framework::TensorCopySync(*mom_tensor, platform::CPUPlace(),
&mom_cpu);
momentum = mom_tensor->data<float>()[0]; momentum = mom_tensor->data<float>()[0];
} }
......
...@@ -91,8 +91,8 @@ class BCELossGradCUDAKernel : public framework::OpKernel<T> { ...@@ -91,8 +91,8 @@ class BCELossGradCUDAKernel : public framework::OpKernel<T> {
std::vector<framework::Tensor*> outs = {dx}; std::vector<framework::Tensor*> outs = {dx};
auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>(); auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
auto functor = BCELossGradFunctor<T>(); auto functor = BCELossGradFunctor<T>();
LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kTernary, T, T>( paddle::operators::LaunchSameDimsElementwiseCudaKernel<
dev_ctx, ins, &outs, functor); ElementwiseType::kTernary, T, T>(dev_ctx, ins, &outs, functor);
} }
}; };
......
...@@ -308,7 +308,7 @@ void OpTester::SetupTensor(framework::LoDTensor *tensor, ...@@ -308,7 +308,7 @@ void OpTester::SetupTensor(framework::LoDTensor *tensor,
} }
if (!platform::is_cpu_place(place_)) { if (!platform::is_cpu_place(place_)) {
TensorCopySync(cpu_tensor, place_, tensor); paddle::framework::TensorCopySync(cpu_tensor, place_, tensor);
} }
} }
......
...@@ -77,8 +77,10 @@ void BincountCUDAInner(const framework::ExecutionContext& context) { ...@@ -77,8 +77,10 @@ void BincountCUDAInner(const framework::ExecutionContext& context) {
input_min_scala.device(*place) = input_x.minimum(); input_min_scala.device(*place) = input_x.minimum();
Tensor input_min_cpu, input_max_cpu; Tensor input_min_cpu, input_max_cpu;
TensorCopySync(input_max_t, platform::CPUPlace(), &input_max_cpu); paddle::framework::TensorCopySync(input_max_t, platform::CPUPlace(),
TensorCopySync(input_min_t, platform::CPUPlace(), &input_min_cpu); &input_max_cpu);
paddle::framework::TensorCopySync(input_min_t, platform::CPUPlace(),
&input_min_cpu);
InputT input_min = input_min_cpu.data<InputT>()[0]; InputT input_min = input_min_cpu.data<InputT>()[0];
......
...@@ -100,7 +100,7 @@ void CopyInputDataToPlace(const framework::Scope& scope, ...@@ -100,7 +100,7 @@ void CopyInputDataToPlace(const framework::Scope& scope,
for (const auto& var_name : scope.LocalVarNames()) { for (const auto& var_name : scope.LocalVarNames()) {
const auto& src_tensor = scope.GetVar(var_name)->Get<LoDTensor>(); const auto& src_tensor = scope.GetVar(var_name)->Get<LoDTensor>();
auto* dst_tensor = dst_scope->Var(var_name)->GetMutable<LoDTensor>(); auto* dst_tensor = dst_scope->Var(var_name)->GetMutable<LoDTensor>();
TensorCopySync(src_tensor, dst_place, dst_tensor); paddle::framework::TensorCopySync(src_tensor, dst_place, dst_tensor);
} }
} }
...@@ -135,10 +135,12 @@ TEST(CinnLaunchOpTest, TestElementwiseAddPass) { ...@@ -135,10 +135,12 @@ TEST(CinnLaunchOpTest, TestElementwiseAddPass) {
elementwise_add_op->Run(scope, run_place); elementwise_add_op->Run(scope, run_place);
LoDTensor test_out, expected_out; LoDTensor test_out, expected_out;
TensorCopySync(scope.Var(test_out_name)->Get<LoDTensor>(), paddle::framework::TensorCopySync(
platform::CPUPlace(), &test_out); scope.Var(test_out_name)->Get<LoDTensor>(), platform::CPUPlace(),
TensorCopySync(scope.Var(expected_out_name)->Get<LoDTensor>(), &test_out);
platform::CPUPlace(), &expected_out); paddle::framework::TensorCopySync(
scope.Var(expected_out_name)->Get<LoDTensor>(), platform::CPUPlace(),
&expected_out);
ASSERT_TRUE(test_out.IsInitialized()); ASSERT_TRUE(test_out.IsInitialized());
ASSERT_TRUE(expected_out.IsInitialized()); ASSERT_TRUE(expected_out.IsInitialized());
......
...@@ -64,7 +64,8 @@ class ClipKernel : public framework::OpKernel<T> { ...@@ -64,7 +64,8 @@ class ClipKernel : public framework::OpKernel<T> {
auto* max_t = context.Input<Tensor>("Max"); auto* max_t = context.Input<Tensor>("Max");
auto* max_data = max_t->data<T>(); auto* max_data = max_t->data<T>();
if (platform::is_gpu_place(max_t->place())) { if (platform::is_gpu_place(max_t->place())) {
TensorCopySync(*max_t, platform::CPUPlace(), &max_cpu); paddle::framework::TensorCopySync(*max_t, platform::CPUPlace(),
&max_cpu);
max_data = max_cpu.data<T>(); max_data = max_cpu.data<T>();
} }
max = max_data[0]; max = max_data[0];
...@@ -77,7 +78,8 @@ class ClipKernel : public framework::OpKernel<T> { ...@@ -77,7 +78,8 @@ class ClipKernel : public framework::OpKernel<T> {
auto* min_t = context.Input<Tensor>("Min"); auto* min_t = context.Input<Tensor>("Min");
auto* min_data = min_t->data<T>(); auto* min_data = min_t->data<T>();
if (platform::is_gpu_place(min_t->place())) { if (platform::is_gpu_place(min_t->place())) {
TensorCopySync(*min_t, platform::CPUPlace(), &min_cpu); paddle::framework::TensorCopySync(*min_t, platform::CPUPlace(),
&min_cpu);
min_data = min_cpu.data<T>(); min_data = min_cpu.data<T>();
} }
min = min_data[0]; min = min_data[0];
...@@ -101,7 +103,8 @@ class ClipKernel : public framework::OpKernel<T> { ...@@ -101,7 +103,8 @@ class ClipKernel : public framework::OpKernel<T> {
std::vector<const framework::Tensor*> ins = {x}; std::vector<const framework::Tensor*> ins = {x};
std::vector<framework::Tensor*> outs = {out}; std::vector<framework::Tensor*> outs = {out};
auto functor = ClipFunctor<T>(min, max); auto functor = ClipFunctor<T>(min, max);
LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kUnary, T, T>( paddle::operators::LaunchSameDimsElementwiseCudaKernel<
ElementwiseType::kUnary, T, T>(
context.template device_context<platform::CUDADeviceContext>(), ins, context.template device_context<platform::CUDADeviceContext>(), ins,
&outs, functor); &outs, functor);
#endif #endif
...@@ -141,7 +144,8 @@ class ClipGradKernel : public framework::OpKernel<T> { ...@@ -141,7 +144,8 @@ class ClipGradKernel : public framework::OpKernel<T> {
auto* max_t = context.Input<Tensor>("Max"); auto* max_t = context.Input<Tensor>("Max");
auto* max_data = max_t->data<T>(); auto* max_data = max_t->data<T>();
if (platform::is_gpu_place(max_t->place())) { if (platform::is_gpu_place(max_t->place())) {
TensorCopySync(*max_t, platform::CPUPlace(), &max_cpu); paddle::framework::TensorCopySync(*max_t, platform::CPUPlace(),
&max_cpu);
max_data = max_cpu.data<T>(); max_data = max_cpu.data<T>();
} }
max = max_data[0]; max = max_data[0];
...@@ -154,7 +158,8 @@ class ClipGradKernel : public framework::OpKernel<T> { ...@@ -154,7 +158,8 @@ class ClipGradKernel : public framework::OpKernel<T> {
auto* min_t = context.Input<Tensor>("Min"); auto* min_t = context.Input<Tensor>("Min");
auto* min_data = min_t->data<T>(); auto* min_data = min_t->data<T>();
if (platform::is_gpu_place(min_t->place())) { if (platform::is_gpu_place(min_t->place())) {
TensorCopySync(*min_t, platform::CPUPlace(), &min_cpu); paddle::framework::TensorCopySync(*min_t, platform::CPUPlace(),
&min_cpu);
min_data = min_cpu.data<T>(); min_data = min_cpu.data<T>();
} }
min = min_data[0]; min = min_data[0];
......
...@@ -36,7 +36,8 @@ class ClipXPUKernel : public framework::OpKernel<T> { ...@@ -36,7 +36,8 @@ class ClipXPUKernel : public framework::OpKernel<T> {
auto* max_t = ctx.Input<Tensor>("Max"); auto* max_t = ctx.Input<Tensor>("Max");
auto* max_data = max_t->data<T>(); auto* max_data = max_t->data<T>();
if (platform::is_xpu_place(max_t->place())) { if (platform::is_xpu_place(max_t->place())) {
TensorCopySync(*max_t, platform::CPUPlace(), &max_cpu); paddle::framework::TensorCopySync(*max_t, platform::CPUPlace(),
&max_cpu);
max_data = max_cpu.data<T>(); max_data = max_cpu.data<T>();
} }
max = max_data[0]; max = max_data[0];
...@@ -48,7 +49,8 @@ class ClipXPUKernel : public framework::OpKernel<T> { ...@@ -48,7 +49,8 @@ class ClipXPUKernel : public framework::OpKernel<T> {
auto* min_t = ctx.Input<Tensor>("Min"); auto* min_t = ctx.Input<Tensor>("Min");
auto* min_data = min_t->data<T>(); auto* min_data = min_t->data<T>();
if (platform::is_xpu_place(min_t->place())) { if (platform::is_xpu_place(min_t->place())) {
TensorCopySync(*min_t, platform::CPUPlace(), &min_cpu); paddle::framework::TensorCopySync(*min_t, platform::CPUPlace(),
&min_cpu);
min_data = min_cpu.data<T>(); min_data = min_cpu.data<T>();
} }
min = min_data[0]; min = min_data[0];
......
...@@ -139,7 +139,7 @@ void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) { ...@@ -139,7 +139,7 @@ void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) {
} }
PrintDebugInfo("input data", init); PrintDebugInfo("input data", init);
TensorFromVector(init, ctx, tensor_x); paddle::framework::TensorFromVector(init, ctx, tensor_x);
tensor_x->Resize({num1, num2}); tensor_x->Resize({num1, num2});
ctx.Wait(); ctx.Wait();
...@@ -165,7 +165,7 @@ void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) { ...@@ -165,7 +165,7 @@ void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) {
ctx.Wait(); ctx.Wait();
std::vector<float> out_vec; std::vector<float> out_vec;
TensorToVector(*tensor_out, ctx, &out_vec); paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
ctx.Wait(); ctx.Wait();
PrintDebugInfo("output data", out_vec); PrintDebugInfo("output data", out_vec);
......
...@@ -139,7 +139,7 @@ void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx) { ...@@ -139,7 +139,7 @@ void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx) {
} }
PrintDebugInfo("input data", init); PrintDebugInfo("input data", init);
TensorFromVector(init, ctx, tensor_x); paddle::framework::TensorFromVector(init, ctx, tensor_x);
tensor_x->Resize({num1, num2}); tensor_x->Resize({num1, num2});
ctx.Wait(); ctx.Wait();
...@@ -164,7 +164,7 @@ void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx) { ...@@ -164,7 +164,7 @@ void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx) {
ctx.Wait(); ctx.Wait();
std::vector<float> out_vec; std::vector<float> out_vec;
TensorToVector(*tensor_out, ctx, &out_vec); paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
ctx.Wait(); ctx.Wait();
PrintDebugInfo("output data", out_vec); PrintDebugInfo("output data", out_vec);
......
...@@ -144,7 +144,7 @@ inline bool ContainsNan(const paddle::platform::NPUDeviceContext& dev_ctx, ...@@ -144,7 +144,7 @@ inline bool ContainsNan(const paddle::platform::NPUDeviceContext& dev_ctx,
try { try {
const auto& runner_mean = paddle::operators::NpuOpRunner( const auto& runner_mean = paddle::operators::NpuOpRunner(
"ReduceMeanD", {*in}, {mean}, {{"axes", axes}, {"keep_dims", false}}); "ReduceMeanD", {*in}, {mean}, {{"axes", axes}, {"keep_dims", false}});
TensorToVector(mean, dev_ctx, &vec); paddle::framework::TensorToVector(mean, dev_ctx, &vec);
} catch (...) { } catch (...) {
LOG(WARNING) << "ContainsNan catch exception"; LOG(WARNING) << "ContainsNan catch exception";
return true; return true;
......
...@@ -146,7 +146,7 @@ void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx, ...@@ -146,7 +146,7 @@ void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx,
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
TensorFromVector(init, ctx, tensor_x); paddle::framework::TensorFromVector(init, ctx, tensor_x);
tensor_x->Resize({num1, num2}); tensor_x->Resize({num1, num2});
ctx.Wait(); ctx.Wait();
...@@ -170,7 +170,7 @@ void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx, ...@@ -170,7 +170,7 @@ void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx,
ctx.Wait(); ctx.Wait();
std::vector<T> out_vec; std::vector<T> out_vec;
TensorToVector(*tensor_out, ctx, &out_vec); paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
ctx.Wait(); ctx.Wait();
PrintDebugInfo("output data", out_vec); PrintDebugInfo("output data", out_vec);
......
...@@ -133,7 +133,7 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) { ...@@ -133,7 +133,7 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) {
} }
PrintDebugInfo("input data", init); PrintDebugInfo("input data", init);
TensorFromVector(init, ctx, tensor_x); paddle::framework::TensorFromVector(init, ctx, tensor_x);
tensor_x->Resize({num, num}); tensor_x->Resize({num, num});
ctx.Wait(); ctx.Wait();
...@@ -159,7 +159,7 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) { ...@@ -159,7 +159,7 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) {
ctx.Wait(); ctx.Wait();
std::vector<float> out_vec; std::vector<float> out_vec;
TensorToVector(*tensor_out, ctx, &out_vec); paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
ctx.Wait(); ctx.Wait();
PrintDebugInfo("output data", out_vec); PrintDebugInfo("output data", out_vec);
......
...@@ -71,8 +71,8 @@ void shard_index(const Tensor &table_t, const Tensor &ids_t, int64_t start_idx, ...@@ -71,8 +71,8 @@ void shard_index(const Tensor &table_t, const Tensor &ids_t, int64_t start_idx,
#if (CANN_VERSION_CODE >= 503003) #if (CANN_VERSION_CODE >= 503003)
Tensor factor_tensor(ids_t.type()); Tensor factor_tensor(ids_t.type());
factor_tensor.mutable_data<T>({1}, context.GetPlace()); factor_tensor.mutable_data<T>({1}, context.GetPlace());
TensorFromVector(std::vector<T>{static_cast<T>(start_idx)}, paddle::framework::TensorFromVector(std::vector<T>{static_cast<T>(start_idx)},
context.device_context(), &factor_tensor); context.device_context(), &factor_tensor);
sub_runner.SetType("Sub") sub_runner.SetType("Sub")
.AddInput(ids_t) .AddInput(ids_t)
.AddInput(factor_tensor) .AddInput(factor_tensor)
......
...@@ -48,7 +48,7 @@ class CIdentityOpKernel : public framework::OpKernel<T> { ...@@ -48,7 +48,7 @@ class CIdentityOpKernel : public framework::OpKernel<T> {
"The ring_id (%d) for c_identity op must be non-negative.", rid)); "The ring_id (%d) for c_identity op must be non-negative.", rid));
out->mutable_data<T>(ctx.GetPlace()); out->mutable_data<T>(ctx.GetPlace());
TensorCopy(*x, out->place(), out); paddle::framework::TensorCopy(*x, out->place(), out);
} }
}; };
......
...@@ -137,7 +137,7 @@ void TestHCCLReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) { ...@@ -137,7 +137,7 @@ void TestHCCLReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) {
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
TensorFromVector(init, ctx, tensor_x); paddle::framework::TensorFromVector(init, ctx, tensor_x);
tensor_x->Resize({num1, num2}); tensor_x->Resize({num1, num2});
ctx.Wait(); ctx.Wait();
...@@ -161,7 +161,7 @@ void TestHCCLReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) { ...@@ -161,7 +161,7 @@ void TestHCCLReduceOp(f::Scope* scope, const p::DeviceContext& ctx, int iter) {
ctx.Wait(); ctx.Wait();
std::vector<float> out_vec; std::vector<float> out_vec;
TensorToVector(*tensor_out, ctx, &out_vec); paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
ctx.Wait(); ctx.Wait();
PrintDebugInfo("output data", out_vec); PrintDebugInfo("output data", out_vec);
......
...@@ -137,7 +137,7 @@ void TestHCCLReduceScatterOp(f::Scope* scope, const p::DeviceContext& ctx) { ...@@ -137,7 +137,7 @@ void TestHCCLReduceScatterOp(f::Scope* scope, const p::DeviceContext& ctx) {
} }
PrintDebugInfo("input data", init); PrintDebugInfo("input data", init);
TensorFromVector(init, ctx, tensor_x); paddle::framework::TensorFromVector(init, ctx, tensor_x);
tensor_x->Resize({num1, num2}); tensor_x->Resize({num1, num2});
ctx.Wait(); ctx.Wait();
...@@ -166,7 +166,7 @@ void TestHCCLReduceScatterOp(f::Scope* scope, const p::DeviceContext& ctx) { ...@@ -166,7 +166,7 @@ void TestHCCLReduceScatterOp(f::Scope* scope, const p::DeviceContext& ctx) {
} }
std::vector<float> out_vec; std::vector<float> out_vec;
TensorToVector(*tensor_out, ctx, &out_vec); paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
ctx.Wait(); ctx.Wait();
PrintDebugInfo("output data", out_vec); PrintDebugInfo("output data", out_vec);
......
...@@ -56,9 +56,9 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) { ...@@ -56,9 +56,9 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
init_y.push_back(static_cast<T>(2.0)); init_y.push_back(static_cast<T>(2.0));
} }
TensorFromVector(init_x, ctx, tensor_x); paddle::framework::TensorFromVector(init_x, ctx, tensor_x);
tensor_x->Resize({10, 10}); tensor_x->Resize({10, 10});
TensorFromVector(init_y, ctx, tensor_y); paddle::framework::TensorFromVector(init_y, ctx, tensor_y);
tensor_y->Resize({10, 10}); tensor_y->Resize({10, 10});
f::AttributeMap attrs; f::AttributeMap attrs;
...@@ -85,7 +85,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) { ...@@ -85,7 +85,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
sync_op->Run(*scope, place); sync_op->Run(*scope, place);
std::vector<T> out_vec; std::vector<T> out_vec;
TensorToVector(*tensor_out, ctx, &out_vec); paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
// sync op copy // sync op copy
auto sync_op2 = f::OpRegistry::CreateOp("c_sync_calc_stream", {{"X", {"X"}}}, auto sync_op2 = f::OpRegistry::CreateOp("c_sync_calc_stream", {{"X", {"X"}}},
......
...@@ -136,7 +136,7 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) { ...@@ -136,7 +136,7 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) {
} }
std::cout << std::endl; std::cout << std::endl;
TensorFromVector(init, ctx, tensor_x); paddle::framework::TensorFromVector(init, ctx, tensor_x);
tensor_x->Resize({num, num}); tensor_x->Resize({num, num});
ctx.Wait(); ctx.Wait();
...@@ -169,7 +169,7 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) { ...@@ -169,7 +169,7 @@ void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) {
// ctx.Wait(); // ctx.Wait();
std::vector<float> out_vec; std::vector<float> out_vec;
TensorToVector(*tensor_out, ctx, &out_vec); paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
EXPECT_EQ(out_vec.size(), init.size()); EXPECT_EQ(out_vec.size(), init.size());
for (uint32_t i = 0; i < out_vec.size(); i++) { for (uint32_t i = 0; i < out_vec.size(); i++) {
......
...@@ -64,7 +64,7 @@ bool Check(T value, int size = 2 * 512 * 8192) { ...@@ -64,7 +64,7 @@ bool Check(T value, int size = 2 * 512 * 8192) {
init.push_back(static_cast<T>(value)); init.push_back(static_cast<T>(value));
} }
TensorFromVector(init, ctx, tensor_x); paddle::framework::TensorFromVector(init, ctx, tensor_x);
bool result = paddle::operators::ContainsNan(ctx, ctx.stream(), tensor_x); bool result = paddle::operators::ContainsNan(ctx, ctx.stream(), tensor_x);
return result; return result;
} }
......
...@@ -145,7 +145,7 @@ void TestHcomRecvOp(f::Scope* scope, const p::DeviceContext& ctx) { ...@@ -145,7 +145,7 @@ void TestHcomRecvOp(f::Scope* scope, const p::DeviceContext& ctx) {
} }
VLOG(3) << "Run op recv_v2"; VLOG(3) << "Run op recv_v2";
std::vector<float> out_vec; std::vector<float> out_vec;
TensorToVector(*tensor_out, ctx, &out_vec); paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
ctx.Wait(); ctx.Wait();
std::vector<float> init(num * num, 1.0 * atoi(getenv("DEST_RANK"))); std::vector<float> init(num * num, 1.0 * atoi(getenv("DEST_RANK")));
EXPECT_EQ(out_vec == init, true); EXPECT_EQ(out_vec == init, true);
......
...@@ -119,7 +119,7 @@ void TestHcomSendOp(f::Scope* scope, const p::DeviceContext& ctx) { ...@@ -119,7 +119,7 @@ void TestHcomSendOp(f::Scope* scope, const p::DeviceContext& ctx) {
std::vector<float> init(num * num, 1.0 * atoi(getenv("DEST_RANK"))); std::vector<float> init(num * num, 1.0 * atoi(getenv("DEST_RANK")));
int rank_id = atoi(getenv("RANK_ID")); int rank_id = atoi(getenv("RANK_ID"));
VLOG(3) << "rank id:" << rank_id; VLOG(3) << "rank id:" << rank_id;
TensorFromVector(init, ctx, tensor_x); paddle::framework::TensorFromVector(init, ctx, tensor_x);
tensor_x->Resize({num, num}); tensor_x->Resize({num, num});
ctx.Wait(); ctx.Wait();
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
......
...@@ -35,8 +35,9 @@ class BinaryBitwiseOpKernel<platform::CUDADeviceContext, Functor> ...@@ -35,8 +35,9 @@ class BinaryBitwiseOpKernel<platform::CUDADeviceContext, Functor>
std::vector<framework::Tensor*> outs = {out}; std::vector<framework::Tensor*> outs = {out};
const auto& cuda_ctx = const auto& cuda_ctx =
ctx.template device_context<platform::CUDADeviceContext>(); ctx.template device_context<platform::CUDADeviceContext>();
LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>( paddle::operators::LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T,
cuda_ctx, ins, &outs, -1, functor); T>(cuda_ctx, ins, &outs, -1,
functor);
} }
}; };
...@@ -56,8 +57,8 @@ class UnaryBitwiseOpKernel<platform::CUDADeviceContext, Functor> ...@@ -56,8 +57,8 @@ class UnaryBitwiseOpKernel<platform::CUDADeviceContext, Functor>
std::vector<framework::Tensor*> outs = {out}; std::vector<framework::Tensor*> outs = {out};
const auto& cuda_ctx = const auto& cuda_ctx =
ctx.template device_context<platform::CUDADeviceContext>(); ctx.template device_context<platform::CUDADeviceContext>();
LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kUnary, T, T>( paddle::operators::LaunchSameDimsElementwiseCudaKernel<
cuda_ctx, ins, &outs, functor); ElementwiseType::kUnary, T, T>(cuda_ctx, ins, &outs, functor);
} }
}; };
......
...@@ -55,8 +55,8 @@ class CompareReduceOpKernel ...@@ -55,8 +55,8 @@ class CompareReduceOpKernel
context.template device_context<platform::CUDADeviceContext>(); context.template device_context<platform::CUDADeviceContext>();
std::vector<const framework::Tensor*> ins = {x, y}; std::vector<const framework::Tensor*> ins = {x, y};
std::vector<framework::Tensor*> outs = {&tmp}; std::vector<framework::Tensor*> outs = {&tmp};
LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kBinary, T, bool>( paddle::operators::LaunchSameDimsElementwiseCudaKernel<
cuda_ctx, ins, &outs, Functor()); ElementwiseType::kBinary, T, bool>(cuda_ctx, ins, &outs, Functor());
// Reduce by 'bitwise and' operator // Reduce by 'bitwise and' operator
std::vector<int> reduce_dims; std::vector<int> reduce_dims;
......
...@@ -35,7 +35,8 @@ class CompareOpKernel<platform::CUDADeviceContext, Functor, InverseFunctor> ...@@ -35,7 +35,8 @@ class CompareOpKernel<platform::CUDADeviceContext, Functor, InverseFunctor>
ctx.template device_context<platform::CUDADeviceContext>(); ctx.template device_context<platform::CUDADeviceContext>();
int axis = PackTensorsIntoVector<OutT>(ctx, &ins, &outs); int axis = PackTensorsIntoVector<OutT>(ctx, &ins, &outs);
LaunchElementwiseCudaKernel<ElementwiseType::kBinary, InT, OutT>( paddle::operators::LaunchElementwiseCudaKernel<ElementwiseType::kBinary,
InT, OutT>(
cuda_ctx, ins, &outs, axis, functor); cuda_ctx, ins, &outs, axis, functor);
} }
}; };
......
...@@ -39,12 +39,13 @@ static void DataCopy(const framework::LoDTensor &src_item, ...@@ -39,12 +39,13 @@ static void DataCopy(const framework::LoDTensor &src_item,
: paddle::platform::MKLDNNDeviceContext::tls() : paddle::platform::MKLDNNDeviceContext::tls()
.get_cur_paddle_data_layout(), .get_cur_paddle_data_layout(),
src_item, &out, platform::CPUPlace()); src_item, &out, platform::CPUPlace());
TensorCopySync(out, platform::CPUPlace(), dst_item); paddle::framework::TensorCopySync(out, platform::CPUPlace(), dst_item);
} else { } else {
TensorCopySync(src_item, platform::CPUPlace(), dst_item); paddle::framework::TensorCopySync(src_item, platform::CPUPlace(),
dst_item);
} }
#else #else
TensorCopySync(src_item, platform::CPUPlace(), dst_item); paddle::framework::TensorCopySync(src_item, platform::CPUPlace(), dst_item);
#endif #endif
} else { } else {
// Not copy, if the src tensor is empty. // Not copy, if the src tensor is empty.
......
...@@ -50,12 +50,13 @@ static void DeepCopy(const framework::LoDTensor &src_item, ...@@ -50,12 +50,13 @@ static void DeepCopy(const framework::LoDTensor &src_item,
: paddle::platform::MKLDNNDeviceContext::tls() : paddle::platform::MKLDNNDeviceContext::tls()
.get_cur_paddle_data_layout(), .get_cur_paddle_data_layout(),
src_item, &out, platform::CPUPlace()); src_item, &out, platform::CPUPlace());
TensorCopySync(out, platform::CPUPlace(), dst_item); paddle::framework::TensorCopySync(out, platform::CPUPlace(), dst_item);
} else { } else {
TensorCopySync(src_item, platform::CPUPlace(), dst_item); paddle::framework::TensorCopySync(src_item, platform::CPUPlace(),
dst_item);
} }
#else #else
TensorCopySync(src_item, platform::CPUPlace(), dst_item); paddle::framework::TensorCopySync(src_item, platform::CPUPlace(), dst_item);
#endif #endif
} else { } else {
// Not copy, if the src tensor is empty. // Not copy, if the src tensor is empty.
......
...@@ -34,10 +34,12 @@ class BinaryLogicalOpKernel<platform::CUDADeviceContext, Functor> ...@@ -34,10 +34,12 @@ class BinaryLogicalOpKernel<platform::CUDADeviceContext, Functor>
int axis = PackTensorsIntoVector<OutT>(ctx, &ins, &outs); int axis = PackTensorsIntoVector<OutT>(ctx, &ins, &outs);
if (ins.size() == 1) { if (ins.size() == 1) {
LaunchElementwiseCudaKernel<ElementwiseType::kUnary, InT, OutT>( paddle::operators::LaunchElementwiseCudaKernel<ElementwiseType::kUnary,
InT, OutT>(
cuda_ctx, ins, &outs, axis, functor); cuda_ctx, ins, &outs, axis, functor);
} else { } else {
LaunchElementwiseCudaKernel<ElementwiseType::kBinary, InT, OutT>( paddle::operators::LaunchElementwiseCudaKernel<ElementwiseType::kBinary,
InT, OutT>(
cuda_ctx, ins, &outs, axis, functor); cuda_ctx, ins, &outs, axis, functor);
} }
} }
......
...@@ -55,7 +55,7 @@ class WriteToArrayOp : public ArrayOp { ...@@ -55,7 +55,7 @@ class WriteToArrayOp : public ArrayOp {
platform::DeviceContextPool::Instance(); platform::DeviceContextPool::Instance();
auto &dev_ctx = *pool.Get(place); auto &dev_ctx = *pool.Get(place);
TensorCopy(x_tensor, place, dev_ctx, out_tensor); paddle::framework::TensorCopy(x_tensor, place, dev_ctx, out_tensor);
} else { } else {
VLOG(10) << "WARNING: The input tensor 'x_tensor' holds no memory, so " VLOG(10) << "WARNING: The input tensor 'x_tensor' holds no memory, so "
"nothing has been written to output array[" "nothing has been written to output array["
......
...@@ -22,9 +22,12 @@ ...@@ -22,9 +22,12 @@
#include "paddle/fluid/operators/controlflow/op_variant.h" #include "paddle/fluid/operators/controlflow/op_variant.h"
#include "paddle/fluid/platform/variant.h" #include "paddle/fluid/platform/variant.h"
namespace pten {
class DenseTensor;
} // namespace pten
namespace paddle { namespace paddle {
namespace framework { namespace framework {
class Tensor;
class ProgramDesc; class ProgramDesc;
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
......
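The last two header hunks show the forward-declaration convention applied across this patch: where a header used to forward-declare paddle::framework::Tensor, it now forward-declares pten::DenseTensor and leaves the remaining framework declarations untouched. As a sketch, a hypothetical header following this commit's convention would start like:

#pragma once

namespace pten {
class DenseTensor;
}  // namespace pten

namespace paddle {
namespace framework {
class Variable;  // other forward declarations stay as they were
}  // namespace framework
}  // namespace paddle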