diff --git a/paddle/fluid/eager/CMakeLists.txt b/paddle/fluid/eager/CMakeLists.txt index df000011e659ac4a6ab1877695d758b5240f5971..d3d90debea2c3a7026a22eae2ffec50481d4edf5 100644 --- a/paddle/fluid/eager/CMakeLists.txt +++ b/paddle/fluid/eager/CMakeLists.txt @@ -1,4 +1,4 @@ -set(eager_deps pten pten_api hook_utils tensor_utils utils global_utils backward pten_tensor legacy autograd_meta grad_node_info grad_tensor_holder gradient_accumulation accumulation_node) +set(eager_deps pten pten_api hook_utils tensor_utils utils global_utils backward pten_tensor legacy autograd_meta grad_node_info grad_tensor_holder accumulation_node) set(fluid_deps tracer layer proto_desc operator op_registry variable_helper memcpy) set(generated_deps dygraph_function dygraph_node) @@ -12,7 +12,7 @@ add_subdirectory(accumulation) add_subdirectory(legacy) cc_library(grad_node_info SRCS grad_node_info.cc DEPS pten pten_api) -cc_library(grad_tensor_holder SRCS grad_tensor_holder.cc DEPS grad_node_info gradient_accumulation) +cc_library(grad_tensor_holder SRCS grad_tensor_holder.cc DEPS grad_node_info gradient_accumulator) cc_library(autograd_meta SRCS autograd_meta.cc DEPS pten pten_api) cc_library(utils SRCS utils.cc DEPS pten pten_api global_utils layer proto_desc operator op_registry variable_helper memcpy scale_op autograd_meta hook_utils) diff --git a/paddle/fluid/eager/accumulation/CMakeLists.txt b/paddle/fluid/eager/accumulation/CMakeLists.txt index bfc7b54bef1567dcc2a0e2e181288aed430dbe73..632e289ba230871fd5630d674767b32d9f7b8b3f 100644 --- a/paddle/fluid/eager/accumulation/CMakeLists.txt +++ b/paddle/fluid/eager/accumulation/CMakeLists.txt @@ -1,2 +1 @@ -cc_library(gradient_accumulation SRCS gradient_accumulation.cc DEPS blas pten pten_api var_type_traits layer math_function) -cc_library(accumulation_node SRCS accumulation_node.cc DEPS gradient_accumulation pten pten_api grad_node_info) +cc_library(accumulation_node SRCS accumulation_node.cc DEPS gradient_accumulator pten pten_api grad_node_info) diff --git a/paddle/fluid/eager/accumulation/accumulation_node.cc b/paddle/fluid/eager/accumulation/accumulation_node.cc index 823c0153d71b029235ace7a262d8440b339b89f9..f6d66ac81b56d33870bf51d750e39068ac32e984 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.cc +++ b/paddle/fluid/eager/accumulation/accumulation_node.cc @@ -13,8 +13,8 @@ // limitations under the License. #include "paddle/fluid/eager/accumulation/accumulation_node.h" -#include "paddle/fluid/eager/accumulation/gradient_accumulation.h" #include "paddle/fluid/eager/eager_tensor.h" +#include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/pten/api/all.h" #include "paddle/pten/core/dense_tensor.h" @@ -35,7 +35,7 @@ static void CopyOrAddTensor(egr::EagerTensor* tensor, *tensor = t; } else { // Accumulation - egr::TensorAdd(t, tensor); + paddle::imperative::TensorAdd(t, tensor); } } diff --git a/paddle/fluid/eager/accumulation/gradient_accumulation.cc b/paddle/fluid/eager/accumulation/gradient_accumulation.cc deleted file mode 100644 index 1224b92dec835619cbb458cd68761522aa4d2dd2..0000000000000000000000000000000000000000 --- a/paddle/fluid/eager/accumulation/gradient_accumulation.cc +++ /dev/null @@ -1,291 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/eager/accumulation/gradient_accumulation.h" -#include -#include -#include -#include "paddle/fluid/eager/eager_tensor.h" -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/imperative/gradient_accumulator.h" -#include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" -#include "paddle/fluid/operators/math/math_function_impl.h" -#include "paddle/fluid/operators/math/selected_rows_functor.h" -#include "paddle/fluid/platform/complex.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/float16.h" -#include "paddle/pten/api/all.h" -#include "paddle/pten/core/convert_utils.h" -#include "unsupported/Eigen/CXX11/Tensor" -#ifdef PADDLE_WITH_XPU -#include "xpu/refactor/math.h" -#endif -#ifdef PADDLE_WITH_ASCEND_CL -#include "paddle/fluid/platform/device/npu/npu_op_runner.h" -#endif - -namespace egr { -template -class TensorAddFunctor : public boost::static_visitor<> { - public: - TensorAddFunctor(int64_t numel, const T* x, T* y) - : numel_(numel), x_(x), y_(y) {} - - void operator()(const paddle::platform::CPUPlace& place) const { - paddle::platform::CPUDeviceContext* ctx = - dynamic_cast( - paddle::platform::DeviceContextPool::Instance().Get(place)); - auto blas = - paddle::operators::math::GetBlas( - *ctx); - blas.AXPY(numel_, 1., x_, y_); - } - -// TODO(jiabin): Support xpu here from gradient_accumulator.cc - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - void operator()(const paddle::platform::CUDAPlace& place) const { - paddle::platform::CUDADeviceContext* ctx = - dynamic_cast( - paddle::platform::DeviceContextPool::Instance().Get(place)); - auto blas = - paddle::operators::math::GetBlas(*ctx); - blas.AXPY(numel_, 1., x_, y_); - } -#else - void operator()(const paddle::platform::CUDAPlace& place) const { - PADDLE_THROW(paddle::platform::errors::PermissionDenied( - "Gradient accumulation on place (%s) " - "is not supported in imperative mode", - place)); - } -#endif - - // TODO(jiabin): Support Npu here from gradient_accumulator.cc - // there is NO blas in CUDAPinnedPlace - void operator()(const paddle::platform::CUDAPinnedPlace& place) const { - PADDLE_THROW(paddle::platform::errors::PermissionDenied( - "Gradient accumulation on place (%s) " - "is not supported in imperative mode", - place)); - } - -#ifdef PADDLE_WITH_ASCEND_CL - void operator()(const paddle::platform::NPUPlace& place) const { - PADDLE_THROW(paddle::platform::errors::PermissionDenied( - "Gradient accumulation on place (%s) " - "is not supported in imperative mode", - place)); - } -#else - void operator()(const paddle::platform::NPUPlace& place) const { - PADDLE_THROW(paddle::platform::errors::PermissionDenied( - "Gradient accumulation on place (%s) " - "is not supported in imperative mode", - place)); - } -#endif - -#ifdef PADDLE_WITH_XPU - void operator()(const paddle::platform::XPUPlace& place) const { - paddle::platform::XPUDeviceContext* ctx = - dynamic_cast( - paddle::platform::DeviceContextPool::Instance().Get(place)); - xpu::add(ctx->x_context(), x_, y_, y_, static_cast(numel_)); - } -#else - void operator()(const paddle::platform::XPUPlace& place) const { - PADDLE_THROW(paddle::platform::errors::PermissionDenied( - "Gradient accumulation on place (%s) " - "is not supported in imperative mode", - place)); - } -#endif - -#ifdef PADDLE_WITH_MLU - void operator()(const paddle::platform::MLUPlace& place) const { - PADDLE_THROW(paddle::platform::errors::PermissionDenied( - "Gradient accumulation on place (%s) " - "is not supported in imperative mode", - place)); - } -#else - void operator()(const paddle::platform::MLUPlace& place) const { - PADDLE_THROW(paddle::platform::errors::PermissionDenied( - "Gradient accumulation on place (%s) " - "is not supported in imperative mode", - place)); - } -#endif - -#ifdef PADDLE_WITH_IPU - void operator()(const paddle::platform::IPUPlace& place) const { - PADDLE_THROW(paddle::platform::errors::PermissionDenied( - "Gradient accumulation on place (%s) " - "is not supported in imperative mode", - place)); - } -#else - void operator()(const paddle::platform::IPUPlace& place) const { - PADDLE_THROW(paddle::platform::errors::PermissionDenied( - "Gradient accumulation on place (%s) " - "is not supported in imperative mode", - place)); - } -#endif - - void operator()(const paddle::platform::NPUPinnedPlace& place) const { - PADDLE_THROW(paddle::platform::errors::PermissionDenied( - "Gradient accumulation on place (%s) " - "is not supported in imperative mode", - place)); - } - - private: - int64_t numel_; - const T* x_; - mutable T* y_; -}; - -template -void TensorAddImpl(const std::shared_ptr& src, - pten::DenseTensor* dst, - const paddle::platform::Place& place) { - paddle::platform::DeviceContextPool& pool = - paddle::platform::DeviceContextPool::Instance(); - paddle::platform::DeviceContext* ctx = pool.Get(place); - auto dev_ctx = dynamic_cast(ctx); - paddle::operators::math::ElementwiseAddTo func; - func(dev_ctx, *(src.get()), dst); -} - -template -void TensorAddImpl(const paddle::framework::Tensor& src, - paddle::framework::Tensor* dst, - const paddle::platform::Place& place) { - paddle::platform::DeviceContextPool& pool = - paddle::platform::DeviceContextPool::Instance(); - paddle::platform::DeviceContext* ctx = pool.Get(place); - auto dev_ctx = dynamic_cast(ctx); - paddle::operators::math::ElementwiseAddTo func; - func(dev_ctx, src, dst); -} - -void TensorAdd(const egr::EagerTensor& src, egr::EagerTensor* dst) { - // TODO(jiabin): Support other tensor type later - std::shared_ptr dst_tensor = - std::dynamic_pointer_cast(dst->impl()); - std::shared_ptr src_tensor = - std::dynamic_pointer_cast(src.impl()); - - auto numel = src_tensor->numel(); - - if (numel == 0) { - return; - } - - PADDLE_ENFORCE_EQ( - dst_tensor->numel(), numel, - paddle::platform::errors::PreconditionNotMet( - "The number of elements of source tensor and destination tensor " - "should be equal, but got the number of elements of source tensor is " - "%zu and the number of elements of destination tensor is %zu.", - numel, dst_tensor->numel())); - - auto data_type = pten::TransToProtoVarType(src_tensor->dtype()); - auto place = src_tensor->place(); - - PADDLE_ENFORCE_EQ(pten::TransToProtoVarType(dst_tensor->dtype()), data_type, - paddle::platform::errors::PreconditionNotMet( - "The data type of source tensor and destination tensor " - "should be equal, Otherwise, the calculation results " - "will be incorrect.")); - -#define PADDLE_TENSOR_ADD(cpp_type) \ - if (data_type == paddle::framework::DataTypeTrait::DataType()) { \ - TensorAddFunctor func( \ - numel, src_tensor->data(), \ - dst_tensor->mutable_data(place)); \ - paddle::platform::VisitPlace(place, func); \ - return; \ - } - - // TODO(jiabin): Support NPU here - PADDLE_TENSOR_ADD(float); -// NOTE(phlrain): xpu only support float -#ifndef PADDLE_WITH_XPU - PADDLE_TENSOR_ADD(double); - // NOTE(chenweihang): only support complex grad tensor accumulated, - // support selected rows if needed in the future - PADDLE_TENSOR_ADD(paddle::platform::complex); - PADDLE_TENSOR_ADD(paddle::platform::complex); -#endif -#undef PADDLE_TENSOR_ADD - - if (data_type == paddle::framework::proto::VarType::FP16) { - if (paddle::platform::is_gpu_place(place)) { -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - return TensorAddImpl(src_tensor, - dst_tensor.get(), place); -#else - PADDLE_THROW(paddle::platform::errors::Unimplemented( - "Gradient accumulation of data type (%s) on place (%s) is not " - "supported in imperative mode", - paddle::framework::DataTypeToString(data_type), place)); -#endif - } else if (paddle::platform::is_cpu_place(place)) { - return TensorAddImpl(src_tensor, - dst_tensor.get(), place); - } - } - PADDLE_THROW(paddle::platform::errors::Unimplemented( - "Gradient accumulation of data type (%s) on place (%s) is not " - "supported in imperative mode", - paddle::framework::DataTypeToString(data_type), place)); -} - -void VariableAdd(const egr::EagerTensor& src_tensor, - egr::EagerTensor* dst_tensor) { - auto& src = src_tensor.Var(); - auto* dst = dst_tensor->MutableVar(); - - if (dst->IsType()) { - if (src.IsType()) { - paddle::imperative::TensorAdd(src, dst); - } else if (src.IsType()) { - paddle::imperative::SelectedRowsAddToTensor(src, dst); - } else { - PADDLE_THROW(paddle::platform::errors::InvalidArgument( - "Unexpected branch, output variable type is %s", - paddle::framework::ToTypeName(dst->Type()))); - } - } else { - if (src.IsType()) { - paddle::framework::Variable new_dst; - paddle::imperative::SelectedRowsAddTensor(*dst, src, &new_dst); - *dst = std::move(new_dst); - } else { - PADDLE_THROW(paddle::platform::errors::InvalidArgument( - "Unexpected branch, output variable type is %s", - paddle::framework::ToTypeName(dst->Type()))); - } - } -} - -} // namespace egr diff --git a/paddle/fluid/eager/accumulation/gradient_accumulation.h b/paddle/fluid/eager/accumulation/gradient_accumulation.h deleted file mode 100644 index 725410dac729e6421cbcd6a4d104f7d271404cff..0000000000000000000000000000000000000000 --- a/paddle/fluid/eager/accumulation/gradient_accumulation.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "paddle/fluid/eager/eager_tensor.h" -#include "paddle/pten/api/all.h" -namespace egr { -// Accumulation API -void TensorAdd(const egr::EagerTensor& src, egr::EagerTensor* dst); -void VariableAdd(const egr::EagerTensor& src, egr::EagerTensor* dst); - -} // namespace egr diff --git a/paddle/fluid/eager/grad_tensor_holder.cc b/paddle/fluid/eager/grad_tensor_holder.cc index c0344e20fb9bbdedaf37aa91809011d2fd0b6276..0183f88772f81aff34199767e5da60c789c1ad40 100644 --- a/paddle/fluid/eager/grad_tensor_holder.cc +++ b/paddle/fluid/eager/grad_tensor_holder.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/fluid/eager/grad_tensor_holder.h" -#include "paddle/fluid/eager/accumulation/gradient_accumulation.h" +#include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/operators/math/math_function.h" @@ -72,17 +72,17 @@ void GradTensorHolder::add(size_t slot_id, size_t rank, } else { // Accumulation if (t.initialized() && buffer_tensor.initialized()) { - TensorAdd(t, &buffer_tensor); + paddle::imperative::TensorAdd(t, &buffer_tensor); } else if (t.Var().IsInitialized() && buffer_tensor.Var().IsInitialized()) { - VariableAdd(t, &buffer_tensor); + paddle::imperative::VariableAdd(t, &buffer_tensor); } else if (t.Var().IsInitialized() && buffer_tensor.initialized()) { // TODO(jiabin): This can be merge to upper if case. buffer_tensor.SyncToVar(); - VariableAdd(t, &buffer_tensor); + paddle::imperative::VariableAdd(t, &buffer_tensor); } else if (t.initialized() && buffer_tensor.Var().IsInitialized()) { buffer_tensor.SyncToTensor(); - TensorAdd(t, &buffer_tensor); + paddle::imperative::TensorAdd(t, &buffer_tensor); } else { // Should not happend case // 1. both not init diff --git a/paddle/fluid/imperative/CMakeLists.txt b/paddle/fluid/imperative/CMakeLists.txt index d0f8d39f927f652c744eb3ff9400a97338fd8aa6..e421647c11723cec5b5c264f7cdb7fd1ca49073c 100644 --- a/paddle/fluid/imperative/CMakeLists.txt +++ b/paddle/fluid/imperative/CMakeLists.txt @@ -1,8 +1,8 @@ cc_library(imperative_flag SRCS flags.cc DEPS gflags flags) IF(WITH_XPU) -cc_library(prepared_operator SRCS prepared_operator.cc DEPS xpu_op_list proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils pten pten_utils) +cc_library(prepared_operator SRCS prepared_operator.cc DEPS xpu_op_list proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils pten pten_utils pten_api) ELSE() -cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils pten pten_utils) +cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils pten pten_utils pten_api) ENDIF() cc_library(layer SRCS layer.cc DEPS prepared_operator math_function imperative_flag variable_helper op_registry) add_subdirectory(jit) diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc index ffd9aae8ff013145ee166ba687b71405160905ec..257953252bc3092f5589ca7fdd577ac1301ba015 100644 --- a/paddle/fluid/imperative/gradient_accumulator.cc +++ b/paddle/fluid/imperative/gradient_accumulator.cc @@ -214,9 +214,37 @@ void TensorAddImpl(const framework::Tensor& src, framework::Tensor* dst, func(dev_ctx, src, dst); } -void TensorAdd(const framework::Variable& src, framework::Variable* dst) { +std::shared_ptr GetInnerDstTensor(egr::EagerTensor* dst) { + std::shared_ptr dst_tensor = + std::dynamic_pointer_cast(dst->impl()); + return dst_tensor; +} + +std::shared_ptr GetInnerSrcTensor( + const egr::EagerTensor& src) { + std::shared_ptr dst_tensor = + std::dynamic_pointer_cast(src.impl()); + return dst_tensor; +} + +std::shared_ptr GetInnerDstTensor(framework::Variable* dst) { auto* dst_tensor = dst->GetMutable(); + return std::make_shared(*dst_tensor); +} + +std::shared_ptr GetInnerSrcTensor( + const framework::Variable& src) { auto& src_tensor = src.Get(); + return std::make_shared(src_tensor); +} + +template +void TensorAdd(const VarType& src, VarType* dst) { + std::shared_ptr d_tensor = GetInnerDstTensor(dst); + std::shared_ptr s_tensor = GetInnerSrcTensor(src); + + auto* dst_tensor = d_tensor.get(); + auto& src_tensor = *s_tensor.get(); auto numel = src_tensor.numel(); @@ -336,6 +364,11 @@ void TensorAdd(const framework::Variable& src, framework::Variable* dst) { framework::DataTypeToString(data_type), place)); } +template void TensorAdd(const framework::Variable& src, + framework::Variable* dst); +template void TensorAdd(const egr::EagerTensor& src, + egr::EagerTensor* dst); + void SelectedRowsAddToTensor(const framework::Variable& src, framework::Variable* dst) { auto* dst_tensor = dst->GetMutable(); @@ -462,13 +495,41 @@ std::shared_ptr SelectedRowsMerge( framework::DataTypeToString(data_type))); } +void VariableAdd(const egr::EagerTensor& src_tensor, + egr::EagerTensor* dst_tensor) { + auto& src = src_tensor.Var(); + auto* dst = dst_tensor->MutableVar(); + + if (dst->IsType()) { + if (src.IsType()) { + paddle::imperative::TensorAdd(src, dst); + } else if (src.IsType()) { + paddle::imperative::SelectedRowsAddToTensor(src, dst); + } else { + PADDLE_THROW(paddle::platform::errors::InvalidArgument( + "Unexpected branch, output variable type is %s", + paddle::framework::ToTypeName(dst->Type()))); + } + } else { + if (src.IsType()) { + paddle::framework::Variable new_dst; + paddle::imperative::SelectedRowsAddTensor(*dst, src, &new_dst); + *dst = std::move(new_dst); + } else { + PADDLE_THROW(paddle::platform::errors::InvalidArgument( + "Unexpected branch, output variable type is %s", + paddle::framework::ToTypeName(dst->Type()))); + } + } +} + void VariableWrapperAdd(std::shared_ptr var, VariableWrapper* dst_var, bool unchange_input) { auto& src = var->Var(); auto* dst = dst_var->MutableVar(); if (dst->IsType()) { if (src.IsType()) { - TensorAdd(src, dst); + TensorAdd(src, dst); } else if (src.IsType()) { SelectedRowsAddToTensor(src, dst); } else { @@ -535,7 +596,7 @@ void GradientAccumulator::AccumulateGrad() { "previous gradient."; if (dst->IsType()) { if (src->IsType()) { - TensorAdd(*src, dst); + TensorAdd(*src, dst); } else if (src->IsType()) { SelectedRowsAddToTensor(*src, dst); } diff --git a/paddle/fluid/imperative/gradient_accumulator.h b/paddle/fluid/imperative/gradient_accumulator.h index f9f8081d30fd47621abb0ab90c40108db0e21295..a57335d08a28b356d72ad8df9ca055a4749a9124 100644 --- a/paddle/fluid/imperative/gradient_accumulator.h +++ b/paddle/fluid/imperative/gradient_accumulator.h @@ -18,6 +18,7 @@ #include #include +#include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/imperative/hooks.h" #include "paddle/fluid/imperative/layer.h" @@ -170,7 +171,10 @@ void SelectedRowsAddTensor(const framework::Variable& src_selected_rows_var, const framework::Variable& src_tensor_var, framework::Variable* dst_tensor_var); -void TensorAdd(const framework::Variable& src, framework::Variable* dst); +template +void TensorAdd(const VarType& src, VarType* dst); + +void VariableAdd(const egr::EagerTensor& src, egr::EagerTensor* dst); } // namespace imperative } // namespace paddle diff --git a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc index 25ffab470646b3e69e02e049967f540adb776a08..6210cb108bd7966eab0dbb81ce5560122241ea66 100644 --- a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc +++ b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc @@ -28,8 +28,6 @@ namespace framework = paddle::framework; namespace paddle { namespace imperative { -void TensorAdd(const framework::Variable& src, framework::Variable* dst); - template int TensorddTest(Place1 place1, Place2 place2, T t1, T t2) { framework::Variable var1; @@ -69,7 +67,7 @@ int TensorddTest(Place1 place1, Place2 place2, T t1, T t2) { sizeof(T) * dst_data.size(), 0); #endif } - imperative::TensorAdd(var1, &var2); + imperative::TensorAdd(var1, &var2); framework::LoDTensor rlt; platform::CPUPlace rlt_place; framework::TensorCopySync(*dst, rlt_place, &rlt);