未验证 提交 7df301f2 编写于 作者: Z Zhanlue Yang 提交者: GitHub

Added performance tests for Eager Dygraph #1 (#37638)

上级 48faf638
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h"
#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <vector>
// Eager
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/backward.h"
#include "paddle/fluid/eager/tests/test_utils.h"
#include "paddle/fluid/eager/utils.h"
// Eager Generated
#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h"
// Fluid
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/imperative/basic_engine.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/eager/tests/benchmark/benchmark_utils.h"
#include "paddle/pten/core/kernel_registry.h"
// Iteration count used by the scale and matmul benchmarks below whenever
// accuracy_check is false (timing mode).
static size_t max_num_benchmark_runs = 5000;
namespace egr {
/* --------------------- */
/* ---- Eager Scale ---- */
/* --------------------- */
// Benchmarks the eager-mode scale op: chains egr::scale calls
// (scale = 2.0, bias = 3.0) starting from `tensor` and runs backward on the
// result.
//
// tensor:         input tensor; its gradient is inspected when checking.
// accuracy_check: when true, only 10 iterations are chained and the forward
//                 value / input gradient are compared with fixed constants.
//
// NOTE(review): the closing braces of the loop / if / function are missing
// from this extract, so the exact extent of the loop cannot be confirmed here.
void benchmark_eager_scale(const EagerTensor& tensor, bool accuracy_check) {
EagerTensor input_tensor = tensor;
float scale = 2.0;
float bias = 3.0;
// Short chain for accuracy checks, full-length chain for timing.
size_t max_num_runs = accuracy_check ? 10 : max_num_benchmark_runs;
for (size_t i = 0; i < max_num_runs; i++) {
// The output of each scale call becomes the input of the next one.
input_tensor =
egr::scale(input_tensor, scale, bias, true /*bias_after_scale*/,
true /*trace_backward*/);
std::vector<EagerTensor> target_tensors = {input_tensor};
RunBackward(target_tensors, {});
if (accuracy_check) {
// Examine Forward Output (w.r.t max_num_runs = 10)
CompareTensorWithValue<float>(input_tensor, 8189.0);
// Examine Backward Grad (w.r.t max_num_runs = 10): 2^10 = 1024
CompareGradTensorWithValue<float>(tensor, 1024.0);
/* ----------------------------------- */
/* ---- Eager Intermediate Matmul ---- */
/* ----------------------------------- */
// Benchmarks the generated eager matmul_v2 forward function: repeatedly
// multiplies the running result by Y (no transposition on either operand)
// and runs backward on the result.
//
// X, Y:           matmul operands; X seeds the running result.
// accuracy_check: when true, only 2 iterations are chained and the forward
//                 value and both input gradients are compared with 16.
void benchmark_eager_intermediate_matmul(const EagerTensor& X,
const EagerTensor& Y,
bool accuracy_check) {
EagerTensor input_tensor0 = X;
// Short chain for accuracy checks, full-length chain for timing.
size_t max_num_runs = accuracy_check ? 2 : max_num_benchmark_runs;
for (size_t i = 0; i < max_num_runs; i++) {
// Feed the previous product back in as the left operand.
input_tensor0 = matmul_v2_dygraph_function(
input_tensor0, Y, {{"trans_x", false}, {"trans_y", false}});
std::vector<EagerTensor> target_tensors = {input_tensor0};
RunBackward(target_tensors, {});
if (accuracy_check) {
// Examine Forward Output (w.r.t max_num_runs = 2)
CompareVariableWithValue<float>(input_tensor0, 16);
// Examine Backward Grad (w.r.t max_num_runs = 2)
CompareGradVariableWithValue<float>(X, 16);
CompareGradVariableWithValue<float>(Y, 16);
/* -------------------------------- */
/* ---- Eager Intermediate MLP ---- */
/* -------------------------------- */
// Benchmarks an MLP built from generated eager forward functions:
// MLP_NUM_LINEAR layers of matmul_v2(input, Ws[i]) + Bs[i], followed by a
// reduce_sum over all elements, then a backward pass.
//
// X:              network input.
// Ws, Bs:         per-layer weights and biases, indexed by layer number.
// accuracy_check: when true, the output and the gradients of X and Ws[0] are
//                 compared with the entries of `result`.
//
// NOTE(review): the initializer of `result` is cut off in this extract
// (dangling `=`); presumably it is compute_mlp_expected_results() — confirm.
void benchmark_eager_intermediate_mlp(const EagerTensor& X,
const std::vector<EagerTensor>& Ws,
const std::vector<EagerTensor>& Bs,
bool accuracy_check) {
EagerTensor input0 = X;
for (size_t i = 0; i < MLP_NUM_LINEAR; i++) {
// Linear layer i: matmul followed by bias add.
EagerTensor Out = matmul_v2_dygraph_function(
input0, Ws[i], {{"trans_x", false}, {"trans_y", false}});
input0 = elementwise_add_dygraph_function(Out, Bs[i], {});
// Collapse the final activation with a full reduction.
EagerTensor Out = reduce_sum_dygraph_function(input0, {{"reduce_all", true}});
std::vector<EagerTensor> target_tensors = {Out};
RunBackward(target_tensors, {});
if (accuracy_check) {
std::unordered_map<std::string, float> result =
// Examine Forward Output
CompareVariableWithValue<float>(Out, result["Out"]);
// Examine Backward Grad
CompareGradVariableWithValue<float>(X, result["GradX"]);
CompareGradVariableWithValue<float>(Ws[0], result["GradW"]);
} // namespace egr
namespace paddle {
namespace imperative {
// Verifies that the first element of X's LoDTensor equals `value`.
//
// X:     variable whose forward tensor is inspected.
// place: place passed to mutable_data; when it equals CUDAPlace() the data is
//        first copied into a host-side buffer.
// value: expected value; compared with exact float equality against
//        element 0 only.
//
// NOTE(review): several statements are truncated in this extract (the
// initializers of `pool` / `dev_ctx` and the head of the final check).
static void FluidCheckTensorValue(const std::shared_ptr<imperative::VarBase>& X,
const paddle::platform::Place& place,
float value) {
auto* tensor = X->MutableVar()->GetMutable<framework::LoDTensor>();
float* t_ptr = tensor->mutable_data<float>(place);
// Host staging buffer sized to hold every element of the tensor.
std::vector<float> host_data(tensor->numel());
// For CUDA tensors, copy the data to the host and read from the copy instead.
if (place == paddle::platform::CUDAPlace()) {
paddle::platform::DeviceContextPool& pool =
auto* dev_ctx =
auto stream = dev_ctx->stream();
paddle::memory::Copy(paddle::platform::CPUPlace(), host_data.data(),
paddle::platform::CUDAPlace(), t_ptr,
sizeof(float) * tensor->numel(), stream);
t_ptr = host_data.data();
VLOG(6) << "Tensor Value: " << t_ptr[0] << ", Expected Value: " << value;
t_ptr[0] == value,
"Detected numerical Error, Expected %f but got %f", value, t_ptr[0]));
// Verifies that the first element of X's gradient LoDTensor equals `value`.
//
// X:     variable whose gradient tensor (MutableGradVar) is inspected.
// place: place passed to mutable_data; when it equals CUDAPlace() the data is
//        first copied into a host-side buffer.
// value: expected value; compared with exact float equality against
//        element 0 only.
//
// NOTE(review): several statements are truncated in this extract (the
// initializers of `pool` / `dev_ctx` and the head of the final check).
static void FluidCheckGradTensorValue(
const std::shared_ptr<imperative::VarBase>& X,
const paddle::platform::Place& place, float value) {
auto* grad_tensor = X->MutableGradVar()->GetMutable<framework::LoDTensor>();
float* g_ptr = grad_tensor->mutable_data<float>(place);
// Host staging buffer sized to hold every element of the gradient.
std::vector<float> g_host_data(grad_tensor->numel());
// For CUDA tensors, copy the data to the host and read from the copy instead.
if (place == paddle::platform::CUDAPlace()) {
paddle::platform::DeviceContextPool& pool =
auto* dev_ctx =
auto stream = dev_ctx->stream();
paddle::memory::Copy(paddle::platform::CPUPlace(), g_host_data.data(),
paddle::platform::CUDAPlace(), g_ptr,
sizeof(float) * grad_tensor->numel(), stream);
g_ptr = g_host_data.data();
VLOG(6) << "Tensor Value: " << g_ptr[0] << ", Expected Value: " << value;
g_ptr[0] == value,
"Detected numerical Error, Expected %f but got %f", value, g_ptr[0]));
/* --------------------- */
/* ---- Fluid Scale ---- */
/* --------------------- */
// TODO(jiabin): Change this and remove nolint
// Benchmarks the legacy (fluid) dygraph scale op: traces a chain of "scale"
// ops through imperative::Tracer starting from X, then initialises the
// backward engine on the final output.
//
// X:              input variable; its gradient is inspected when checking.
// place:          place on which the ops are traced.
// accuracy_check: when true, only 10 iterations are chained and the results
//                 are compared with the same constants as the eager variant
//                 (8189 forward, 1024 gradient).
//
// NOTE(review): "scale" and "bias" are assigned from int literals here while
// the eager variant uses float values — confirm the attribute type the op
// expects.
// NOTE(review): the `outs` initializer is truncated in this extract.
void benchmark_fluid_scale(const std::shared_ptr<imperative::VarBase>& X,
const paddle::platform::Place& place,
bool accuracy_check) {
imperative::Tracer tracer;
framework::AttributeMap attrs;
attrs["use_mkldnn"] = false;
attrs["scale"] = 2;
attrs["bias"] = 3;
attrs["bias_after_scale"] = true;
std::shared_ptr<imperative::VarBase> tmp_out = X;
// Short chain for accuracy checks, full-length chain for timing.
size_t max_num_runs = accuracy_check ? 10 : max_num_benchmark_runs;
for (size_t i = 0; i < max_num_runs; i++) {
// The previous output is the input of the next traced op.
imperative::NameVarBaseMap ins = {{"X", {tmp_out}}};
imperative::NameVarBaseMap outs = {
new imperative::VarBase(true, "Out"))}}};
tracer.TraceOp("scale", ins, outs, attrs, place, true);
tmp_out = outs["Out"][0];
auto* engine = tracer.GetEngine();
std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
engine->Init({tmp_out}, grad_tensors, false /*retain_graph*/);
if (accuracy_check) {
FluidCheckTensorValue(tmp_out, place, 8189.0);
FluidCheckGradTensorValue(X, place, 1024.0);
/* ---------------------- */
/* ---- Fluid Matmul ---- */
/* ---------------------- */
// Benchmarks the legacy (fluid) dygraph matmul_v2 op: traces a chain of
// "matmul_v2" ops (running result times Y) through imperative::Tracer, then
// initialises the backward engine on the final output.
//
// X, Y:           matmul operands; X seeds the running result.
// place:          place on which the ops are traced.
// accuracy_check: when true, only 2 iterations are chained and the forward
//                 value and both input gradients are compared with 16.
//
// NOTE(review): the `outs` initializer is truncated in this extract.
void benchmark_fluid_matmul(const std::shared_ptr<imperative::VarBase>& X,
const std::shared_ptr<imperative::VarBase>& Y,
const paddle::platform::Place& place,
bool accuracy_check) {
imperative::Tracer tracer;
std::shared_ptr<imperative::VarBase> tmp_out = X;
// Short chain for accuracy checks, full-length chain for timing.
size_t max_num_runs = accuracy_check ? 2 : max_num_benchmark_runs;
for (size_t i = 0; i < max_num_runs; i++) {
// No attributes are set; an empty map is passed to the tracer.
framework::AttributeMap attrs;
imperative::NameVarBaseMap ins = {{"X", {tmp_out}}, {"Y", {Y}}};
imperative::NameVarBaseMap outs = {
new imperative::VarBase(true, "Out"))}}};
tracer.TraceOp("matmul_v2", ins, outs, attrs, place, true);
tmp_out = outs["Out"][0];
auto* engine = tracer.GetEngine();
std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
engine->Init({tmp_out}, grad_tensors, false /*retain_graph*/);
if (accuracy_check) {
FluidCheckTensorValue(tmp_out, place, 16);
FluidCheckGradTensorValue(X, place, 16);
FluidCheckGradTensorValue(Y, place, 16);
/* ------------------- */
/* ---- Fluid MLP ---- */
/* ------------------- */
// Benchmarks an MLP built from legacy (fluid) dygraph ops traced through
// imperative::Tracer: MLP_NUM_LINEAR layers of matmul_v2 + elementwise_add,
// followed by reduce_sum with reduce_all, then initialisation of the backward
// engine on the reduced output.
//
// X:              network input.
// Ws, Bs:         weights and biases.
// place:          place on which the ops are traced.
// accuracy_check: when true, the output and the gradients of X and Ws[0] are
//                 compared with the entries of `result`.
//
// NOTE(review): the matmul uses Ws[0] on every iteration, whereas the bias
// uses Bs[i] and benchmark_eager_intermediate_mlp uses Ws[i] — confirm
// whether Ws[i] was intended.
// NOTE(review): the `outs` initializers and the initializer of `result` are
// truncated in this extract.
void benchmark_fluid_mlp(
const std::shared_ptr<imperative::VarBase>& X,
const std::vector<std::shared_ptr<imperative::VarBase>>& Ws,
const std::vector<std::shared_ptr<imperative::VarBase>>& Bs,
const paddle::platform::Place& place, bool accuracy_check) {
imperative::Tracer tracer;
// ins / outs / attrs are reused across all traced ops.
imperative::NameVarBaseMap ins;
imperative::NameVarBaseMap outs;
framework::AttributeMap attrs;
std::shared_ptr<imperative::VarBase> input0 = X;
for (size_t i = 0; i < MLP_NUM_LINEAR; i++) {
// Matmul0
ins = {{"X", {input0}}, {"Y", {Ws[0]}}};
outs = {{"Out",
new imperative::VarBase(true, "Out"))}}};
tracer.TraceOp("matmul_v2", ins, outs, attrs, place, true);
// EW-Add0
ins = {{"X", outs["Out"]}, {"Y", {Bs[i]}}};
outs = {{"Out",
new imperative::VarBase(true, "Out"))}}};
tracer.TraceOp("elementwise_add", ins, outs, attrs, place, true);
// The layer output becomes the next layer's input.
input0 = outs["Out"][0];
// ReduceSum
ins = {{"X", {input0}}};
outs = {{"Out",
new imperative::VarBase(true, "Out"))}}};
attrs = {{"reduce_all", true}};
tracer.TraceOp("reduce_sum", ins, outs, attrs, place, true);
auto* engine = tracer.GetEngine();
std::vector<std::shared_ptr<imperative::VarBase>> grad_tensors{nullptr};
engine->Init(outs["Out"], grad_tensors, false /*retain_graph*/);
if (accuracy_check) {
std::unordered_map<std::string, float> result =
FluidCheckTensorValue(outs["Out"][0], place, result["Out"]);
FluidCheckGradTensorValue(X, place, result["GradX"]);
FluidCheckGradTensorValue(Ws[0], place, result["GradW"]);
} // namespace imperative
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <math.h>
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/pten/api/all.h"
/* MLP Configurations */
// Each linear layer computes: Out1 = X[M, N] x W[N, K] + B[K]
// The network output is:      Out = ReduceSum(OutN)
// Dimensions M, N and K of the formula above (K is tied to N).
#define MLP_M 4
#define MLP_N 16
#define MLP_K MLP_N
// Values used for X, W and B when computing the expected results.
#define MLP_X_VAL 1.0
#define MLP_W_VAL 2.0
#define MLP_B_VAL 3.0
// Number of linear layers chained by the MLP benchmarks.
#define MLP_NUM_LINEAR 1000
namespace egr {
// Computes the closed-form values the MLP benchmarks are checked against,
// derived purely from the MLP_* macros.
//
// Returns a map with three entries:
//   "Out"   - per-element output after MLP_NUM_LINEAR layers, multiplied by
//             MLP_M * MLP_N.
//   "GradX" - (MLP_W_VAL * MLP_N) ^ MLP_NUM_LINEAR.
//   "GradW" - (MLP_W_VAL * MLP_N) ^ (MLP_NUM_LINEAR - 1) * MLP_X_VAL * MLP_M.
//
// NOTE(review): with MLP_W_VAL * MLP_N = 32 and MLP_NUM_LINEAR = 1000, these
// powers are far beyond the range of float, so the stored values evaluate to
// infinity — confirm this is acceptable for the comparisons that use them.
inline std::unordered_map<std::string, float> compute_mlp_expected_results() {
float Out = MLP_X_VAL;
// One step per linear layer: every output element is a sum of MLP_N products
// plus the bias.
for (size_t i = 0; i < MLP_NUM_LINEAR; i++) {
Out = Out * MLP_W_VAL * MLP_N + MLP_B_VAL;
// Scale the per-element value by MLP_M * MLP_N for the final reduction.
Out = Out * MLP_M * MLP_N;
float GradX = 1.0 * pow((MLP_W_VAL * MLP_N), MLP_NUM_LINEAR);
float GradW0 =
1.0 * pow((MLP_W_VAL * MLP_N), (MLP_NUM_LINEAR - 1)) * MLP_X_VAL * MLP_M;
return {{"Out", Out}, {"GradX", GradX}, {"GradW", GradW0}};
/* ---- Eager Scale ---- */
// Chained eager scale benchmark; accuracy_check (default false) switches from
// timing mode to a short run with value checks.
void benchmark_eager_scale(const EagerTensor& tensor,
bool accuracy_check = false);
/* ---- Eager MatMul ---- */
// NOTE(review): no definition of benchmark_eager_matmul is visible in this
// extract — confirm where it is implemented.
void benchmark_eager_matmul(const EagerTensor& X, const EagerTensor& Y,
bool accuracy_check = false);
/* ---- Eager MLP ---- */
// NOTE(review): no definition of benchmark_eager_mlp is visible in this
// extract — confirm where it is implemented.
void benchmark_eager_mlp(const EagerTensor& X,
const std::vector<EagerTensor>& Ws,
const std::vector<EagerTensor>& Bs,
bool accuracy_check = false);
/* ---- Eager Intermediate MatMul ---- */
// Chained matmul benchmark built on the generated matmul_v2 forward function.
void benchmark_eager_intermediate_matmul(const EagerTensor& X,
const EagerTensor& Y,
bool accuracy_check = false);
/* ---- Eager Intermediate MLP ---- */
// MLP benchmark built on the generated matmul_v2 / elementwise_add /
// reduce_sum forward functions.
void benchmark_eager_intermediate_mlp(const EagerTensor& X,
const std::vector<EagerTensor>& Ws,
const std::vector<EagerTensor>& Bs,
bool accuracy_check = false);
} // namespace egr
namespace paddle {
namespace imperative {
/* ---- Fluid Scale ---- */
// Chained "scale" benchmark traced on `place`; accuracy_check (default false)
// switches from timing mode to a short run with value checks.
// TODO(jiabin): Change this and remove nolint
void benchmark_fluid_scale(
const std::shared_ptr<imperative::VarBase>& X, // NOLINT
const paddle::platform::Place& place, bool accuracy_check = false);
/* ---- Fluid MatMul ---- */
// Chained "matmul_v2" benchmark traced on `place`.
void benchmark_fluid_matmul(
const std::shared_ptr<imperative::VarBase>& X,
const std::shared_ptr<imperative::VarBase>& Y, // NOLINT
const paddle::platform::Place& place, bool accuracy_check = false);
/* ---- Fluid MLP ---- */
// MLP benchmark (matmul_v2 + elementwise_add layers, then reduce_sum) traced
// on `place`.
void benchmark_fluid_mlp(
const std::shared_ptr<imperative::VarBase>& X,
const std::vector<std::shared_ptr<imperative::VarBase>>& Ws,
const std::vector<std::shared_ptr<imperative::VarBase>>& Bs,
const paddle::platform::Place& place, bool accuracy_check = false);
} // namespace imperative
} // namespace paddle
...@@ -4,3 +4,4 @@ cc_test(test_egr_task_forward_autograd SRCS forward_autograd_test.cc DEPS ${eage ...@@ -4,3 +4,4 @@ cc_test(test_egr_task_forward_autograd SRCS forward_autograd_test.cc DEPS ${eage
cc_test(test_egr_task_backward SRCS backward_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node) cc_test(test_egr_task_backward SRCS backward_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
cc_test(test_egr_task_hook SRCS hook_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node) cc_test(test_egr_task_hook SRCS hook_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
cc_test(test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node) cc_test(test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
cc_test(test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/api/all.h"
#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/backward.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/tensor_meta.h"
#include "paddle/fluid/eager/tests/test_utils.h"
// TODO(jiabin): remove nolint here!!!
using namespace egr; // NOLINT
namespace eager_test {
// Gradient hook used by the tests below: allocates a new dense tensor with the
// same dtype and dims as `t` and fills it with t's elements plus 5.0.
//
// NOTE(review): several statements are truncated in this extract (the
// DenseTensorMeta and make_shared argument lists), and no statement attaching
// `ret_impl` to `ret` is visible before the return — confirm against the full
// file.
egr::EagerTensor hook_function(const egr::EagerTensor& t) {
auto t_dense = std::dynamic_pointer_cast<pten::DenseTensor>(t.impl());
auto ret_meta = pten::DenseTensorMeta(t_dense->dtype(), t_dense->dims(),
auto place = t_dense->place();
// Byte size of the result buffer: element count times element size.
size_t bytes_size =
paddle::framework::product(t_dense->dims()) * SizeOf(t_dense->dtype());
auto ret_dense = std::make_shared<pten::DenseTensor>(
paddle::memory::Alloc(place, bytes_size), 0),
float* t_ptr = t_dense->mutable_data<float>();
float* ret_ptr = ret_dense->mutable_data<float>();
// Element-wise: ret = t + 5.
for (int i = 0; i < ret_dense->numel(); i++) {
ret_ptr[i] = t_ptr[i] + 5.0;
auto ret_impl = std::dynamic_pointer_cast<pten::TensorBase>(ret_dense);
egr::EagerTensor ret = egr::EagerTensor();
return ret;
// Forward + backward through a single scale node on CPU:
// out = 2 * tensor + 3, so with tensor = 5 the output is 13 and d(out)/d(tensor)
// is 2.
TEST(FwdBwdJoint, SingleNode) {
// 1. Prepare Input
paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
egr::EagerTensor tensor = CreateTensorWithValue(
ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
// 3. Run Forward
float scale = 2.0;
float bias = 3.0;
egr::EagerTensor out = egr::scale(
tensor, scale, bias, true /*bias_after_scale*/, true /*trace_backward*/);
// Examine Forward Output: 5 * 2 + 3 = 13
CompareTensorWithValue<float>(out, 13.0);
std::vector<egr::EagerTensor> outs = {out};
// 4. Run Backward
RunBackward(outs, {});
VLOG(7) << "Target Grad is: "
<< std::static_pointer_cast<pten::DenseTensor>(
// Examine Backward Grad: equals the scale factor
CompareGradTensorWithValue<float>(tensor, 2.0);
// Forward + backward through two scale nodes in sequence on CPU:
// tensor -> Node0 -> out0 -> Node1 -> out1.
TEST(FwdBwdJoint, LinearNodes) {
// 1. Prepare Input
paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
egr::EagerTensor tensor = CreateTensorWithValue(
ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
// 3. Run Forward
// Run Forward Node 0
float scale0 = 2.0;
float bias0 = 3.0;
egr::EagerTensor out0 =
egr::scale(tensor, scale0, bias0, true /*bias_after_scale*/,
true /*trace_backward*/);
// Run Forward Node 1
float scale1 = 5.0;
float bias1 = 10.0;
egr::EagerTensor out1 = egr::scale(
out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/);
// Examine Forward Output 0: 5 * 2 + 3 = 13
CompareTensorWithValue<float>(out0, 13.0);
// Examine Forward Output 1: 13 * 5 + 10 = 75
CompareTensorWithValue<float>(out1, 75.0);
std::vector<egr::EagerTensor> outs = {out1};
// 4. Run Backward
RunBackward(outs, {});
// Examine Backward Grad: product of both scale factors, 2 * 5 = 10
CompareGradTensorWithValue<float>(tensor, 10.0);
/*
      tensor
        |
      Node0
        |
       out0
      |    |
  Node1    Node2
      |    |
   out1    out2
*/
// Forward + backward through a branched graph on CPU: out0 feeds two
// independent scale nodes, and backward is run from both branch outputs.
TEST(FwdBwdJoint, BranchedNodes) {
// 1. Prepare Input
paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
egr::EagerTensor tensor = CreateTensorWithValue(
ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
// 3. Run Forward
// Run Forward Node 0
float scale0 = 2.0;
float bias0 = 3.0;
egr::EagerTensor out0 =
egr::scale(tensor, scale0, bias0, true /*bias_after_scale*/,
true /*trace_backward*/);
// Run Forward Node 1
float scale1 = 5.0;
float bias1 = 10.0;
egr::EagerTensor out1 = egr::scale(
out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/);
// Run Forward Node 2
float scale2 = 10.0;
float bias2 = 20.0;
egr::EagerTensor out2 = egr::scale(
out0, scale2, bias2, true /*bias_after_scale*/, true /*trace_backward*/);
// Examine Forward Output 0: 5 * 2 + 3 = 13
CompareTensorWithValue<float>(out0, 13.0);
// Examine Forward Output 1: 13 * 5 + 10 = 75
CompareTensorWithValue<float>(out1, 75.0);
// Examine Forward Output 2: 13 * 10 + 20 = 150
// Checked by hand on the first 20 elements only, unlike the helper-based
// checks above.
auto dense_out = std::dynamic_pointer_cast<pten::DenseTensor>(out2.impl());
float* ptr = dense_out->mutable_data<float>();
for (int i = 0; i < 20; i++) {
PADDLE_ENFORCE(ptr[i] == 150.0,
"Detected numerical Error, Expected %f but got %f",
150.0, ptr[i]));
// 4. Run Backward
std::vector<egr::EagerTensor> outs = {out1, out2};
RunBackward(outs, {});
// Examine Backward Grad: both branches contribute, 2 * (5 + 10) = 30
CompareGradTensorWithValue<float>(tensor, 30.0);
/*
      tensor
        |
      Node0
        |
       out0
      |    |
  Node1    Node2
      |    |
   out1    out2
*/
// Branched graph on CPU with gradients retained and a gradient hook registered
// on out0, out1 and out2.
//
// NOTE(review): the initializer of `hook` is cut off in this extract; the
// expected values below are consistent with it being hook_function (+5) and
// with each retained gradient being captured before the hook is applied —
// confirm against the full file.
TEST(FwdBwdJoint, GradientHook) {
// 1. Prepare Input
paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
egr::EagerTensor tensor = CreateTensorWithValue(
ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
std::function<egr::EagerTensor(const egr::EagerTensor&)> hook =
// 3. Run Forward
// Run Forward Node 0
float scale0 = 2.0;
float bias0 = 3.0;
egr::EagerTensor out0 =
egr::scale(tensor, scale0, bias0, true /*bias_after_scale*/,
true /*trace_backward*/);
RetainGradForTensor(out0); // hook: +5
RegisterGradientHookForTensor(out0, hook); // hook: +5
// Run Forward Node 1
float scale1 = 5.0;
float bias1 = 10.0;
egr::EagerTensor out1 = egr::scale(
out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/);
RetainGradForTensor(out1); // hook: +5
RegisterGradientHookForTensor(out1, hook); // hook: +5
// Run Forward Node 2
float scale2 = 10.0;
float bias2 = 20.0;
egr::EagerTensor out2 = egr::scale(
out0, scale2, bias2, true /*bias_after_scale*/, true /*trace_backward*/);
RetainGradForTensor(out2); // hook: +5
RegisterGradientHookForTensor(out2, hook); // hook: +5
// 4. Run Backward
std::vector<egr::EagerTensor> outs = {out1, out2};
RunBackward(outs, {});
// Examine Backward Grad
// leaf grad: (90 + 5) * 2 = 190
CompareGradTensorWithValue<float>(tensor, 190.0);
// out0 grad: (1 + 5) * 5 + (1 + 5) * 10 = 90
CompareGradTensorWithValue<float>(out0, 90.0);
// out1 grad
CompareGradTensorWithValue<float>(out1, 1.0);
// out2 grad
CompareGradTensorWithValue<float>(out2, 1.0);
/*
      tensor
        |
      Node0
        |
       out0
      |    |
  Node1    Node2
      |    |
   out1    out2
*/
// Branched graph on CPU with backward run twice on the same outputs: the leaf
// gradient is expected to double (30 -> 60) after the second backward pass.
TEST(FwdBwdJoint, CrossBatchAccumulation) {
// 1. Prepare Input
paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
egr::EagerTensor tensor = CreateTensorWithValue(
ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
// 3. Run Forward
// Run Forward Node 0
float scale0 = 2.0;
float bias0 = 3.0;
egr::EagerTensor out0 =
egr::scale(tensor, scale0, bias0, true /*bias_after_scale*/,
true /*trace_backward*/);
// Run Forward Node 1
float scale1 = 5.0;
float bias1 = 10.0;
egr::EagerTensor out1 = egr::scale(
out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/);
// Run Forward Node 2
float scale2 = 10.0;
float bias2 = 20.0;
egr::EagerTensor out2 = egr::scale(
out0, scale2, bias2, true /*bias_after_scale*/, true /*trace_backward*/);
// 4. Run Backward
std::vector<egr::EagerTensor> outs = {out1, out2};
RunBackward(outs, {});
// Examine Backward Grad: 2 * (5 + 10) = 30
CompareGradTensorWithValue<float>(tensor, 30.0);
// Cross Batch Accumulation
RunBackward(outs, {});
// Examine Backward Grad: the second pass adds another 30
CompareGradTensorWithValue<float>(tensor, 60.0);
/* ---------------------------------------------------- */
/* ---------------------- CUDA Tests ------------------ */
/* ---------------------------------------------------- */
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// Same scenario as the CPU single-node test, with the input created on
// CUDAPlace: out = 2 * tensor + 3, forward value 13, input gradient 2.
TEST(FwdBwdJoint, SingleNodeCUDA) {
// 1. Prepare Input
paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
egr::EagerTensor tensor = CreateTensorWithValue(
ddim, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
// 3. Run Forward
float scale = 2.0;
float bias = 3.0;
egr::EagerTensor out = egr::scale(
tensor, scale, bias, true /*bias_after_scale*/, true /*trace_backward*/);
// Examine Forward Output: 5 * 2 + 3 = 13
CompareTensorWithValue<float>(out, 13.0);
std::vector<egr::EagerTensor> outs = {out};
// 4. Run Backward
RunBackward(outs, {});
// Examine Backward Grad: equals the scale factor
CompareGradTensorWithValue<float>(tensor, 2.0);
/*
      tensor
        |
      Node0
        |
       out0
      |    |
  Node1    Node2
      |    |
   out1    out2
*/
// Same scenario as the CPU branched-nodes test, with the input created on
// CUDAPlace; all three forward outputs are checked through
// CompareTensorWithValue.
TEST(FwdBwdJoint, BranchedNodesCUDA) {
// 1. Prepare Input
paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
egr::EagerTensor tensor = CreateTensorWithValue(
ddim, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32,
pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/);
// 3. Run Forward
// Run Forward Node 0
float scale0 = 2.0;
float bias0 = 3.0;
egr::EagerTensor out0 =
egr::scale(tensor, scale0, bias0, true /*bias_after_scale*/,
true /*trace_backward*/);
// Run Forward Node 1
float scale1 = 5.0;
float bias1 = 10.0;
egr::EagerTensor out1 = egr::scale(
out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/);
// Run Forward Node 2
float scale2 = 10.0;
float bias2 = 20.0;
egr::EagerTensor out2 = egr::scale(
out0, scale2, bias2, true /*bias_after_scale*/, true /*trace_backward*/);
// Examine Forward Output 0: 5 * 2 + 3 = 13
CompareTensorWithValue<float>(out0, 13.0);
// Examine Forward Output 1: 13 * 5 + 10 = 75
CompareTensorWithValue<float>(out1, 75.0);
// Examine Forward Output 2: 13 * 10 + 20 = 150
CompareTensorWithValue<float>(out2, 150.0);
// TODO(jiabin): fix this with add functor
// 4. Run Backward
std::vector<egr::EagerTensor> outs = {out1, out2};
RunBackward(outs, {});
// Examine Backward Grad: both branches contribute, 2 * (5 + 10) = 30
CompareGradTensorWithValue<float>(tensor, 30.0);
} // namespace eager_test
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册