/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <random>
#include <vector>

#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/kernels/funcs/layer_norm_impl.cu.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/layer_norm_kernel.h"

namespace framework = paddle::framework;
namespace platform = paddle::platform;
namespace memory = paddle::memory;

USE_OP_ITSELF(dropout);
USE_OP_ITSELF(layer_norm);

template <typename T>
using CudnnDataType = phi::backends::gpu::CudnnDataType<T>;
template <typename T>
using LayerNormParamType = typename CudnnDataType<T>::BatchNormParamType;

/**
 * @brief call paddle dropout op
 */
template <typename T>
void Dropout(const std::vector<T> &x,
             const framework::DDim &x_dim,
             std::vector<T> *out,
             std::vector<uint8_t> *mask,
             const phi::GPUContext &ctx,
             uint64_t seed,
             float dropout_prob,
             bool is_upscale_in_train,
             bool is_test) {
  framework::Scope scope;
  auto var_x = scope.Var("X");
  auto tensor_x = var_x->GetMutable<phi::DenseTensor>();
  framework::TensorFromVector(x, ctx, tensor_x);
  tensor_x->Resize(x_dim);

  auto var_out = scope.Var("Out");
  auto tensor_out = var_out->GetMutable<phi::DenseTensor>();

  auto var_mask = scope.Var("Mask");
  auto tensor_mask = var_mask->GetMutable<phi::DenseTensor>();

  framework::AttributeMap attrs;
  attrs.insert({"fix_seed", 1});
  attrs.insert({"seed", static_cast<int>(seed)});
  attrs.insert({"dropout_prob", dropout_prob});
  if (is_upscale_in_train) {
    attrs.insert({"dropout_implementation", std::string("upscale_in_train")});
  }

  if (is_test) {
    attrs.insert({"is_test", true});
  }

  auto op = framework::OpRegistry::CreateOp(
      "dropout", {{"X", {"X"}}}, {{"Out", {"Out"}}, {"Mask", {"Mask"}}}, attrs);
  op->Run(scope, ctx.GetPlace());

  framework::TensorToVector<T>(*tensor_out, ctx, out);
  if (!is_test) {
    framework::TensorToVector<uint8_t>(*tensor_mask, ctx, mask);
  }
  ctx.Wait();
}
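
// Illustrative usage sketch (not part of the original tests; the GPU context
// setup is omitted and the shape, seed, and dropout settings below are
// arbitrary assumptions):
//
//   const int rows = 16, cols = 16;
//   framework::DDim x_dim({rows, cols});
//   std::vector<float> x(rows * cols, 1.0f), out;
//   std::vector<uint8_t> mask;
//   // ctx must be a fully initialized phi::GPUContext (allocator + stream).
//   Dropout<float>(x, x_dim, &out, &mask, ctx, /*seed=*/0,
//                  /*dropout_prob=*/0.5, /*is_upscale_in_train=*/true,
//                  /*is_test=*/false);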

/**
 * @brief call paddle dropout_grad op
 */
template <typename T>
void DropoutGrad(std::vector<T> *dx,
                 const framework::DDim &x_dim,
                 const std::vector<T> &dout,
                 const std::vector<uint8_t> &mask,
                 const phi::GPUContext &ctx,
                 float dropout_prob,
                 bool is_upscale_in_train) {
  framework::Scope scope;
  const size_t n = x_dim[0] * x_dim[1];
  auto var_out = scope.Var("DOut");
  auto tensor_out = var_out->GetMutable<phi::DenseTensor>();
  framework::TensorFromVector(dout, ctx, tensor_out);
  tensor_out->Resize(x_dim);

  auto var_mask = scope.Var("Mask");
  auto tensor_mask = var_mask->GetMutable<phi::DenseTensor>();
  framework::TensorFromVector(mask, ctx, tensor_mask);
  tensor_mask->Resize(x_dim);

  auto var_dx = scope.Var("DX");
  auto tensor_dx = var_dx->GetMutable<phi::DenseTensor>();

  framework::AttributeMap attrs;
  attrs.insert({"dropout_prob", dropout_prob});
  attrs.insert({"is_test", false});
  if (is_upscale_in_train) {
    attrs.insert({"dropout_implementation", std::string("upscale_in_train")});
  } else {
    attrs.insert({"dropout_implementation", std::string("downgrade_in_infer")});
  }

  auto op = framework::OpRegistry::CreateOp(
      "dropout_grad",
      {{"Out@GRAD", {"DOut"}}, {"Mask", {"Mask"}}},
      {{"X@GRAD", {"DX"}}},
      attrs);
  op->Run(scope, ctx.GetPlace());

  framework::TensorToVector(*tensor_dx, ctx, dx);
  ctx.Wait();
}
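
// Illustrative usage sketch (assumes x_dim, mask, and ctx come from a forward
// Dropout call like the one sketched above; values are arbitrary):
//
//   std::vector<float> dout(rows * cols, 1.0f), dx;
//   DropoutGrad<float>(&dx, x_dim, dout, mask, ctx, /*dropout_prob=*/0.5,
//                      /*is_upscale_in_train=*/true);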

/**
 * @brief call paddle layer_norm op
 */
template <typename T>
void LayerNorm(const std::vector<LayerNormParamType<T>> &scale,
               const std::vector<LayerNormParamType<T>> &bias,
               const std::vector<T> &x,
               std::vector<LayerNormParamType<T>> *means,
               std::vector<LayerNormParamType<T>> *vars,
               std::vector<T> *y,
               const float epsilon,
               const int rows,
               const int cols,
               const phi::GPUContext &ctx) {
  framework::Scope scope;
  auto place = ctx.GetPlace();
  paddle::optional<phi::DenseTensor> scale_opt;
  if (scale.size() > 0) {
    auto var_scale = scope.Var("Scale");
    auto tensor_scale = var_scale->GetMutable<phi::DenseTensor>();
    framework::TensorFromVector(scale, ctx, tensor_scale);
    tensor_scale->Resize({cols});
    scale_opt = *tensor_scale;
  }

  paddle::optional<phi::DenseTensor> bias_opt;
  if (bias.size() > 0) {
    auto var_bias = scope.Var("Bias");
    auto tensor_bias = var_bias->GetMutable<phi::DenseTensor>();
    framework::TensorFromVector(bias, ctx, tensor_bias);
    tensor_bias->Resize({cols});

    bias_opt = *tensor_bias;
  }

  auto var_x = scope.Var("X");
  auto tensor_x = var_x->GetMutable<phi::DenseTensor>();
  framework::TensorFromVector(x, ctx, tensor_x);
  tensor_x->Resize({rows, cols});

  auto var_y = scope.Var("Y");
  auto tensor_y = var_y->GetMutable<phi::DenseTensor>();
  tensor_y->Resize({rows, cols});

  auto var_mean = scope.Var("Mean");
  auto tensor_mean = var_mean->GetMutable<phi::DenseTensor>();
  tensor_mean->Resize({rows});

  auto var_variance = scope.Var("Variance");
  auto tensor_variance = var_variance->GetMutable<phi::DenseTensor>();
  tensor_variance->Resize({rows});
  ctx.Wait();
  phi::LayerNormKernel<T>(static_cast<const phi::GPUContext &>(ctx),
                          *tensor_x,
                          scale_opt,
                          bias_opt,
                          epsilon,
                          /*begin_norm_axis=*/1,
                          tensor_y,
                          tensor_mean,
                          tensor_variance);
  framework::TensorToVector(*tensor_y, ctx, y);
  framework::TensorToVector(*tensor_mean, ctx, means);
  framework::TensorToVector(*tensor_variance, ctx, vars);
  ctx.Wait();
}
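
// Illustrative usage sketch (sizes and inputs are arbitrary assumptions; ctx
// must be a fully initialized phi::GPUContext):
//
//   const int rows = 16, cols = 16;
//   using U = LayerNormParamType<float>;
//   std::vector<U> scale(cols, 1.0f), bias(cols, 0.0f), means, vars;
//   std::vector<float> x(rows * cols, 1.0f), y;
//   LayerNorm<float>(scale, bias, x, &means, &vars, &y, /*epsilon=*/1e-5f,
//                    rows, cols, ctx);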

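/**
 * @brief CPU reference that sums dout over the row dimension, producing one
 * value per column (e.g. a bias gradient). The pairwise-halving loop assumes
 * rows is a power of two.
 */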
template <typename T>
inline void ReduceSum(const std::vector<T> &dout,
                      std::vector<T> *dbias,
                      const int rows,
                      const int cols) {
  for (int j = 0; j < cols; j++) {
    std::vector<T> tmp_dbias(rows);
    for (int i = 0; i < rows; i++) {
      tmp_dbias[i] = dout[i * cols + j];
    }
    int tmp_rows = rows / 2;
    while (tmp_rows) {
      for (int i = 0; i < tmp_rows; i++) {
        tmp_dbias[i] += tmp_dbias[i + tmp_rows];
      }
      tmp_rows /= 2;
    }
    (*dbias)[j] = tmp_dbias[0];
  }
}
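
// Illustrative usage sketch (sizes are arbitrary; rows must be a power of two
// for the halving reduction above):
//
//   const int rows = 16, cols = 8;
//   std::vector<float> dout(rows * cols, 1.0f), dbias(cols);
//   ReduceSum<float>(dout, &dbias, rows, cols);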