Unverified commit 669353c1, authored by seemingwang, committed by GitHub

move renorm op (#44676)

* move renorm op

* change python api

* change op class func

* alloc data

* remove comments

* fix grad arguments

* fix python argument

* fix python argument

* change unit-test

* remove shape func registration

* recover extra-arguments

* recover shape functor
Parent 859c4077
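The diff below moves the renorm kernels from the fluid operator files into the phi library without changing their math. As a reading aid, here is a minimal NumPy sketch of the semantics those kernels implement (per-slice p-norm along `axis`, rescaling only slices whose norm exceeds `max_norm`); the helper name and the use of NumPy are illustrative assumptions, not part of the patch.

import numpy as np

def renorm_reference(x, p, axis, max_norm):
    # For every slice taken along `axis`, compute its p-norm and scale the
    # slice by max_norm / norm only when the norm exceeds max_norm.
    x = np.asarray(x, dtype=np.float64)
    moved = np.moveaxis(x, axis, 0)   # moved[i] is one slice along `axis`
    out = moved.copy()
    for i in range(moved.shape[0]):
        norm = (np.abs(moved[i]) ** p).sum() ** (1.0 / p)
        if norm > max_norm:
            out[i] = moved[i] * (max_norm / norm)
    return np.moveaxis(out, 0, axis)

y = renorm_reference([[1.0, 2.0], [3.0, 4.0]], p=2.0, axis=1, max_norm=1.0)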
...@@ -12,15 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/renorm_op.h"
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/unary.h"
namespace paddle {
namespace operators {
...@@ -29,15 +28,6 @@ class RenormOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
  using DDim = paddle::framework::DDim;
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "abs");
OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "abs");
auto in_dims = ctx->GetInputDim("X");
ctx->SetOutputDim("Out", in_dims);
ctx->ShareLoD("X", /*->*/ "Out");
}
};

class RenormOpMaker : public framework::OpProtoAndCheckerMaker {
...@@ -70,26 +60,6 @@ This operator is used to scale tensor sliced by axis if its p-norm execeeds maxn
class RenormGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")),
"Input",
"Out@Grad",
"AbsGrad");
OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")),
"Output",
"X@Grad",
"AbsGrad");
auto dout_dims = ctx->GetInputDim(framework::GradVarName("Out"));
ctx->SetOutputDim(framework::GradVarName("X"), dout_dims);
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto dtype = OperatorWithKernel::IndicateVarDataType(ctx, "X");
return framework::OpKernelType(dtype, ctx.GetPlace());
}
};

template <typename T>
...@@ -110,18 +80,19 @@ class RenormGradMaker : public framework::SingleGradOpMaker<T> {
namespace ops = paddle::operators;
DECLARE_INFER_SHAPE_FUNCTOR(renorm,
RenormInferShapeFunctor,
PD_INFER_META(phi::UnchangedInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(renorm_grad,
RenormGradInferShapeFunctor,
PD_INFER_META(phi::UnchangedInferMeta));
REGISTER_OPERATOR(renorm,
                  ops::RenormOp,
                  ops::RenormOpMaker,
                  ops::RenormGradMaker<paddle::framework::OpDesc>,
                  ops::RenormGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(renorm_grad, ops::RenormGradOp);
REGISTER_OP_CPU_KERNEL(renorm,
                       ops::CPURenormKernel<float>,
                       ops::CPURenormKernel<double>);
REGISTER_OP_CPU_KERNEL(renorm_grad,
                       ops::CPURenormGradKernel<float>,
                       ops::CPURenormGradKernel<double>);

REGISTER_OPERATOR(renorm,
                  ops::RenormOp,
                  ops::RenormOpMaker,
                  ops::RenormGradMaker<paddle::framework::OpDesc>,
                  ops::RenormGradMaker<paddle::imperative::OpBase>,
                  RenormInferShapeFunctor);

REGISTER_OPERATOR(renorm_grad, ops::RenormGradOp, RenormGradInferShapeFunctor);
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "math.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/phi/kernels/funcs/complex_functors.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
// template <typename T>
// struct NormDimValueFunctor<T> {
// NormDimValueFunctor(T* input, T* output, int64_t dim_divisor, int64_t
// dimension_each, float p)
// : input_(input), output_(output),dim_divisor_(dim_divisor),
// dimension_each_(dimension_each),p_(p) {}
// HOSTDEVICE void operator()(int64_t i) const {
// auto dim_index = i / dim_divsor % dimension_each;
// dim_value[dim_index] += std::pow(std::abs(input[i]), p);
// }
// T* input_;
// T* output_;
// int64_t dimension_each_, dim_divisor_;
// float p_,max_norm_;
// };
// template <typename DeviceContext, typename T>
template <typename T>
class CPURenormKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* x = context.Input<Tensor>("X");
Tensor* out = context.Output<Tensor>("Out");
auto numel = x->numel();
auto* x_data = x->data<T>();
auto input_dims = x->dims();
float max_norm = context.Attr<float>("max_norm");
float p = context.Attr<float>("p");
int dim = context.Attr<int>("axis");
auto dimension_each = input_dims[dim];
auto dim_size = input_dims.size();
int64_t dim_divisor = 1;
for (int i = dim + 1; i < dim_size; i++) dim_divisor *= input_dims[i];
// auto& dev_ctx = ctx.template device_context<DeviceContext>();
// std::vector<int64_t> dim_index(dim_size, 0);
std::vector<T> dim_value(dimension_each,
0); // dim_value = (x1^p + x2^p + x3^p....)^(1/p)
auto* out_data =
out->mutable_data<T>(context.GetPlace(), size_t(numel * sizeof(T)));
int64_t index = 0, dim_index = 0;
for (int64_t i = 0; i < numel; i++) {
// auto dim_index = i / dim_divsor % dimension_each;
dim_value[dim_index] += std::pow(std::abs(x_data[i]), p);
index++;
if (index == dim_divisor) {
dim_index++;
if (dim_index == dimension_each) {
dim_index = 0;
}
index = 0;
}
}
for (int64_t i = 0; i < dimension_each; i++) {
dim_value[i] = std::pow(dim_value[i], 1.0 / p);
if (dim_value[i] > max_norm)
dim_value[i] = max_norm / dim_value[i];
else
dim_value[i] = 1.0;
// dim_index[i] = 0;
}
index = dim_index = 0;
for (int64_t i = 0; i < numel; i++) {
// auto dim_index = i / dim_divsor % dimension_each;
out_data[i] = dim_value[dim_index] < 1.0
? dim_value[dim_index] * x_data[i]
: x_data[i];
index++;
if (index == dim_divisor) {
dim_index++;
if (dim_index == dimension_each) {
dim_index = 0;
}
index = 0;
}
}
}
};
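The forward loop above avoids computing `i / dim_divisor % dimension_each` for every element by walking two counters. A small Python check of that equivalence (illustrative only, not part of the patch):

def walk_dim_index(numel, dim_divisor, dimension_each):
    # The index/dim_index counter walk used by the kernel reproduces
    # (i // dim_divisor) % dimension_each for every flat index i.
    index, dim_index = 0, 0
    for i in range(numel):
        assert dim_index == (i // dim_divisor) % dimension_each
        index += 1
        if index == dim_divisor:
            dim_index += 1
            if dim_index == dimension_each:
                dim_index = 0
            index = 0

# e.g. axis=1 of a (2, 3, 4) tensor: dim_divisor=4, dimension_each=3
walk_dim_index(numel=2 * 3 * 4, dim_divisor=4, dimension_each=3)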
// template <typename DeviceContext, typename T>
template <typename T>
class CPURenormGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const {
const framework::Tensor* d_out =
ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
const framework::Tensor* x = ctx.Input<framework::Tensor>("X");
framework::Tensor* d_x =
ctx.Output<framework::Tensor>(framework::GradVarName("X"));
auto numel = d_out->numel();
auto* dout_data = d_out->data<T>();
auto* x_data = x->data<T>();
auto input_dims = x->dims();
float max_norm = ctx.Attr<float>("max_norm");
float p = ctx.Attr<float>("p");
int dim = ctx.Attr<int>("axis");
auto dimension_each = input_dims[dim];
auto dim_size = input_dims.size();
int64_t dim_divisor = 1;
for (int i = dim + 1; i < dim_size; i++) dim_divisor *= input_dims[i];
auto* dx_data = d_x->mutable_data<T>(
ctx.GetPlace(), static_cast<size_t>(numel * sizeof(T)));
std::vector<T> dim_value(dimension_each, 0),
dim_power_sum(dimension_each, 0),
weight_derivative(dimension_each, 0.0);
int64_t index = 0, dim_index = 0;
for (int64_t i = 0; i < numel; i++) {
// auto dim_index = i / dim_divsor % dimension_each;
dim_value[dim_index] += std::pow(std::abs(x_data[i]), p);
index++;
if (index == dim_divisor) {
dim_index++;
if (dim_index == dimension_each) {
dim_index = 0;
}
index = 0;
}
}
for (int64_t i = 0; i < dimension_each; i++) {
auto temp = std::pow(dim_value[i], 1.0 / p);
if (temp > max_norm) {
dim_power_sum[i] =
std::pow(dim_value[i], (T)(-1.0 - 1.0 / p)) * -1 * max_norm;
dim_value[i] = max_norm / temp;
} else
dim_value[i] = 1.0;
}
index = dim_index = 0;
for (int64_t i = 0; i < numel; i++) {
// auto dim_index = i / dim_divsor % dimension_each;
dx_data[i] = dim_value[dim_index] * dout_data[i];
weight_derivative[dim_index] += x_data[i] * dout_data[i];
index++;
if (index == dim_divisor) {
dim_index++;
if (dim_index == dimension_each) {
dim_index = 0;
}
index = 0;
}
}
index = dim_index = 0;
for (int64_t i = 0; i < numel; i++) {
// auto dim_index = i / dim_divsor % dimension_each;
dx_data[i] += weight_derivative[dim_index] * dim_power_sum[dim_index] *
std::pow(std::abs(x_data[i]), p - 1.0) *
(x_data[i] >= 0 ? 1 : -1);
index++;
if (index == dim_divisor) {
dim_index++;
if (dim_index == dimension_each) {
dim_index = 0;
}
index = 0;
}
}
}
};
} // namespace operators
} // namespace paddle
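For reference when reading the backward kernels above and below, the gradient they appear to implement can be written as follows (a sketch derived from the code, not stated in the patch). For one slice along `axis`, let s = sum_j |x_j|^p, n = s^{1/p}, and let g denote the incoming gradient dout:

\[
\frac{\partial L}{\partial x_i} =
\begin{cases}
g_i, & n \le \mathrm{max\_norm},\\[4pt]
\dfrac{\mathrm{max\_norm}}{n}\, g_i \;-\; \mathrm{max\_norm}\, s^{-1-1/p}\, |x_i|^{p-1}\, \operatorname{sign}(x_i) \sum_j x_j g_j, & n > \mathrm{max\_norm}.
\end{cases}
\]

In the code, dim_value holds max_norm / n, dim_power_sum holds -max_norm * s^{-1-1/p}, and weight_derivative holds sum_j x_j * g_j.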
...@@ -1931,6 +1931,16 @@
func : relu6
backward : relu6_grad
- api : renorm
args : (Tensor x, float p, int axis, float max_norm)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : renorm
backward : renorm_grad
- api : reshape
args : (Tensor x, IntArray shape)
output : Tensor(out), Tensor(xshape)
......
...@@ -1780,6 +1780,16 @@
backward: relu_double_grad
inplace : (out_grad -> x_grad)
- backward_api : renorm_grad
forward : renorm (Tensor x, float p, int axis, float max_norm) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float p, int axis, float max_norm)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out_grad]
kernel :
func : renorm_grad
- backward_api : reshape_double_grad
forward : reshape_grad (Tensor xshape, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor grad_out, Tensor grad_x_grad)
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/renorm_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/renorm_grad_kernel_impl.h"
PD_REGISTER_KERNEL(
renorm_grad, CPU, ALL_LAYOUT, phi::RenormGradKernel, float, double) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/renorm_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/renorm_kernel_impl.h"
PD_REGISTER_KERNEL(renorm, CPU, ALL_LAYOUT, phi::RenormKernel, float, double) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/renorm_grad_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/renorm_grad_kernel_impl.h"
PD_REGISTER_KERNEL(
renorm_grad, GPU, ALL_LAYOUT, phi::RenormGradKernel, float, double) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/renorm_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/renorm_kernel_impl.h"
PD_REGISTER_KERNEL(renorm, GPU, ALL_LAYOUT, phi::RenormKernel, float, double) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/impl/renorm_impl.h"
#include "paddle/phi/kernels/renorm_grad_kernel.h"
namespace phi {
template <typename T, typename Context>
void RenormGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& dout,
float p,
int axis,
float max_norm,
DenseTensor* dx) {
int64_t numel = dout.numel();
const T* dout_data = dout.template data<T>();
const T* x_data = x.template data<T>();
auto input_dims = x.dims();
int dim = axis;
auto dimension_each = input_dims[dim];
dx->Resize(x.dims());
dev_ctx.template Alloc<T>(dx);
phi::funcs::RenormGradFunc(dev_ctx,
x_data,
dout_data,
dx->data<T>(),
p,
dim,
max_norm,
dimension_each,
input_dims,
numel);
}
} // namespace phi
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
...@@ -12,19 +12,143 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <cstdio>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h"
#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
#include "paddle/fluid/operators/renorm_op.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
namespace paddle {
namespace operators {

#pragma once
#include "paddle/phi/core/device_context.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#if defined(__NVCC__) || defined(__HIPCC__)
#include "paddle/phi/kernels/funcs/reduce_function.h"
#include "paddle/phi/kernels/primitive/functor_primitives.h"
#ifdef __NVCC__
#include "cub/cub.cuh"
#else
#include <hipcub/hipcub.hpp>
namespace cub = hipcub;
#endif
#endif
namespace phi {
namespace funcs {
template <typename T>
void RenormFunc(const phi::CPUContext& ctx,
const T* x_data,
T* out_data,
float p,
int dim,
float max_norm,
int64_t dimension_each,
phi::DDim& input_dims,
int64_t numel) {
auto dim_size = input_dims.size();
int64_t dim_divisor = 1;
for (int i = dim + 1; i < dim_size; i++) dim_divisor *= input_dims[i];
std::vector<T> dim_value(dimension_each,
0); // dim_value = (x1^p + x2^p + x3^p....)^(1/p)
int64_t index = 0, dim_index = 0;
for (int64_t i = 0; i < numel; i++) {
dim_value[dim_index] += std::pow(std::abs(x_data[i]), p);
index++;
if (index == dim_divisor) {
dim_index++;
if (dim_index == dimension_each) {
dim_index = 0;
}
index = 0;
}
}
for (int64_t i = 0; i < dimension_each; i++) {
dim_value[i] = std::pow(dim_value[i], 1.0 / p);
if (dim_value[i] > max_norm)
dim_value[i] = max_norm / dim_value[i];
else
dim_value[i] = 1.0;
}
index = dim_index = 0;
for (int64_t i = 0; i < numel; i++) {
out_data[i] = dim_value[dim_index] < 1.0 ? dim_value[dim_index] * x_data[i]
: x_data[i];
index++;
if (index == dim_divisor) {
dim_index++;
if (dim_index == dimension_each) {
dim_index = 0;
}
index = 0;
}
}
}
template <typename T>
void RenormGradFunc(const phi::CPUContext& ctx,
const T* x_data,
const T* dout_data,
T* dx_data,
float p,
int dim,
float max_norm,
int64_t dimension_each,
phi::DDim& input_dims,
int64_t numel) {
auto dim_size = input_dims.size();
int64_t dim_divisor = 1;
for (int i = dim + 1; i < dim_size; i++) dim_divisor *= input_dims[i];
std::vector<T> dim_value(dimension_each, 0), dim_power_sum(dimension_each, 0),
weight_derivative(dimension_each, 0.0);
int64_t index = 0, dim_index = 0;
for (int64_t i = 0; i < numel; i++) {
dim_value[dim_index] += std::pow(std::abs(x_data[i]), p);
index++;
if (index == dim_divisor) {
dim_index++;
if (dim_index == dimension_each) {
dim_index = 0;
}
index = 0;
}
}
for (int64_t i = 0; i < dimension_each; i++) {
auto temp = std::pow(dim_value[i], 1.0 / p);
if (temp > max_norm) {
dim_power_sum[i] =
std::pow(dim_value[i], (T)(-1.0 - 1.0 / p)) * -1 * max_norm;
dim_value[i] = max_norm / temp;
} else
dim_value[i] = 1.0;
}
index = dim_index = 0;
for (int64_t i = 0; i < numel; i++) {
dx_data[i] = dim_value[dim_index] * dout_data[i];
weight_derivative[dim_index] += x_data[i] * dout_data[i];
index++;
if (index == dim_divisor) {
dim_index++;
if (dim_index == dimension_each) {
dim_index = 0;
}
index = 0;
}
}
index = dim_index = 0;
for (int64_t i = 0; i < numel; i++) {
dx_data[i] += weight_derivative[dim_index] * dim_power_sum[dim_index] *
std::pow(std::abs(x_data[i]), p - 1.0) *
(x_data[i] >= 0 ? 1 : -1);
index++;
if (index == dim_divisor) {
dim_index++;
if (dim_index == dimension_each) {
dim_index = 0;
}
index = 0;
}
}
}
#if defined(__NVCC__) || defined(__HIPCC__)
__device__ __forceinline__ float inline_pow(float base, float exponent) {
  return pow(base, exponent);
}
...@@ -77,6 +201,17 @@ __global__ void RenormKernelFunc4(const T* x_data,
  }
}
template <typename T>
__global__ void RenormElementwisePow(const T* x_data,
T* pow_value,
int64_t size,
float p) {
int64_t i = ((int64_t)blockIdx.x) * blockDim.x + threadIdx.x;
if (i < size) {
pow_value[i] = pow(abs(x_data[i]), (T)p);
}
}
template <typename T>
__global__ void RenormGradKernelFunc1(const T* x_data,
                                      const T* dout_data,
...@@ -129,150 +264,99 @@ __global__ void RenormGradKernelFunc2(const T* x_data,
}
template <typename T>
class CUDARenormKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    const Tensor* x = context.Input<Tensor>("X");
    Tensor* out = context.Output<Tensor>("Out");
    auto numel = x->numel();
    const T* x_data = x->data<T>();
    auto input_dims = x->dims();
    float max_norm = context.Attr<float>("max_norm");
    float p = context.Attr<float>("p");
    int dim = context.Attr<int>("axis");
    auto dimension_each = input_dims[dim];
    auto dim_size = input_dims.size();
    framework::Tensor pow_value, dim_value;
    int64_t dim_divisor = 1, pre_mul = 1;
    for (int i = dim + 1; i < dim_size; i++) dim_divisor *= input_dims[i];
    for (int i = 0; i < dim; i++) pre_mul *= input_dims[i];
    pow_value.Resize(phi::make_ddim({pre_mul, dimension_each, dim_divisor}));
    dim_value.Resize(phi::make_ddim({dimension_each}));
    pow_value.mutable_data<T>(context.GetPlace());
    out->Resize(phi::make_ddim(phi::vectorize(input_dims)));
    T* out_data = out->mutable_data<T>(context.GetPlace());
    auto stream = context.cuda_device_context().stream();
    int block = std::min(numel, static_cast<int64_t>(256));
    using MT = typename details::MPTypeTrait<T>::Type;
    int grid = (numel + block - 1) / block;

    int block2 = std::min(dimension_each, static_cast<int64_t>(256));
    int grid2 = (dimension_each + block2 - 1) / block2;
    std::vector<const framework::Tensor*> ins = {x};
    std::vector<framework::Tensor*> outs = {&pow_value};
    auto func = UnsignedPowFunctor<MT, T>(p);
    const auto& cuda_ctx = context.template device_context<phi::GPUContext>();
    paddle::operators::LaunchSameDimsElementwiseCudaKernel<T>(
        cuda_ctx, ins, &outs, func);
    std::vector<int> reduce_axis = {0, 2};
    TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
        cuda_ctx,
        pow_value,
        &dim_value,
        kps::IdentityFunctor<T>(),
        reduce_axis,
        stream);
    RenormKernelFunc3<T><<<grid2, block2, 0, stream>>>(
        numel, dim_value.mutable_data<T>(context.GetPlace()), p, max_norm);
    RenormKernelFunc4<T><<<grid, block, 0, stream>>>(
        x_data,
        out_data,
        numel,
        dim_value.mutable_data<T>(context.GetPlace()),
        dimension_each,
        dim_divisor);
    // platform::GpuStreamSync(stream);
  }
};

template <typename T>
class CUDAGradRenormKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const framework::Tensor* d_out =
        ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
    const framework::Tensor* x = ctx.Input<framework::Tensor>("X");
    framework::Tensor* d_x =
        ctx.Output<framework::Tensor>(framework::GradVarName("X"));

    auto numel = d_out->numel();
    const T* dout_data = d_out->data<T>();
    const T* x_data = x->data<T>();
    auto input_dims = x->dims();
    float max_norm = ctx.Attr<float>("max_norm");
    float p = ctx.Attr<float>("p");
    int dim = ctx.Attr<int>("axis");
    auto dimension_each = input_dims[dim];
    auto dim_size = input_dims.size();
    int64_t dim_divisor = 1, pre_mul = 1;
    for (int i = dim + 1; i < dim_size; i++) dim_divisor *= input_dims[i];
    for (int i = 0; i < dim; i++) pre_mul *= input_dims[i];
    d_x->Resize(phi::make_ddim(phi::vectorize(input_dims)));
    T* dx_data = d_x->mutable_data<T>(ctx.GetPlace());
    framework::Tensor pow_value, mul_value, dim_value, dim_power_sum,
        weight_derivative;
    pow_value.Resize(phi::make_ddim({pre_mul, dimension_each, dim_divisor}));
    mul_value.Resize(phi::make_ddim({pre_mul, dimension_each, dim_divisor}));
    dim_value.Resize(phi::make_ddim({dimension_each}));
    dim_power_sum.Resize(phi::make_ddim({dimension_each}));
    weight_derivative.Resize(phi::make_ddim({dimension_each}));
    auto stream = ctx.cuda_device_context().stream();
    int block = std::min(numel, static_cast<int64_t>(256));
    int grid = (numel + block - 1) / block;
    pow_value.mutable_data<T>(ctx.GetPlace());
    mul_value.mutable_data<T>(ctx.GetPlace());
    dim_value.mutable_data<T>(ctx.GetPlace());
    dim_power_sum.mutable_data<T>(ctx.GetPlace());
    weight_derivative.mutable_data<T>(ctx.GetPlace());
    RenormGradKernelFunc1<T>
        <<<grid, block, 0, stream>>>(x_data,
                                     dout_data,
                                     pow_value.mutable_data<T>(ctx.GetPlace()),
                                     mul_value.mutable_data<T>(ctx.GetPlace()),
                                     numel,
                                     dimension_each,
                                     p,
                                     dim_divisor);
    std::vector<int> reduce_axis = {0, 2};
    TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
        ctx.cuda_device_context(),
        pow_value,
        &dim_value,
        kps::IdentityFunctor<T>(),
        reduce_axis,
        stream);
    TensorReduceImpl<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
        ctx.cuda_device_context(),
        mul_value,
        &weight_derivative,
        kps::IdentityFunctor<T>(),
        reduce_axis,
        stream);
    RenormGradKernelFunc2<T><<<grid, block, 0, stream>>>(
        x_data,
        dout_data,
        dx_data,
        numel,
        dim_value.mutable_data<T>(ctx.GetPlace()),
        dim_power_sum.mutable_data<T>(ctx.GetPlace()),
        weight_derivative.mutable_data<T>(ctx.GetPlace()),
        dimension_each,
        p,
        max_norm,
        dim_divisor);
    // platform::GpuStreamSync(stream);
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(renorm,
                        ops::CUDARenormKernel<float>,
                        ops::CUDARenormKernel<double>);
REGISTER_OP_CUDA_KERNEL(renorm_grad,
                        ops::CUDAGradRenormKernel<float>,
                        ops::CUDAGradRenormKernel<double>);

template <typename T>
void RenormFunc(const phi::GPUContext& ctx,
                const T* x_data,
                T* out_data,
                float p,
                int dim,
                float max_norm,
                int64_t dimension_each,
                phi::DDim& input_dims,
                int64_t numel) {
  auto dim_size = input_dims.size();
  DenseTensor pow_value, dim_value;
  int64_t dim_divisor = 1, pre_mul = 1;
  for (int i = dim + 1; i < dim_size; i++) dim_divisor *= input_dims[i];
  for (int i = 0; i < dim; i++) pre_mul *= input_dims[i];
  pow_value.Resize(phi::make_ddim({pre_mul, dimension_each, dim_divisor}));
  dim_value.Resize(phi::make_ddim({dimension_each}));
  T* pow_value_data = ctx.template Alloc<T>(&pow_value);
  T* dim_value_data = ctx.template Alloc<T>(&dim_value);
  auto stream = ctx.stream();
  int block = std::min(numel, static_cast<int64_t>(256));
  int grid = (numel + block - 1) / block;
  RenormElementwisePow<T>
      <<<grid, block, 0, stream>>>(x_data, pow_value_data, numel, p);
  int block2 = std::min(dimension_each, static_cast<int64_t>(256));
  int grid2 = (dimension_each + block2 - 1) / block2;
  std::vector<int> reduce_axis = {0, 2};
  phi::funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
      ctx, pow_value, &dim_value, kps::IdentityFunctor<T>(), reduce_axis);
  RenormKernelFunc3<T>
      <<<grid2, block2, 0, stream>>>(numel, dim_value_data, p, max_norm);
  RenormKernelFunc4<T><<<grid, block, 0, stream>>>(
      x_data, out_data, numel, dim_value_data, dimension_each, dim_divisor);
}

template <typename T>
void RenormGradFunc(const phi::GPUContext& ctx,
                    const T* x_data,
                    const T* dout_data,
                    T* dx_data,
                    float p,
                    int dim,
                    float max_norm,
                    int64_t dimension_each,
                    phi::DDim& input_dims,
                    int64_t numel) {
  auto dim_size = input_dims.size();
  int64_t dim_divisor = 1, pre_mul = 1;
  for (int i = dim + 1; i < dim_size; i++) dim_divisor *= input_dims[i];
  for (int i = 0; i < dim; i++) pre_mul *= input_dims[i];
  DenseTensor pow_value, mul_value, dim_value, dim_power_sum, weight_derivative;
  pow_value.Resize(phi::make_ddim({pre_mul, dimension_each, dim_divisor}));
  mul_value.Resize(phi::make_ddim({pre_mul, dimension_each, dim_divisor}));
  dim_value.Resize(phi::make_ddim({dimension_each}));
  dim_power_sum.Resize(phi::make_ddim({dimension_each}));
  weight_derivative.Resize(phi::make_ddim({dimension_each}));
  auto stream = ctx.stream();
  int block = std::min(numel, static_cast<int64_t>(256));
  int grid = (numel + block - 1) / block;
  T* pow_value_data = ctx.template Alloc<T>(&pow_value);
  T* mul_value_data = ctx.template Alloc<T>(&mul_value);
  T* dim_value_data = ctx.template Alloc<T>(&dim_value);
  T* dim_power_sum_data = ctx.template Alloc<T>(&dim_power_sum);
  T* weight_derivative_data = ctx.template Alloc<T>(&weight_derivative);
  RenormGradKernelFunc1<T><<<grid, block, 0, stream>>>(x_data,
                                                       dout_data,
                                                       pow_value_data,
                                                       mul_value_data,
                                                       numel,
                                                       dimension_each,
                                                       p,
                                                       dim_divisor);
  std::vector<int> reduce_axis = {0, 2};
  phi::funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
      ctx, pow_value, &dim_value, kps::IdentityFunctor<T>(), reduce_axis);
  phi::funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
      ctx,
      mul_value,
      &weight_derivative,
      kps::IdentityFunctor<T>(),
      reduce_axis);
  RenormGradKernelFunc2<T><<<grid, block, 0, stream>>>(x_data,
                                                       dout_data,
                                                       dx_data,
                                                       numel,
                                                       dim_value_data,
                                                       dim_power_sum_data,
                                                       weight_derivative_data,
                                                       dimension_each,
                                                       p,
                                                       max_norm,
                                                       dim_divisor);
}
#endif

}  // namespace funcs
}  // namespace phi
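The GPU path computes the same per-slice sums as the CPU loop by reshaping the element-wise |x|^p values to (pre_mul, dimension_each, dim_divisor) and reducing over axes {0, 2}. A NumPy sketch of that equivalence (illustrative assumption, not part of the patch):

import numpy as np

def per_slice_power_sums(x, p, axis):
    # Mirrors the GPU layout: flatten dims before `axis` into pre_mul and
    # dims after `axis` into dim_divisor, then reduce over both.
    x = np.asarray(x, dtype=np.float64)
    pre_mul = int(np.prod(x.shape[:axis], dtype=np.int64))
    dimension_each = x.shape[axis]
    dim_divisor = int(np.prod(x.shape[axis + 1:], dtype=np.int64))
    pow_value = np.abs(x) ** p
    reshaped = pow_value.reshape(pre_mul, dimension_each, dim_divisor)
    return reshaped.sum(axis=(0, 2))   # one p-power sum per index along `axis`

x = np.arange(24, dtype=np.float64).reshape(2, 3, 4)
print(per_slice_power_sums(x, p=2.0, axis=1))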
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/impl/renorm_impl.h"
#include "paddle/phi/kernels/renorm_kernel.h"
namespace phi {
template <typename T, typename Context>
void RenormKernel(const Context& dev_ctx,
const DenseTensor& x,
float p,
int axis,
float max_norm,
DenseTensor* out) {
out->Resize(x.dims());
dev_ctx.template Alloc<T>(out);
auto x_ptr = x.template data<T>();
auto numel = x.numel();
int dim = axis;
auto input_dims = x.dims();
auto dimension_each = input_dims[dim];
phi::funcs::RenormFunc(dev_ctx,
x_ptr,
out->data<T>(),
p,
axis,
max_norm,
dimension_each,
input_dims,
numel);
}
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void RenormGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& dout,
float p,
int axis,
float max_norm,
DenseTensor* dx);
}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void RenormKernel(const Context& dev_ctx,
const DenseTensor& x,
float p,
int axis,
float max_norm,
DenseTensor* out);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {
KernelSignature RenormOpArgumentMapping(const ArgumentMappingContext& ctx) {
VLOG(3) << "in renrom arguments mapping";
return KernelSignature("renorm", {"X"}, {"p", "axis", "max_norm"}, {"Out"});
}
KernelSignature RenormGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
VLOG(3) << "in renrom grad arguments mapping";
return KernelSignature(
"renorm_grad", {"X", "Out@GRAD"}, {"p", "axis", "max_norm"}, {"X@GRAD"});
}
} // namespace phi
PD_REGISTER_ARG_MAPPING_FN(renorm, phi::RenormOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(renorm_grad, phi::RenormGradOpArgumentMapping);
...@@ -1752,6 +1752,10 @@ py_test_modules(
set_tests_properties(test_add_reader_dependency_for_interpretercore
                     PROPERTIES TIMEOUT 120)
py_test_modules(test_renorm_op_without_eager MODULES test_renorm_op ENVS
FLAGS_enable_eager_mode=0)
set_tests_properties(test_renorm_op_without_eager PROPERTIES TIMEOUT 120)
py_test_modules(
  test_eager_deletion_padding_rnn_for_interpretercore MODULES
  test_eager_deletion_padding_rnn ENVS FLAGS_CONVERT_GRAPH_TO_PROGRAM=true)
...@@ -71,7 +71,7 @@ class TestRenormAPI(unittest.TestCase):
[[0, 0.01045918, 0.00683333],
[0, 0.01394558, 0.00683333]]])
self.assertTrue(np.allclose(expected_grad, np.array(x.grad)))
#test exception:
# #test exception:
with fluid.dygraph.guard():
input = [[[2.0, 2, -2], [3, 0.3, 3]], [[2, -8, 2], [3.1, 3.7, 3]]]
x = paddle.to_tensor(input, stop_gradient=False)
......
...@@ -1799,7 +1799,10 @@ def renorm(x, p, axis, max_norm):
if not axis >= -1 * len(input_shape):
raise ValueError("the axis:{} should not be less than -1 * length of input_shape:{}".format(axis,-1 * len(input_shape)))
axis = axis + len(input_shape)
if paddle.in_dynamic_mode():
if in_dygraph_mode():
out = _C_ops.final_state_renorm(x, p, axis, max_norm)
return out
elif _in_legacy_dygraph():
out = _C_ops.renorm(x, 'p',p, 'axis',axis, 'max_norm', max_norm)
return out
......
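For context, a short usage sketch of the Python API whose dynamic-graph dispatch is updated above (illustrative; the input values are taken from the unit test, and no output values are asserted here):

import paddle

x = paddle.to_tensor([[[2.0, 2.0, -2.0], [3.0, 0.3, 3.0]],
                      [[2.0, -8.0, 2.0], [3.1, 3.7, 3.0]]])
# Rescale every slice along axis=0 whose L2 norm exceeds max_norm=1.0.
y = paddle.renorm(x, p=2.0, axis=0, max_norm=1.0)
print(y.shape)  # [2, 2, 3], same shape as x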