// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// GPU implementation and registration of the `trace` operator kernels.
//
// NOTE(review): this text appears to have lost everything that was written
// between angle brackets: the targets of the first two #include directives,
// every template parameter list and every template argument list (only a few
// stray closing '>' survive in the registration lists at the bottom). The
// code tokens are left exactly as found; restore the missing pieces from the
// authoritative copy of this file before building.

// NOTE(review): the next two directives have no target -- TODO restore.
#include
#include
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/reduce_ops/cub_reduce.h"
#include "paddle/fluid/operators/trace_op.h"

namespace paddle {
namespace operators {

// Unary functor whose call operator returns its argument unchanged.
// An instance is handed to TensorReduce in TraceCUDAKernel::Compute below;
// presumably it is the per-element transform of that reduction -- confirm
// against cub_reduce.h.
struct IdentityFunctor {
  HOSTDEVICE explicit inline IdentityFunctor() {}

  // NOTE(review): the template parameter list (declaring U) is missing here.
  template
  HOSTDEVICE inline U operator()(const U& x) const {
    return x;
  }
};

// Kernel class registered below under the operator name `trace`.
//
// Compute() reads the tensor "Input" and the attributes "offset", "axis1" and
// "axis2", obtains a tensor from Diagonal(...), and writes the tensor "Out":
//   * if the Diagonal result has at least one element, it is reduced with
//     cub::Sum into "Out";
//   * otherwise "Out" is filled with zero via math::SetConstant.
//
// NOTE(review): the template parameter list of this class (it must at least
// declare T, which the body uses) and the template arguments of OpKernel,
// Input, Output, Attr, mutable_data, Diagonal, std::vector, TensorReduce,
// static_cast, SetConstant and device_context are missing -- TODO restore.
template
class TraceCUDAKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* input = context.Input("Input");
    auto* out = context.Output("Out");

    // The three attributes are widened to int64_t before being passed on.
    const int64_t offset = context.Attr("offset");
    const int64_t dim1 = context.Attr("axis1");
    const int64_t dim2 = context.Attr("axis2");

    // out_data is not read again in this function; the call is presumably
    // kept for its effect of allocating the output buffer -- confirm.
    T* out_data = out->mutable_data(context.GetPlace());
    // Diagonal is not defined in this file (presumably it comes from
    // trace_op.h -- confirm).
    const framework::Tensor diag =
        Diagonal(context, input, offset, dim1, dim2);
    if (diag.numel() > 0) {
      auto stream = context.cuda_device_context().stream();
      // Exactly one reduction axis is requested: the index equal to the rank
      // of `out`. Presumably that is the trailing axis of `diag`, holding the
      // diagonal elements -- confirm against Diagonal.
      std::vector reduce_dims;
      reduce_dims.push_back(out->dims().size());
      // The zero argument is presumably the initial value of the sum.
      TensorReduce(
          diag, out, reduce_dims, static_cast(0), cub::Sum(),
          IdentityFunctor(), stream);
    } else {
      // No elements were selected: the output is set to zero.
      math::SetConstant functor;
      functor(context.device_context(), out, static_cast(0));
    }
  }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace platform = paddle::platform;

// Registers seven TraceCUDAKernel entries under the operator name `trace`.
// NOTE(review): the template arguments of every entry are missing. The stray
// '>' after the last two entries suggests those two used a nested template
// argument -- TODO restore.
REGISTER_OP_CUDA_KERNEL(
    trace, ops::TraceCUDAKernel,
    ops::TraceCUDAKernel, ops::TraceCUDAKernel, ops::TraceCUDAKernel,
    ops::TraceCUDAKernel, ops::TraceCUDAKernel>, ops::TraceCUDAKernel>);

// Registers seven TraceGradKernel entries under the operator name
// `trace_grad`. TraceGradKernel is not defined in this file (presumably it is
// declared in trace_op.h -- confirm). The same missing-argument caveat as
// above applies.
REGISTER_OP_CUDA_KERNEL(
    trace_grad, ops::TraceGradKernel, ops::TraceGradKernel,
    ops::TraceGradKernel, ops::TraceGradKernel, ops::TraceGradKernel,
    ops::TraceGradKernel>, ops::TraceGradKernel>);