From 930a5136078b0ad32b4846631fcf9ce982adb9c2 Mon Sep 17 00:00:00 2001 From: Zhou Wei <1183042833@qq.com> Date: Mon, 14 Mar 2022 15:45:26 +0800 Subject: [PATCH] [Phi] Migrate triangular_solve dependence to phi (#40417) --- paddle/fluid/operators/lstsq_op.cu | 15 +++-- paddle/fluid/operators/lstsq_op.h | 1 - paddle/fluid/operators/lu_op.h | 28 +++++--- paddle/fluid/operators/math/matrix_solve.cc | 39 ----------- .../fluid/operators/math/matrix_solve.cu.cc | 61 ------------------ paddle/fluid/operators/math/matrix_solve.h | 8 --- paddle/fluid/operators/triangular_solve_op.cc | 2 - paddle/fluid/operators/triangular_solve_op.h | 64 ------------------- .../kernels/cpu/triangular_solve_kernel.cc | 1 + 9 files changed, 31 insertions(+), 188 deletions(-) delete mode 100644 paddle/fluid/operators/triangular_solve_op.h diff --git a/paddle/fluid/operators/lstsq_op.cu b/paddle/fluid/operators/lstsq_op.cu index 92c9857f0b9..10e2867bf29 100644 --- a/paddle/fluid/operators/lstsq_op.cu +++ b/paddle/fluid/operators/lstsq_op.cu @@ -17,9 +17,11 @@ #include #include +#include "paddle/fluid/framework/phi_utils.h" #include "paddle/fluid/operators/lstsq_op.h" #include "paddle/fluid/operators/qr_op.h" #include "paddle/fluid/platform/dynload/cusolver.h" +#include "paddle/phi/kernels/triangular_solve_kernel.h" namespace paddle { namespace operators { @@ -70,6 +72,10 @@ class LstsqCUDAKernel : public framework::OpKernel { Tensor tau = dito.Fill(tau_dims_vec, 0); auto tau_data = tau.mutable_data(context.GetPlace()); + using Context = + typename framework::ConvertToPhiContext::TYPE; + auto& phi_dev_ctx = static_cast(dev_ctx); + if (m >= n) { Tensor tmp_x = dito.Transpose(new_x); Tensor tmp_y = dito.Transpose(new_y); @@ -93,8 +99,9 @@ class LstsqCUDAKernel : public framework::OpKernel { Tensor slice_y = dito.Slice(trans_y, {-2}, {0}, {min_mn}); // Step 3, solve R X = Y - triangular_solve(dev_ctx, res_r, slice_y, solution, - true, false, false); + phi::TriangularSolveKernel(phi_dev_ctx, res_r, slice_y, true, + false, false, solution); + } else { auto x_data = new_x.mutable_data(context.GetPlace()); auto y_data = new_y.mutable_data(context.GetPlace()); @@ -105,8 +112,8 @@ class LstsqCUDAKernel : public framework::OpKernel { // Step 2, solve R^H Z = Y Tensor trans_r = dito.Transpose(new_x); - triangular_solve(dev_ctx, trans_r, new_y, solution, - true, true, false); + phi::TriangularSolveKernel(phi_dev_ctx, trans_r, new_y, true, + true, false, solution); // Step 3, X <- Q Z BatchedOrgqr(dev_ctx, batch_count, n, n, min_mn, x_data, diff --git a/paddle/fluid/operators/lstsq_op.h b/paddle/fluid/operators/lstsq_op.h index 3cbbc62e7be..520722dafcb 100644 --- a/paddle/fluid/operators/lstsq_op.h +++ b/paddle/fluid/operators/lstsq_op.h @@ -22,7 +22,6 @@ #include "paddle/fluid/operators/math/matrix_solve.h" #include "paddle/fluid/operators/svd_helper.h" #include "paddle/fluid/operators/transpose_op.h" -#include "paddle/fluid/operators/triangular_solve_op.h" #include "paddle/fluid/platform/for_range.h" #include "paddle/phi/kernels/funcs/complex_functors.h" #include "paddle/phi/kernels/funcs/lapack/lapack_function.h" diff --git a/paddle/fluid/operators/lu_op.h b/paddle/fluid/operators/lu_op.h index f323e2e041d..214b2eccae9 100644 --- a/paddle/fluid/operators/lu_op.h +++ b/paddle/fluid/operators/lu_op.h @@ -15,12 +15,13 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/phi_utils.h" #include "paddle/fluid/operators/set_value_op.h" #include "paddle/fluid/operators/svd_helper.h" -#include "paddle/fluid/operators/triangular_solve_op.h" #include "paddle/fluid/operators/tril_triu_op.h" #include "paddle/phi/kernels/funcs/lapack/lapack_function.h" #include "paddle/phi/kernels/math_kernel.h" +#include "paddle/phi/kernels/triangular_solve_kernel.h" namespace paddle { namespace operators { @@ -555,6 +556,11 @@ class LUGradKernel : public framework::OpKernel { framework::Tensor Pmat; Unpack_Pivot(dev_ctx, *P, &Pmat, m, k); + + using Context = + typename framework::ConvertToPhiContext::TYPE; + auto& phi_dev_ctx = static_cast(dev_ctx); + if (m <= n) { if (k < n) { framework::Tensor U_complement, U_grad_complement, phi_complement, @@ -605,8 +611,9 @@ class LUGradKernel : public framework::OpKernel { framework::Tensor psi_principal, phi_mH, psi_tmp; Tensor_Conj(dev_ctx, phi, &phi_mH); phi_mH = helper.Transpose(phi_mH); - triangular_solve(dev_ctx, U_narrow, phi_mH, - &psi_principal, true, false, false); + + phi::TriangularSolveKernel( + phi_dev_ctx, U_narrow, phi_mH, true, false, false, &psi_principal); Tensor_Conj(dev_ctx, psi_principal, &psi_principal); psi_principal = helper.Transpose(psi_principal); @@ -620,8 +627,9 @@ class LUGradKernel : public framework::OpKernel { SetValueCompute_dispatch(ctx, &psi, &psi_principal, &psi, axes, &slice_starts, &slice_ends, valuedims, xrank); - triangular_solve(dev_ctx, L_narrow_mH, psi, &psi_tmp, - true, false, true); + + phi::TriangularSolveKernel(phi_dev_ctx, L_narrow_mH, psi, + true, false, true, &psi_tmp); auto mat_dim_p = phi::funcs::CreateMatrixDescriptor(Pmat.dims(), 0, false); @@ -672,8 +680,10 @@ class LUGradKernel : public framework::OpKernel { &psi, axes, &slice_starts, &slice_ends, valuedims, xrank); framework::Tensor psi_principal, phi_mH, psi_tmp, U_narrow_mH; - triangular_solve(dev_ctx, L_narrow_mH, phi, - &psi_principal, true, false, true); + + phi::TriangularSolveKernel(phi_dev_ctx, L_narrow_mH, phi, + true, false, true, &psi_principal); + slice_starts[0] = 0; slice_starts[1] = 0; slice_ends[0] = k; @@ -695,8 +705,8 @@ class LUGradKernel : public framework::OpKernel { psi_tmp = helper.Transpose(psi_tmp); Tensor_Conj(dev_ctx, U_narrow, &U_narrow_mH); - triangular_solve(dev_ctx, U_narrow_mH, psi_tmp, &psi, - true, false, false); + phi::TriangularSolveKernel(phi_dev_ctx, U_narrow_mH, psi_tmp, + true, false, false, &psi); *dx = helper.Transpose(psi); } } diff --git a/paddle/fluid/operators/math/matrix_solve.cc b/paddle/fluid/operators/math/matrix_solve.cc index 883ee9b1486..7b239b81666 100644 --- a/paddle/fluid/operators/math/matrix_solve.cc +++ b/paddle/fluid/operators/math/matrix_solve.cc @@ -34,45 +34,6 @@ class MatrixSolveFunctor { template class MatrixSolveFunctor; template class MatrixSolveFunctor; -template -class TriangularSolveFunctor { - public: - void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor* a, framework::Tensor* b, bool left, - bool upper, bool transpose, bool unitriangular) { - CBLAS_SIDE side = left ? CblasLeft : CblasRight; - CBLAS_UPLO uplo = upper ? CblasUpper : CblasLower; - CBLAS_TRANSPOSE transA = transpose ? CblasTrans : CblasNoTrans; - CBLAS_DIAG diag = unitriangular ? CblasUnit : CblasNonUnit; - - const T* a_data = a->data(); - T* b_data = b->mutable_data(context.GetPlace()); - - int a_dim_size = a->dims().size(); - int b_dim_size = b->dims().size(); - - int M = static_cast(b->dims()[b_dim_size - 2]); - int N = static_cast(b->dims()[b_dim_size - 1]); - auto lda = left ? std::max(1, M) : std::max(1, N); - auto ldb = std::max(1, N); - - int batch_size = 1; - auto& a_dim = a->dims(); - for (int i = 0; i < a_dim_size - 2; i++) { - batch_size *= a_dim[i]; - } - - auto blas = phi::funcs::GetBlas(context); - for (int i = 0; i < batch_size; i++) { - blas.TRSM(side, uplo, transA, diag, M, N, T(1), a_data + i * M * M, lda, - b_data + i * N * M, ldb); - } - } -}; - -template class TriangularSolveFunctor; -template class TriangularSolveFunctor; - } // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/math/matrix_solve.cu.cc b/paddle/fluid/operators/math/matrix_solve.cu.cc index d3490ead212..737196dde1d 100644 --- a/paddle/fluid/operators/math/matrix_solve.cu.cc +++ b/paddle/fluid/operators/math/matrix_solve.cu.cc @@ -161,67 +161,6 @@ class MatrixSolveFunctor { template class MatrixSolveFunctor; template class MatrixSolveFunctor; -template -class TriangularSolveFunctor { - public: - void operator()(const platform::CUDADeviceContext& context, const Tensor* a, - Tensor* b, bool left, bool upper, bool transpose, - bool unitriangular) { - CBLAS_SIDE side = left ? CblasLeft : CblasRight; - CBLAS_UPLO uplo = upper ? CblasUpper : CblasLower; - CBLAS_TRANSPOSE transA = transpose ? CblasTrans : CblasNoTrans; - CBLAS_DIAG diag = unitriangular ? CblasUnit : CblasNonUnit; - - const T* a_data = a->data(); - T* b_data = b->mutable_data(context.GetPlace()); - - int a_dim_size = a->dims().size(); - int b_dim_size = b->dims().size(); - - int M = static_cast(b->dims()[b_dim_size - 2]); - int N = static_cast(b->dims()[b_dim_size - 1]); - auto lda = left ? std::max(1, M) : std::max(1, N); - auto ldb = std::max(1, N); - - int batch_size = 1; - auto& a_dim = a->dims(); - for (int i = 0; i < a_dim_size - 2; i++) { - batch_size *= a_dim[i]; - } - - auto blas = phi::funcs::GetBlas(context); - if (batch_size <= 8 && M >= 64) { - for (auto i = 0; i < batch_size; i++) { - blas.TRSM(side, uplo, transA, diag, M, N, static_cast(1.0), - a_data + i * M * M, lda, b_data + i * N * M, ldb); - } - } else { - std::vector cpu_ptrs(batch_size * 2); - for (int i = 0; i < batch_size; ++i) { - cpu_ptrs[i] = a_data + i * M * M; - cpu_ptrs[i + batch_size] = b_data + i * M * N; - } - - // Copy the addresses of A and tmp_b from host to device. - memory::allocation::AllocationPtr tmp_gpu_ptrs_data = - memory::Alloc(context, cpu_ptrs.size() * sizeof(T*)); - memory::Copy(context.GetPlace(), tmp_gpu_ptrs_data->ptr(), - platform::CPUPlace(), static_cast(cpu_ptrs.data()), - cpu_ptrs.size() * sizeof(T*), context.stream()); - - const T** gpu_a_ptrs = - reinterpret_cast(tmp_gpu_ptrs_data->ptr()); - T** gpu_b_ptrs = - reinterpret_cast(tmp_gpu_ptrs_data->ptr()) + batch_size; - blas.BatchedTRSM(side, uplo, transA, diag, M, N, static_cast(1.0), - gpu_a_ptrs, lda, gpu_b_ptrs, ldb, batch_size); - } - } -}; - -template class TriangularSolveFunctor; -template class TriangularSolveFunctor; - } // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/math/matrix_solve.h b/paddle/fluid/operators/math/matrix_solve.h index 1dc43205592..415d0c6dd8e 100644 --- a/paddle/fluid/operators/math/matrix_solve.h +++ b/paddle/fluid/operators/math/matrix_solve.h @@ -117,14 +117,6 @@ class MatrixSolveFunctor { const framework::Tensor& b, framework::Tensor* out); }; -template -class TriangularSolveFunctor { - public: - void operator()(const DeviceContext& context, const framework::Tensor* a, - framework::Tensor* b, bool left, bool upper, bool transpose, - bool unitriangular); -}; - } // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/triangular_solve_op.cc b/paddle/fluid/operators/triangular_solve_op.cc index df84659a00f..35b925ca172 100644 --- a/paddle/fluid/operators/triangular_solve_op.cc +++ b/paddle/fluid/operators/triangular_solve_op.cc @@ -12,10 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/triangular_solve_op.h" #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/solve_op.h" #include "paddle/phi/infermeta/binary.h" namespace paddle { diff --git a/paddle/fluid/operators/triangular_solve_op.h b/paddle/fluid/operators/triangular_solve_op.h deleted file mode 100644 index fd46aca456c..00000000000 --- a/paddle/fluid/operators/triangular_solve_op.h +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "glog/logging.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/operators/reduce_ops/reduce_op.h" -#include "paddle/fluid/operators/solve_op.h" -#include "paddle/fluid/operators/tril_triu_op.h" -#include "paddle/phi/core/ddim.h" -#include "paddle/phi/kernels/funcs/complex_functors.h" - -namespace paddle { -namespace operators { - -using Tensor = framework::Tensor; - -template -static void triangular_solve(const DeviceContext &context, const Tensor &x, - const Tensor &y, Tensor *out, bool upper, - bool transpose, bool unitriangular) { - // Tensor broadcast use eigen library - std::vector x_bst_dims_vec; - std::vector y_bst_dims_vec; - std::tie(x_bst_dims_vec, y_bst_dims_vec) = get_broadcast_dims(x, y); - - Tensor x_bst(x.type()); - TensorExpand(context, x, &x_bst, x_bst_dims_vec); - - Tensor y_bst(y.type()); - TensorExpand(context, y, &y_bst, y_bst_dims_vec); - - // TriangularSolveFunctor performs calculations in-place - // x_clone should be a copy of 'x' after broadcast - // out should be a copy of 'y' after broadcast - Tensor x_clone(x.type()); - x_clone.Resize(phi::make_ddim(x_bst_dims_vec)); - x_clone.mutable_data(context.GetPlace()); - framework::TensorCopy(x_bst, context.GetPlace(), context, &x_clone); - - out->Resize(phi::make_ddim(y_bst_dims_vec)); - out->mutable_data(context.GetPlace()); - framework::TensorCopy(y_bst, context.GetPlace(), context, out); - - math::TriangularSolveFunctor functor; - functor(context, &x_clone, out, /*left=*/true, upper, transpose, - unitriangular); -} - -} // namespace operators -} // namespace paddle diff --git a/paddle/phi/kernels/cpu/triangular_solve_kernel.cc b/paddle/phi/kernels/cpu/triangular_solve_kernel.cc index 5aca5be1279..c91e7475f5b 100644 --- a/paddle/phi/kernels/cpu/triangular_solve_kernel.cc +++ b/paddle/phi/kernels/cpu/triangular_solve_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/triangular_solve_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" -- GitLab