Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
ecfddebb
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
ecfddebb
编写于
4月 27, 2020
作者:
Y
Yiqun Liu
提交者:
GitHub
4月 27, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add the implementation of inverse (#23310)
上级
34122e66
变更
19
显示空白变更内容
内联
并排
Showing
19 changed file
with
772 addition
and
18 deletion
+772
-18
paddle/fluid/framework/tensor_util.cc
paddle/fluid/framework/tensor_util.cc
+10
-9
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+1
-1
paddle/fluid/operators/inverse_op.cc
paddle/fluid/operators/inverse_op.cc
+129
-0
paddle/fluid/operators/inverse_op.cu.cc
paddle/fluid/operators/inverse_op.cu.cc
+25
-0
paddle/fluid/operators/inverse_op.h
paddle/fluid/operators/inverse_op.h
+70
-0
paddle/fluid/operators/math/CMakeLists.txt
paddle/fluid/operators/math/CMakeLists.txt
+4
-0
paddle/fluid/operators/math/blas.h
paddle/fluid/operators/math/blas.h
+30
-0
paddle/fluid/operators/math/blas_impl.cu.h
paddle/fluid/operators/math/blas_impl.cu.h
+74
-0
paddle/fluid/operators/math/matrix_inverse.cc
paddle/fluid/operators/math/matrix_inverse.cc
+62
-0
paddle/fluid/operators/math/matrix_inverse.cu.cc
paddle/fluid/operators/math/matrix_inverse.cu.cc
+102
-0
paddle/fluid/operators/math/matrix_inverse.h
paddle/fluid/operators/math/matrix_inverse.h
+34
-0
paddle/fluid/platform/dynload/cublas.h
paddle/fluid/platform/dynload/cublas.h
+3
-1
paddle/fluid/platform/dynload/mklml.h
paddle/fluid/platform/dynload/mklml.h
+3
-0
python/paddle/__init__.py
python/paddle/__init__.py
+1
-1
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+3
-3
python/paddle/fluid/tests/unittests/test_inverse_op.py
python/paddle/fluid/tests/unittests/test_inverse_op.py
+144
-0
python/paddle/fluid/tests/unittests/white_list/op_threshold_white_list.py
...uid/tests/unittests/white_list/op_threshold_white_list.py
+2
-1
python/paddle/tensor/__init__.py
python/paddle/tensor/__init__.py
+1
-1
python/paddle/tensor/math.py
python/paddle/tensor/math.py
+74
-1
未找到文件。
paddle/fluid/framework/tensor_util.cc
浏览文件 @
ecfddebb
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/tensor_util.h"
#include <algorithm>
#include <limits>
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
ecfddebb
...
...
@@ -85,7 +85,7 @@ endif()
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor device_memory_aligment
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
dynload_warpctc
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler sample_prob tree2col
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search fc
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search fc
matrix_inverse
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
box_wrapper
)
if
(
WITH_GPU
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
depthwise_conv prelu bert_encoder_functor
)
...
...
paddle/fluid/operators/inverse_op.cc
0 → 100644
浏览文件 @
ecfddebb
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/inverse_op.h"
#include <string>
#include <unordered_map>
namespace
paddle
{
namespace
operators
{
class
InverseOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"Input"
),
"Input"
,
"Input"
,
"Inverse"
);
OP_INOUT_CHECK
(
ctx
->
HasOutput
(
"Output"
),
"Output"
,
"Output"
,
"Inverse"
);
auto
input_dims
=
ctx
->
GetInputDim
(
"Input"
);
int64_t
input_rank
=
input_dims
.
size
();
PADDLE_ENFORCE_GE
(
input_rank
,
2
,
platform
::
errors
::
InvalidArgument
(
"The dimension of Input(Input) is expected to be no less than 2. "
"But recieved: Input(Input)'s dimension = %d, shape = [%s]."
,
input_rank
,
input_dims
));
if
(
input_dims
[
input_rank
-
2
]
>
0
&&
input_dims
[
input_rank
-
1
]
>
0
)
{
PADDLE_ENFORCE_EQ
(
input_dims
[
input_rank
-
2
],
input_dims
[
input_rank
-
1
],
platform
::
errors
::
InvalidArgument
(
"The last two dimensions are expected to be equal. "
"But recieved: %d and %d; "
"Input(Input)'s shape = [%s]."
,
input_dims
[
input_rank
-
2
],
input_dims
[
input_rank
-
1
],
input_dims
));
}
ctx
->
SetOutputDim
(
"Output"
,
input_dims
);
ctx
->
ShareLoD
(
"Input"
,
/*->*/
"Output"
);
}
};
class
InverseOpInferVarType
:
public
framework
::
PassInDtypeAndVarTypeToOutput
{
protected:
std
::
unordered_map
<
std
::
string
,
std
::
string
>&
GetInputOutputWithSameType
()
const
override
{
static
std
::
unordered_map
<
std
::
string
,
std
::
string
>
m
{
{
"Input"
,
/*->*/
"Output"
}};
return
m
;
}
};
class
InverseGradOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
auto
input_grad
=
framework
::
GradVarName
(
"Input"
);
auto
output_grad
=
framework
::
GradVarName
(
"Output"
);
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"Output"
),
"Input"
,
"Output"
,
"InverseGrad"
);
OP_INOUT_CHECK
(
ctx
->
HasInput
(
output_grad
),
"Input"
,
output_grad
,
"InverseGrad"
);
if
(
ctx
->
HasOutput
(
input_grad
))
{
ctx
->
SetOutputDim
(
input_grad
,
ctx
->
GetInputDim
(
output_grad
));
}
}
};
class
InverseOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddInput
(
"Input"
,
"(Tensor) A square matrix (2-D Tensor) or batches of square matrices"
" to inverse."
);
AddOutput
(
"Output"
,
"(Tensor) The inverse of input matrix."
);
AddComment
(
R"DOC(
Inverse Operator
Takes the inverse of the square matrix.
)DOC"
);
}
};
template
<
typename
T
>
class
InverseGradOpMaker
:
public
framework
::
SingleGradOpMaker
<
T
>
{
public:
using
framework
::
SingleGradOpMaker
<
T
>::
SingleGradOpMaker
;
protected:
void
Apply
(
GradOpPtr
<
T
>
grad
)
const
override
{
grad
->
SetType
(
this
->
ForwardOpType
()
+
"_grad"
);
grad
->
SetInput
(
"Output"
,
this
->
Output
(
"Output"
));
grad
->
SetInput
(
framework
::
GradVarName
(
"Output"
),
this
->
OutputGrad
(
"Output"
));
grad
->
SetOutput
(
framework
::
GradVarName
(
"Input"
),
this
->
InputGrad
(
"Input"
));
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
inverse
,
ops
::
InverseOp
,
ops
::
InverseOpMaker
,
ops
::
InverseOpInferVarType
,
ops
::
InverseGradOpMaker
<
paddle
::
framework
::
OpDesc
>
,
ops
::
InverseGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
REGISTER_OPERATOR
(
inverse_grad
,
ops
::
InverseGradOp
);
REGISTER_OP_CPU_KERNEL
(
inverse
,
ops
::
InverseKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
InverseKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
inverse_grad
,
ops
::
InverseGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
InverseGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
paddle/fluid/operators/inverse_op.cu.cc
0 → 100644
浏览文件 @
ecfddebb
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/inverse_op.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
inverse
,
ops
::
InverseKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
InverseKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
REGISTER_OP_CUDA_KERNEL
(
inverse_grad
,
ops
::
InverseGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
InverseGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
paddle/fluid/operators/inverse_op.h
0 → 100644
浏览文件 @
ecfddebb
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/matrix_inverse.h"
namespace
paddle
{
namespace
operators
{
template
<
typename
DeviceContext
,
typename
T
>
class
InverseKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
input
=
context
.
Input
<
framework
::
Tensor
>
(
"Input"
);
auto
*
output
=
context
.
Output
<
framework
::
Tensor
>
(
"Output"
);
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
math
::
MatrixInverseFunctor
<
DeviceContext
,
T
>
mat_inv
;
mat_inv
(
dev_ctx
,
*
input
,
output
);
}
};
template
<
typename
DeviceContext
,
typename
T
>
class
InverseGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
a_inv
=
context
.
Input
<
framework
::
Tensor
>
(
"Output"
);
auto
*
a_inv_grad
=
context
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Output"
));
auto
*
a_grad
=
context
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Input"
));
if
(
a_grad
)
{
a_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
blas
=
math
::
GetBlas
<
DeviceContext
,
T
>
(
context
);
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
framework
::
Tensor
tmp_out
=
context
.
AllocateTmpTensor
<
T
,
DeviceContext
>
(
a_inv
->
dims
(),
dev_ctx
);
auto
mat_dim_a0
=
math
::
CreateMatrixDescriptor
(
a_inv_grad
->
dims
(),
0
,
false
);
auto
mat_dim_b0
=
math
::
CreateMatrixDescriptor
(
a_inv
->
dims
(),
0
,
true
);
blas
.
MatMul
(
*
a_inv_grad
,
mat_dim_a0
,
*
a_inv
,
mat_dim_b0
,
T
(
1
),
&
tmp_out
,
T
(
0
));
auto
mat_dim_a1
=
math
::
CreateMatrixDescriptor
(
a_inv
->
dims
(),
0
,
true
);
auto
mat_dim_b1
=
math
::
CreateMatrixDescriptor
(
tmp_out
.
dims
(),
0
,
false
);
blas
.
MatMul
(
*
a_inv
,
mat_dim_a1
,
tmp_out
,
mat_dim_b1
,
T
(
-
1
),
a_grad
,
T
(
0
));
}
}
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/math/CMakeLists.txt
浏览文件 @
ecfddebb
...
...
@@ -21,6 +21,9 @@ function(math_library TARGET)
if
(
EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
TARGET
}
.cu
)
list
(
APPEND cu_srcs
${
TARGET
}
.cu
)
endif
()
if
(
EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
TARGET
}
.cu.cc
)
list
(
APPEND cu_srcs
${
TARGET
}
.cu.cc
)
endif
()
if
(
EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
TARGET
}
.hip.cu
)
list
(
APPEND hip_srcs
${
TARGET
}
.hip.cu
)
endif
()
...
...
@@ -68,6 +71,7 @@ math_library(vol2col)
math_library
(
prelu
)
math_library
(
bert_encoder_functor
)
math_library
(
tree2col DEPS math_function
)
math_library
(
matrix_inverse
)
cc_test
(
math_function_test SRCS math_function_test.cc DEPS math_function
)
cc_test
(
selected_rows_functor_test SRCS selected_rows_functor_test.cc DEPS selected_rows_functor
)
...
...
paddle/fluid/operators/math/blas.h
浏览文件 @
ecfddebb
...
...
@@ -223,6 +223,19 @@ class Blas {
CBLAS_DIAG
diag
,
int
M
,
int
N
,
T
alpha
,
const
T
*
A
,
int
lda
,
T
*
B
,
int
ldb
)
const
;
#ifdef PADDLE_WITH_CUDA
template
<
typename
T
>
void
BatchedGETRF
(
int
n
,
T
**
a
,
int
*
ipiv
,
int
*
info
,
int
batch_size
)
const
;
template
<
typename
T
>
void
BatchedGETRI
(
int
n
,
const
T
**
a
,
const
int
*
ipiv
,
T
**
a_inv
,
int
*
info
,
int
batch_size
)
const
;
template
<
typename
T
>
void
BatchedMatInv
(
int
n
,
const
T
**
a
,
T
**
a_inv
,
int
*
info
,
int
batch_size
)
const
;
#endif
private:
const
DeviceContext
&
context_
;
};
...
...
@@ -361,6 +374,23 @@ class BlasT : private Blas<DeviceContext> {
Base
()
->
template
TRSM
<
T
>(
args
...);
}
#ifdef PADDLE_WITH_CUDA
template
<
typename
...
ARGS
>
void
BatchedGETRF
(
ARGS
...
args
)
const
{
Base
()
->
template
BatchedGETRF
<
T
>(
args
...);
}
template
<
typename
...
ARGS
>
void
BatchedGETRI
(
ARGS
...
args
)
const
{
Base
()
->
template
BatchedGETRI
<
T
>(
args
...);
}
template
<
typename
...
ARGS
>
void
BatchedMatInv
(
ARGS
...
args
)
const
{
Base
()
->
template
BatchedMatInv
<
T
>(
args
...);
}
#endif
private:
const
Blas
<
DeviceContext
>*
Base
()
const
{
return
static_cast
<
const
Blas
<
DeviceContext
>*>
(
this
);
...
...
paddle/fluid/operators/math/blas_impl.cu.h
浏览文件 @
ecfddebb
...
...
@@ -93,6 +93,24 @@ struct CUBlas<float> {
static
void
TRSM
(
ARGS
...
args
)
{
PADDLE_ENFORCE_CUDA_SUCCESS
(
platform
::
dynload
::
cublasStrsm
(
args
...));
}
template
<
typename
...
ARGS
>
static
void
GETRF_BATCH
(
ARGS
...
args
)
{
PADDLE_ENFORCE_CUDA_SUCCESS
(
platform
::
dynload
::
cublasSgetrfBatched
(
args
...));
}
template
<
typename
...
ARGS
>
static
void
GETRI_BATCH
(
ARGS
...
args
)
{
PADDLE_ENFORCE_CUDA_SUCCESS
(
platform
::
dynload
::
cublasSgetriBatched
(
args
...));
}
template
<
typename
...
ARGS
>
static
void
MATINV_BATCH
(
ARGS
...
args
)
{
PADDLE_ENFORCE_CUDA_SUCCESS
(
platform
::
dynload
::
cublasSmatinvBatched
(
args
...));
}
};
template
<
>
...
...
@@ -141,6 +159,24 @@ struct CUBlas<double> {
static
void
TRSM
(
ARGS
...
args
)
{
PADDLE_ENFORCE_CUDA_SUCCESS
(
platform
::
dynload
::
cublasDtrsm
(
args
...));
}
template
<
typename
...
ARGS
>
static
void
GETRF_BATCH
(
ARGS
...
args
)
{
PADDLE_ENFORCE_CUDA_SUCCESS
(
platform
::
dynload
::
cublasDgetrfBatched
(
args
...));
}
template
<
typename
...
ARGS
>
static
void
GETRI_BATCH
(
ARGS
...
args
)
{
PADDLE_ENFORCE_CUDA_SUCCESS
(
platform
::
dynload
::
cublasDgetriBatched
(
args
...));
}
template
<
typename
...
ARGS
>
static
void
MATINV_BATCH
(
ARGS
...
args
)
{
PADDLE_ENFORCE_CUDA_SUCCESS
(
platform
::
dynload
::
cublasDmatinvBatched
(
args
...));
}
};
template
<
>
...
...
@@ -446,6 +482,44 @@ void Blas<platform::CUDADeviceContext>::TRSM(CBLAS_SIDE side, CBLAS_UPLO uplo,
});
}
template
<
>
template
<
typename
T
>
void
Blas
<
platform
::
CUDADeviceContext
>::
BatchedGETRF
(
int
n
,
T
**
a
,
int
*
ipiv
,
int
*
info
,
int
batch_size
)
const
{
context_
.
CublasCall
([
&
](
cublasHandle_t
handle
)
{
CUBlas
<
T
>::
GETRF_BATCH
(
handle
,
n
,
a
,
n
,
ipiv
,
info
,
batch_size
);
});
}
template
<
>
template
<
typename
T
>
void
Blas
<
platform
::
CUDADeviceContext
>::
BatchedGETRI
(
int
n
,
const
T
**
a
,
const
int
*
ipiv
,
T
**
a_inv
,
int
*
info
,
int
batch_size
)
const
{
PADDLE_ENFORCE_NE
(
a_inv
,
a
,
platform
::
errors
::
InvalidArgument
(
"cuBLAS fuction 'cublas<S/D>getrfBatched' cannot be executed "
"in-place. The memory space of output matrix (address: %p) cannot "
"overlap memory space of input matrix (address: %p)."
,
a_inv
,
a
));
context_
.
CublasCall
([
&
](
cublasHandle_t
handle
)
{
CUBlas
<
T
>::
GETRI_BATCH
(
handle
,
n
,
a
,
n
,
ipiv
,
a_inv
,
n
,
info
,
batch_size
);
});
}
template
<
>
template
<
typename
T
>
void
Blas
<
platform
::
CUDADeviceContext
>::
BatchedMatInv
(
int
n
,
const
T
**
a
,
T
**
a_inv
,
int
*
info
,
int
batch_size
)
const
{
context_
.
CublasCall
([
&
](
cublasHandle_t
handle
)
{
CUBlas
<
T
>::
MATINV_BATCH
(
handle
,
n
,
a
,
n
,
a_inv
,
n
,
info
,
batch_size
);
});
}
}
// namespace math
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/math/matrix_inverse.cc
0 → 100644
浏览文件 @
ecfddebb
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/matrix_inverse.h"
#include "Eigen/Core"
#include "Eigen/LU"
#include "paddle/fluid/operators/math/blas.h"
namespace
paddle
{
namespace
operators
{
namespace
math
{
template
<
typename
T
>
class
MatrixInverseFunctor
<
platform
::
CPUDeviceContext
,
T
>
{
using
Matrix
=
Eigen
::
Matrix
<
T
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
,
Eigen
::
RowMajor
>
;
using
EigenMatrixMap
=
Eigen
::
Map
<
Matrix
>
;
using
ConstEigenMatrixMap
=
Eigen
::
Map
<
const
Matrix
>
;
public:
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
const
framework
::
Tensor
&
a
,
framework
::
Tensor
*
a_inv
)
{
const
auto
&
mat_dims
=
a
.
dims
();
const
int
rank
=
mat_dims
.
size
();
int
n
=
mat_dims
[
rank
-
1
];
int
batch_size
=
rank
>
2
?
a
.
numel
()
/
(
n
*
n
)
:
1
;
const
T
*
a_ptr
=
a
.
data
<
T
>
();
T
*
a_inv_ptr
=
a_inv
->
mutable_data
<
T
>
(
context
.
GetPlace
());
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
ConstEigenMatrixMap
mat
(
a_ptr
+
i
*
n
*
n
,
n
,
n
);
EigenMatrixMap
mat_inv
(
a_inv_ptr
+
i
*
n
*
n
,
n
,
n
);
Eigen
::
PartialPivLU
<
Matrix
>
lu
;
lu
.
compute
(
mat
);
const
T
min_abs_pivot
=
lu
.
matrixLU
().
diagonal
().
cwiseAbs
().
minCoeff
();
PADDLE_ENFORCE_GT
(
min_abs_pivot
,
static_cast
<
T
>
(
0
),
platform
::
errors
::
InvalidArgument
(
"Input is not invertible."
));
mat_inv
.
noalias
()
=
lu
.
inverse
();
}
}
};
template
class
MatrixInverseFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
class
MatrixInverseFunctor
<
platform
::
CPUDeviceContext
,
double
>;
}
// namespace math
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/math/matrix_inverse.cu.cc
0 → 100644
浏览文件 @
ecfddebb
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/matrix_inverse.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/operators/math/blas.h"
namespace
paddle
{
namespace
operators
{
namespace
math
{
template
<
typename
T
>
class
MatrixInverseFunctor
<
platform
::
CUDADeviceContext
,
T
>
{
public:
void
operator
()(
const
platform
::
CUDADeviceContext
&
context
,
const
framework
::
Tensor
&
a
,
framework
::
Tensor
*
a_inv
)
{
const
auto
&
mat_dims
=
a
.
dims
();
const
int
rank
=
mat_dims
.
size
();
int
n
=
mat_dims
[
rank
-
1
];
int
batch_size
=
rank
>
2
?
a
.
numel
()
/
(
n
*
n
)
:
1
;
memory
::
allocation
::
AllocationPtr
tmp_gpu_mat_data
;
const
T
*
gpu_mat
=
a
.
data
<
T
>
();
if
(
n
>=
32
)
{
// Copy all elements of input matrix A to a temporary memory space to
// avoid being overriden by getrf.
tmp_gpu_mat_data
=
memory
::
Alloc
(
context
,
a
.
numel
()
*
sizeof
(
T
));
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
context
.
GetPlace
()),
tmp_gpu_mat_data
->
ptr
(),
boost
::
get
<
platform
::
CUDAPlace
>
(
context
.
GetPlace
()),
a
.
data
<
void
>
(),
a
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
gpu_mat
=
reinterpret_cast
<
const
T
*>
(
tmp_gpu_mat_data
->
ptr
());
}
std
::
vector
<
const
T
*>
cpu_ptrs
(
batch_size
*
2
);
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
cpu_ptrs
[
i
]
=
gpu_mat
+
i
*
n
*
n
;
cpu_ptrs
[
i
+
batch_size
]
=
a_inv
->
data
<
T
>
()
+
i
*
n
*
n
;
}
// Copy the addresses of A and A_inv from host to device.
memory
::
allocation
::
AllocationPtr
tmp_gpu_ptrs_data
=
memory
::
Alloc
(
context
,
cpu_ptrs
.
size
()
*
sizeof
(
T
*
));
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
context
.
GetPlace
()),
tmp_gpu_ptrs_data
->
ptr
(),
platform
::
CPUPlace
(),
static_cast
<
void
*>
(
cpu_ptrs
.
data
()),
cpu_ptrs
.
size
()
*
sizeof
(
T
*
),
context
.
stream
());
T
**
gpu_inv_ptrs
=
reinterpret_cast
<
T
**>
(
tmp_gpu_ptrs_data
->
ptr
())
+
batch_size
;
// Allocate device memory for info and pivots.
int
num_ints
=
n
<
32
?
batch_size
:
batch_size
*
(
n
+
1
);
memory
::
allocation
::
AllocationPtr
tmp_gpu_info_data
=
memory
::
Alloc
(
context
,
num_ints
*
sizeof
(
int
));
int
*
gpu_info_ptr
=
reinterpret_cast
<
int
*>
(
tmp_gpu_info_data
->
ptr
());
auto
blas
=
math
::
GetBlas
<
platform
::
CUDADeviceContext
,
T
>
(
context
);
// This functions in cuBLAS is intended to be used for matrices of small
// sizes where the launch overhead is a significant factor.
// TODO(Xreki): call function in cusolver for large matrices.
if
(
n
<
32
)
{
// cublas<S/D>matinvBatched is a short cut of cublas<S/D>getrfBatched
// plus cublas<S/D>getriBatched.
// However it only works if N is less than 32. If not, we need to
// go through cublas<S/D>getrfBatched and cublas<S/D>getriBatched.
blas
.
BatchedMatInv
(
n
,
reinterpret_cast
<
const
T
**>
(
tmp_gpu_ptrs_data
->
ptr
()),
gpu_inv_ptrs
,
gpu_info_ptr
,
batch_size
);
}
else
{
// This function performs the LU factorization of each matrix A by the
// equation P * A = L * U. L and U are written back to original matrix A,
// and diagonal elements of L are discarded.
int
*
gpu_pivot_ptr
=
reinterpret_cast
<
int
*>
(
tmp_gpu_info_data
->
ptr
())
+
batch_size
;
blas
.
BatchedGETRF
(
n
,
reinterpret_cast
<
T
**>
(
tmp_gpu_ptrs_data
->
ptr
()),
gpu_pivot_ptr
,
gpu_info_ptr
,
batch_size
);
blas
.
BatchedGETRI
(
n
,
reinterpret_cast
<
const
T
**>
(
tmp_gpu_ptrs_data
->
ptr
()),
gpu_pivot_ptr
,
gpu_inv_ptrs
,
gpu_info_ptr
,
batch_size
);
}
}
};
template
class
MatrixInverseFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
class
MatrixInverseFunctor
<
platform
::
CUDADeviceContext
,
double
>;
}
// namespace math
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/math/matrix_inverse.h
0 → 100644
浏览文件 @
ecfddebb
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h"
namespace
paddle
{
namespace
operators
{
namespace
math
{
template
<
typename
DeviceContext
,
typename
T
>
class
MatrixInverseFunctor
{
public:
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
&
a
,
framework
::
Tensor
*
a_inv
);
};
}
// namespace math
}
// namespace operators
}
// namespace paddle
paddle/fluid/platform/dynload/cublas.h
浏览文件 @
ecfddebb
...
...
@@ -90,7 +90,9 @@ extern void *cublas_dso_handle;
__macro(cublasSgetrfBatched); \
__macro(cublasSgetriBatched); \
__macro(cublasDgetrfBatched); \
__macro(cublasDgetriBatched);
__macro(cublasDgetriBatched); \
__macro(cublasSmatinvBatched); \
__macro(cublasDmatinvBatched);
CUBLAS_BLAS_ROUTINE_EACH
(
DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP
)
...
...
paddle/fluid/platform/dynload/mklml.h
浏览文件 @
ecfddebb
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
...
python/paddle/__init__.py
浏览文件 @
ecfddebb
...
...
@@ -167,7 +167,7 @@ from .tensor.math import div #DEFINE_ALIAS
from
.tensor.math
import
add
#DEFINE_ALIAS
from
.tensor.math
import
atan
#DEFINE_ALIAS
from
.tensor.math
import
logsumexp
#DEFINE_ALIAS
# from .tensor.math import inverse
#DEFINE_ALIAS
from
.tensor.math
import
inverse
#DEFINE_ALIAS
from
.tensor.math
import
log1p
#DEFINE_ALIAS
from
.tensor.math
import
erf
#DEFINE_ALIAS
from
.tensor.math
import
addcmul
#DEFINE_ALIAS
...
...
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
ecfddebb
...
...
@@ -1221,9 +1221,9 @@ class OpTest(unittest.TestCase):
def
err_msg
():
offset
=
np
.
argmax
(
diff_mat
>
max_relative_error
)
return
(
"
%s error, %s variable %s max gradient diff %f over limit %f
, "
"the first error element is %d, expected %
f, but got %f
."
)
\
%
(
self
.
op_type
,
msg_prefix
,
name
,
max_diff
,
max_relative_error
,
return
(
"
Operator %s error, %s variable %s (shape: %s, dtype: %s) max gradient diff %e over limit %e
, "
"the first error element is %d, expected %
e, but got %e
."
)
\
%
(
self
.
op_type
,
msg_prefix
,
name
,
str
(
a
.
shape
),
self
.
dtype
,
max_diff
,
max_relative_error
,
offset
,
a
.
flatten
()[
offset
],
b
.
flatten
()[
offset
])
self
.
assertLessEqual
(
max_diff
,
max_relative_error
,
err_msg
())
...
...
python/paddle/fluid/tests/unittests/test_inverse_op.py
0 → 100644
浏览文件 @
ecfddebb
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
import
paddle
from
op_test
import
OpTest
class
TestInverseOp
(
OpTest
):
def
config
(
self
):
self
.
matrix_shape
=
[
10
,
10
]
self
.
dtype
=
"float64"
def
setUp
(
self
):
self
.
op_type
=
"inverse"
self
.
config
()
np
.
random
.
seed
(
123
)
mat
=
np
.
random
.
random
(
self
.
matrix_shape
).
astype
(
self
.
dtype
)
inverse
=
np
.
linalg
.
inv
(
mat
)
self
.
inputs
=
{
'Input'
:
mat
}
self
.
outputs
=
{
'Output'
:
inverse
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_grad
(
self
):
self
.
check_grad
([
'Input'
],
'Output'
)
class
TestInverseOpBatched
(
TestInverseOp
):
def
config
(
self
):
self
.
matrix_shape
=
[
8
,
4
,
4
]
self
.
dtype
=
"float64"
class
TestInverseOpLarge
(
TestInverseOp
):
def
config
(
self
):
self
.
matrix_shape
=
[
32
,
32
]
self
.
dtype
=
"float64"
def
test_grad
(
self
):
self
.
check_grad
([
'Input'
],
'Output'
,
max_relative_error
=
1e-6
)
class
TestInverseOpFP32
(
TestInverseOp
):
def
config
(
self
):
self
.
matrix_shape
=
[
10
,
10
]
self
.
dtype
=
"float32"
def
test_grad
(
self
):
self
.
check_grad
([
'Input'
],
'Output'
,
max_relative_error
=
1e-2
)
class
TestInverseOpBatchedFP32
(
TestInverseOpFP32
):
def
config
(
self
):
self
.
matrix_shape
=
[
8
,
4
,
4
]
self
.
dtype
=
"float32"
class
TestInverseOpLargeFP32
(
TestInverseOpFP32
):
def
config
(
self
):
self
.
matrix_shape
=
[
32
,
32
]
self
.
dtype
=
"float32"
class
TestInverseAPI
(
unittest
.
TestCase
):
def
setUp
(
self
):
np
.
random
.
seed
(
123
)
self
.
places
=
[
fluid
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
():
self
.
places
.
append
(
fluid
.
CUDAPlace
(
0
))
def
check_static_result
(
self
,
place
,
with_out
=
False
):
with
fluid
.
program_guard
(
fluid
.
Program
(),
fluid
.
Program
()):
input
=
fluid
.
data
(
name
=
"input"
,
shape
=
[
4
,
4
],
dtype
=
"float64"
)
if
with_out
:
out
=
fluid
.
data
(
name
=
"output"
,
shape
=
[
4
,
4
],
dtype
=
"float64"
)
else
:
out
=
None
result
=
paddle
.
inverse
(
input
=
input
,
out
=
out
)
input_np
=
np
.
random
.
random
([
4
,
4
]).
astype
(
"float64"
)
result_np
=
np
.
linalg
.
inv
(
input_np
)
exe
=
fluid
.
Executor
(
place
)
fetches
=
exe
.
run
(
fluid
.
default_main_program
(),
feed
=
{
"input"
:
input_np
},
fetch_list
=
[
result
])
self
.
assertTrue
(
np
.
allclose
(
fetches
[
0
],
np
.
linalg
.
inv
(
input_np
)))
def
test_static
(
self
):
for
place
in
self
.
places
:
self
.
check_static_result
(
place
=
place
)
def
test_dygraph
(
self
):
for
place
in
self
.
places
:
with
fluid
.
dygraph
.
guard
(
place
):
input_np
=
np
.
random
.
random
([
4
,
4
]).
astype
(
"float64"
)
input
=
fluid
.
dygraph
.
to_variable
(
input_np
)
result
=
paddle
.
inverse
(
input
)
self
.
assertTrue
(
np
.
allclose
(
result
.
numpy
(),
np
.
linalg
.
inv
(
input_np
)))
class
TestInverseAPIError
(
unittest
.
TestCase
):
def
test_errors
(
self
):
input_np
=
np
.
random
.
random
([
4
,
4
]).
astype
(
"float64"
)
# input must be Variable.
self
.
assertRaises
(
TypeError
,
paddle
.
inverse
,
input_np
)
# The data type of input must be float32 or float64.
for
dtype
in
[
"bool"
,
"int32"
,
"int64"
,
"float16"
]:
input
=
fluid
.
data
(
name
=
'input_'
+
dtype
,
shape
=
[
4
,
4
],
dtype
=
dtype
)
self
.
assertRaises
(
TypeError
,
paddle
.
inverse
,
input
)
# When out is set, the data type must be the same as input.
input
=
fluid
.
data
(
name
=
'input_1'
,
shape
=
[
4
,
4
],
dtype
=
"float32"
)
out
=
fluid
.
data
(
name
=
'output'
,
shape
=
[
4
,
4
],
dtype
=
"float64"
)
self
.
assertRaises
(
TypeError
,
paddle
.
inverse
,
input
,
out
)
# The number of dimensions of input must be >= 2.
input
=
fluid
.
data
(
name
=
'input_2'
,
shape
=
[
4
],
dtype
=
"float32"
)
self
.
assertRaises
(
ValueError
,
paddle
.
inverse
,
input
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/white_list/op_threshold_white_list.py
浏览文件 @
ecfddebb
...
...
@@ -39,7 +39,8 @@ NEED_FIX_FP64_CHECK_GRAD_THRESHOLD_OP_LIST = [
'spp'
,
\
'teacher_student_sigmoid_loss'
,
\
'unpool'
,
\
'yolov3_loss'
'yolov3_loss'
,
\
'inverse'
]
NEED_FIX_FP64_CHECK_OUTPUT_THRESHOLD_OP_LIST
=
[
'bilinear_interp'
]
python/paddle/tensor/__init__.py
浏览文件 @
ecfddebb
...
...
@@ -145,7 +145,7 @@ from .math import div #DEFINE_ALIAS
from
.math
import
add
#DEFINE_ALIAS
from
.math
import
atan
#DEFINE_ALIAS
from
.math
import
logsumexp
#DEFINE_ALIAS
# from .math import inverse
#DEFINE_ALIAS
from
.math
import
inverse
#DEFINE_ALIAS
from
.math
import
log1p
#DEFINE_ALIAS
from
.math
import
erf
#DEFINE_ALIAS
from
.math
import
addcmul
#DEFINE_ALIAS
...
...
python/paddle/tensor/math.py
浏览文件 @
ecfddebb
...
...
@@ -105,7 +105,7 @@ __all__ = [
'add'
,
'atan'
,
'logsumexp'
,
#
'inverse',
'inverse'
,
'log1p'
,
'erf'
,
'addcmul'
,
...
...
@@ -986,6 +986,7 @@ def mm(input, mat2, out=None, name=None):
'Y'
:
mat2
},
outputs
=
{
'Out'
:
out
})
return
out
def
addmm
(
input
,
x
,
y
,
alpha
=
1.0
,
beta
=
1.0
,
name
=
None
):
"""
**addmm**
...
...
@@ -1120,6 +1121,78 @@ def logsumexp(x, dim=None, keepdim=False, out=None, name=None):
return
layers
.
log
(
sum_out
,
name
)
def
inverse
(
input
,
out
=
None
,
name
=
None
):
"""
Takes the inverse of the square matrix. A square matrix is a matrix with
the same number of rows and columns. The input can be a square matrix
(2-D Tensor) or batches of square matrices.
Args:
input (Variable): The input Variable which holds a Tensor. The last two
dimensions should be equal. When the number of dimensions is
greater than 2, it is treated as batches of square matrix. The data
type can be float32 and float64.
out (Variable, optional): Optional output which can be any created
Variable that meets the requirements to store the result of operation.
If out is None, a new Varibale will be create to store the result.
name (str, optional): The default value is None. Normally there is no need for
user to set this property. For more information,
please refer to :ref:`api_guide_Name`
Returns:
Variable: A Tensor holds the inverse of input. The shape and data type
is the same as input.
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.fluid as fluid
mat_np = np.array([[2, 0], [0, 2]]).astype("float32")
# example for static graph
input = fluid.data("input", shape=[2, 2], dtype="float32")
out = paddle.inverse(input)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
results = exe.run(feed={"input": mat_np },
fetch_list=[out.name])
print(results[0]) # [[0.5, 0], [0, 0.5]]
# example for dynamic graph
with fluid.dygraph.guard():
mat = fluid.dygraph.to_variable(mat_np)
inv = paddle.inverse(mat)
print(inv) # [[0.5, 0], [0, 0.5]]
"""
if
in_dygraph_mode
():
return
core
.
ops
.
inverse
(
input
)
def
_check_input
(
input
):
check_variable_and_dtype
(
input
,
'input'
,
[
'float32'
,
'float64'
],
'inverse'
)
if
len
(
input
.
shape
)
<
2
:
raise
ValueError
(
"The input of inverse is expected to be a Tensor whose number "
"of dimensions is no less than 2. But reviced: %d, "
"input's shape: %s."
%
(
len
(
input
.
shape
),
input
.
shape
))
if
out
is
not
None
:
check_variable_and_dtype
(
out
,
'out'
,
input
.
dtype
,
'inverse'
)
_check_input
(
input
)
helper
=
LayerHelper
(
'inverse'
,
**
locals
())
if
out
is
None
:
out
=
helper
.
create_variable_for_type_inference
(
dtype
=
input
.
dtype
)
helper
.
append_op
(
type
=
'inverse'
,
inputs
=
{
'Input'
:
[
input
]
},
outputs
=
{
'Output'
:
[
out
]})
return
out
def
max
(
input
,
dim
=
None
,
keep_dim
=
False
,
out
=
None
,
name
=
None
):
"""
Computes the maximum of tensor elements over the given dimension.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录