Commit 4e21457d
Authored by zhiboniu on Dec 30, 2021; committed via GitHub on Dec 30, 2021.
add OP lu forward (#38559)
LGTM
Parent: 790cadd1
Showing 7 changed files with 973 additions and 0 deletions.
Changed files:
cmake/operators.cmake (+1, -0)
paddle/fluid/operators/lu_op.cc (+162, -0)
paddle/fluid/operators/lu_op.cu (+156, -0)
paddle/fluid/operators/lu_op.h (+474, -0)
paddle/fluid/platform/dynload/cusolver.h (+8, -0)
python/paddle/fluid/tests/unittests/test_lu_op.py (+171, -0)
tools/static_mode_white_list.py (+1, -0)
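For reference, the new lu operator computes the LU factorization with partial pivoting in the LAPACK/cuSOLVER getrf convention:

    A = P * L * U

Here P is a permutation recorded as a sequence of 1-based row interchanges (the Pivots output), L is unit lower triangular, U is upper triangular, and Out packs L and U into one tensor of the same shape as X; Infos carries the per-matrix info code reported by getrf (0 means success).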
cmake/operators.cmake

...
@@ -197,6 +197,7 @@ function(op_library TARGET)
    list(REMOVE_ITEM miopen_cu_cc_srcs "grid_sampler_cudnn_op.cu.cc")
    list(REMOVE_ITEM hip_srcs "cholesky_op.cu")
    list(REMOVE_ITEM hip_srcs "cholesky_solve_op.cu")
    list(REMOVE_ITEM hip_srcs "lu_op.cu")
    list(REMOVE_ITEM hip_srcs "matrix_rank_op.cu")
    list(REMOVE_ITEM hip_srcs "svd_op.cu")
    list(REMOVE_ITEM hip_srcs "eigvalsh_op.cu")
...
paddle/fluid/operators/lu_op.cc (new file, mode 100644)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/lu_op.h"

namespace paddle {
namespace operators {

class LUOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddComment(R"DOC(LU decomposition,
        Computes the LU factorization of a matrix or batches of matrices A.
        )DOC");
    AddInput("X", "(Tensor) The input tensor, shape of (*,m,n)");
    AddOutput("Out", "(Tensor) The output tensor, shape same to X");
    AddOutput("Pivots",
              "Stores all the intermediate transpositions of rows. shape of "
              "(*,min(m,n))");
    AddOutput("Infos",
              "(Tensor) This is a tensor of size (*) where non-zero values "
              "indicate whether factorization for the matrix has succeeded");
    AddAttr<bool>("pivots", "Whether pivoting is done").SetDefault(true);
  }
};

class LUOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *context) const override {
    OP_INOUT_CHECK(context->HasInput("X"), "Input", "X", "LU");
    OP_INOUT_CHECK(context->HasOutput("Out"), "Output", "Out", "LU");
    bool pivots = context->Attrs().Get<bool>("pivots");
    auto x_dims = context->GetInputDim("X");
    int x_rank = x_dims.size();
    PADDLE_ENFORCE_GE(x_rank, 2,
                      platform::errors::InvalidArgument(
                          "the rank of input must greater than 2"));
    context->SetOutputDim("Out", x_dims);
    int m = x_dims[x_rank - 1];
    int n = x_dims[x_rank - 2];
    int min_mn = std::min(m, n);
    auto dims_vec = framework::vectorize(x_dims);
    OP_INOUT_CHECK(context->HasOutput("Infos"), "Output", "Infos", "LU");
    if (x_rank == 2) {
      auto Infos_dim = std::vector<int>(1);
      context->SetOutputDim("Infos", framework::make_ddim(Infos_dim));
    } else {
      auto Infos_dim =
          std::vector<int>(dims_vec.begin(), dims_vec.begin() + x_rank - 2);
      context->SetOutputDim("Infos", framework::make_ddim(Infos_dim));
    }
    if (pivots) {
      OP_INOUT_CHECK(context->HasOutput("Pivots"), "Output", "Pivots", "LU");
      auto Pivots_dim =
          std::vector<int>(dims_vec.begin(), dims_vec.begin() + x_rank - 1);
      Pivots_dim[x_rank - 2] = min_mn;
      context->SetOutputDim("Pivots", framework::make_ddim(Pivots_dim));
    }
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    return framework::OpKernelType(
        OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace());
  }
};

class LUOpVarTypeInference : public framework::VarTypeInference {
 public:
  void operator()(framework::InferVarTypeContext *ctx) const override {
    auto var_type = ctx->GetInputType("X", 0);
    auto data_type = ctx->GetInputDataType("X", 0);

    ctx->SetOutputType("Out", var_type, framework::ALL_ELEMENTS);
    ctx->SetOutputDataType("Out", data_type, framework::ALL_ELEMENTS);

    ctx->SetOutputType("Pivots", var_type, framework::ALL_ELEMENTS);
    ctx->SetOutputDataType("Pivots", framework::proto::VarType::INT32,
                           framework::ALL_ELEMENTS);

    ctx->SetOutputType("Infos", var_type, framework::ALL_ELEMENTS);
    ctx->SetOutputDataType("Infos", framework::proto::VarType::INT32,
                           framework::ALL_ELEMENTS);
  }
};

template <typename T>
class LUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const paddle::framework::ExecutionContext &ctx) const override {
    auto pivots = ctx.Attr<bool>("pivots");
    auto *xin = ctx.Input<framework::Tensor>("X");
    auto *out = ctx.Output<framework::Tensor>("Out");
    auto *IpivT = ctx.Output<framework::Tensor>("Pivots");
    auto *InfoT = ctx.Output<framework::Tensor>("Infos");
    PADDLE_ENFORCE_EQ(pivots, true,
                      platform::errors::InvalidArgument(
                          "lu without pivoting is not implemented on the CPU, "
                          "but got pivots=False"));

    math::DeviceIndependenceTensorOperations<
        paddle::platform::CPUDeviceContext, T>
        helper(ctx);
    *out = helper.Transpose(*xin);

    auto outdims = out->dims();
    auto outrank = outdims.size();

    int m = static_cast<int>(outdims[outrank - 1]);
    int n = static_cast<int>(outdims[outrank - 2]);
    int lda = std::max(1, m);

    auto ipiv_dims = slice_ddim(outdims, 0, outrank - 1);
    ipiv_dims[outrank - 2] = std::min(m, n);
    IpivT->Resize(ipiv_dims);
    auto ipiv_data = IpivT->mutable_data<int>(ctx.GetPlace());

    auto info_dims = slice_ddim(outdims, 0, outrank - 2);
    if (info_dims.size() == 0) {
      info_dims = framework::make_ddim({1});
    }
    InfoT->Resize(info_dims);
    auto info_data = InfoT->mutable_data<int>(ctx.GetPlace());

    auto batchsize = product(info_dims);
    batchsize = std::max(static_cast<int>(batchsize), 1);
    auto out_data = out->mutable_data<T>(ctx.GetPlace());
    for (int b = 0; b < batchsize; b++) {
      auto out_data_item = &out_data[b * m * n];
      int *info_data_item = &info_data[b];
      int *ipiv_data_item = &ipiv_data[b * std::min(m, n)];
      math::lapackLu<T>(m, n, out_data_item, lda, ipiv_data_item,
                        info_data_item);
    }
    *out = helper.Transpose(*out);
  }
};

DECLARE_INPLACE_OP_INFERER(LUOpInplaceInferer, {"X", "Out"});

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OPERATOR(lu, ops::LUOp, ops::LUOpMaker, ops::LUOpVarTypeInference,
                  ops::LUOpInplaceInferer);

REGISTER_OP_CPU_KERNEL(lu, ops::LUKernel<float>, ops::LUKernel<double>);
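The CPU kernel mirrors LAPACK getrf: it transposes X into column-major layout, factorizes each (m, n) matrix of the batch in place, and transposes back, so Out holds the packed L/U factors and Pivots the 1-based interchange indices. A minimal NumPy/SciPy sketch of that layout (an illustration, not code from this commit; scipy.linalg.lu_factor wraps the same getrf routine but reports 0-based pivots):

import numpy as np
from scipy.linalg import lu_factor

A = np.random.rand(4, 4)

# lu packs L (strictly lower part, unit diagonal implied) and U (upper part)
# into one matrix; piv records the row interchanges performed by getrf.
lu, piv = lu_factor(A)

# The operator's Out corresponds to `lu`, and Pivots to `piv + 1`
# (LAPACK-style 1-based indices).
pivots_one_based = piv + 1

# Rebuild the factors and verify the factorization.
L = np.tril(lu, -1) + np.eye(4)
U = np.triu(lu)
P = np.eye(4)
for i, p in enumerate(piv):            # replay the recorded row swaps
    P[[i, p], :] = P[[p, i], :]
assert np.allclose(P @ A, L @ U)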
paddle/fluid/operators/lu_op.cu (new file, mode 100644)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifndef PADDLE_WITH_HIP
// HIP not support cusolver

#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/operators/lu_op.h"
#include "paddle/fluid/platform/dynload/cusolver.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
using CUDADeviceContext = paddle::platform::CUDADeviceContext;

template <typename T>
void cusolver_bufferSize(const cusolverDnHandle_t& cusolverH, int m, int n,
                         T* d_A, int lda, int* lwork);
template <typename T>
void cusolver_getrf(const cusolverDnHandle_t& cusolverH, int m, int n, T* d_A,
                    int lda, T* d_work, int* d_Ipiv, int* d_info);

template <>
void cusolver_bufferSize<float>(const cusolverDnHandle_t& cusolverH, int m,
                                int n, float* d_A, int lda, int* lwork) {
  PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusolverDnSgetrf_bufferSize(
      cusolverH, m, n, d_A, lda, lwork));
}

template <>
void cusolver_bufferSize<double>(const cusolverDnHandle_t& cusolverH, int m,
                                 int n, double* d_A, int lda, int* lwork) {
  PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusolverDnDgetrf_bufferSize(
      cusolverH, m, n, d_A, lda, lwork));
}

template <>
void cusolver_getrf<float>(const cusolverDnHandle_t& cusolverH, int m, int n,
                           float* d_A, int lda, float* d_work, int* d_Ipiv,
                           int* d_info) {
  PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusolverDnSgetrf(
      cusolverH, m, n, d_A, lda, d_work, d_Ipiv, d_info));
}

template <>
void cusolver_getrf<double>(const cusolverDnHandle_t& cusolverH, int m, int n,
                            double* d_A, int lda, double* d_work, int* d_Ipiv,
                            int* d_info) {
  PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusolverDnDgetrf(
      cusolverH, m, n, d_A, lda, d_work, d_Ipiv, d_info));
}

template <typename T>
void lu_decomposed_kernel(int m, int n, T* d_A, int lda, int* d_Ipiv,
                          int* d_info, const framework::ExecutionContext& ctx) {
  /* step 1: get cusolver handle*/
  auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
  auto cusolverH = dev_ctx.cusolver_dn_handle();

  /* step 2: query working space of getrf */
  int lwork;
  cusolver_bufferSize(cusolverH, m, n, d_A, lda, &lwork);

  auto work_buff = memory::Alloc(dev_ctx, lwork * sizeof(T));
  T* d_work = reinterpret_cast<T*>(work_buff->ptr());

  /* step 3: LU factorization */
  if (d_Ipiv) {
    cusolver_getrf(cusolverH, m, n, d_A, lda, d_work, d_Ipiv, d_info);
  } else {
    cusolver_getrf(cusolverH, m, n, d_A, lda, d_work, NULL, d_info);
  }
  PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize());
}

template <typename T>
class LUCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
#ifdef __HIPCC__
    const int64_t kMaxBlockDim = 256;
#else
    const int64_t kMaxBlockDim = 512;
#endif

    auto* xin = ctx.Input<framework::Tensor>("X");
    auto* out = ctx.Output<framework::Tensor>("Out");
    auto* IpivT = ctx.Output<framework::Tensor>("Pivots");
    auto* InfoT = ctx.Output<framework::Tensor>("Infos");
    auto pivots = ctx.Attr<bool>("pivots");

    math::DeviceIndependenceTensorOperations<
        paddle::platform::CUDADeviceContext, T>
        helper(ctx);
    *out = helper.Transpose(*xin);

    auto outdims = out->dims();
    auto outrank = outdims.size();

    int m = static_cast<int>(outdims[outrank - 1]);
    int n = static_cast<int>(outdims[outrank - 2]);
    int lda = std::max(1, m);
    if (pivots) {
      auto ipiv_dims = slice_ddim(outdims, 0, outrank - 1);
      ipiv_dims[outrank - 2] = std::min(m, n);
      IpivT->Resize(ipiv_dims);
    }
    auto ipiv_data = IpivT->mutable_data<int>(ctx.GetPlace());

    auto info_dims = slice_ddim(outdims, 0, outrank - 2);
    if (info_dims.size() == 0) {
      info_dims = framework::make_ddim({1});
    }
    InfoT->Resize(info_dims);
    auto info_data = InfoT->mutable_data<int>(ctx.GetPlace());

    auto batchsize = product(info_dims);
    batchsize = std::max(static_cast<int>(batchsize), 1);
    auto out_data = out->mutable_data<T>(ctx.GetPlace());
    for (int b = 0; b < batchsize; b++) {
      auto out_data_item = &out_data[b * m * n];
      int* info_data_item = &info_data[b];
      if (pivots) {
        auto ipiv_data_item = &ipiv_data[b * std::min(m, n)];
        lu_decomposed_kernel(m, n, out_data_item, lda, ipiv_data_item,
                             info_data_item, ctx);
      } else {
        lu_decomposed_kernel(m, n, out_data_item, lda, NULL, info_data_item,
                             ctx);
      }
    }
    *out = helper.Transpose(*out);
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_CUDA_KERNEL(lu, ops::LUCUDAKernel<float>,
                        ops::LUCUDAKernel<double>);

#endif  // not PADDLE_WITH_HIP
paddle/fluid/operators/lu_op.h (new file, mode 100644)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/lapack_function.h"
#include "paddle/fluid/operators/set_value_op.h"
#include "paddle/fluid/operators/svd_helper.h"
#include "paddle/fluid/operators/triangular_solve_op.h"
#include "paddle/fluid/operators/tril_triu_op.h"
#include "paddle/pten/include/math.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
using LoDTensorArray = framework::LoDTensorArray;

template <typename DeviceContext, typename T, size_t D>
void SetValueCompute(const framework::ExecutionContext& ctx,
                     framework::Tensor* in, framework::Tensor* value_tensor,
                     framework::Tensor* out, const std::vector<int64_t>& axes,
                     std::vector<int64_t>* starts, std::vector<int64_t>* ends,
                     const std::vector<int64_t>& shape) {
  std::vector<int64_t> steps = {1, 1};
  std::vector<int64_t> decrease_axes = {};
  std::vector<int64_t> none_axes = {};

  auto dtype = in->type();

  auto in_dims = in->dims();
  CheckAndUpdateSliceAttrs<int64_t>(in_dims, axes, starts, ends, &steps);
  auto slice_dims = GetSliceDims(in_dims, axes, *starts, *ends, &steps);
  auto decrease_slice_dims = GetDecreasedDims(slice_dims, decrease_axes);

  auto slice_dims_for_assign = decrease_slice_dims;
  if (!none_axes.empty()) {
    std::vector<int64_t> slice_dims_with_none;

    size_t none_axes_cur = 0, decrease_axes_cur = 0;
    for (int i = 0; i < slice_dims.size(); ++i) {
      while (none_axes_cur < none_axes.size() &&
             none_axes[none_axes_cur] <= i) {
        slice_dims_with_none.push_back(1);
        none_axes_cur++;
      }
      if (decrease_axes_cur < decrease_axes.size() &&
          decrease_axes[decrease_axes_cur] == i) {
        decrease_axes_cur++;
      } else {
        slice_dims_with_none.push_back(slice_dims[i]);
      }
    }
    while (none_axes_cur < none_axes.size()) {
      slice_dims_with_none.push_back(1);
      none_axes_cur++;
    }

    slice_dims_for_assign = framework::make_ddim(slice_dims_with_none);
  }

  auto place = ctx.GetPlace();
  auto& eigen_place =
      *ctx.template device_context<DeviceContext>().eigen_device();

  // Here copy data from input to avoid data loss at PE and Graph level.
  // TODO(liym27): Speed up in the future version.
  // - Q: Why don't call ShareDataWith to speed up?
  // - A: Because it's not supported to ShareDataWith on OP's input and output
  // https://github.com/PaddlePaddle/Paddle/wiki/ShareDataWith-and-ShareBufferWith-are-prohibited-in-OP
  // - Q: Why don't delete Input, after all, the input and output are the same
  // Tensor at program level?
  // - A: If deleting Input, the graph will be complex, such as there will
  // be two ops points to the output in graph: op1 -> output <- set_value.
  // In this case, we have to find a way to handle the running order of
  // set_value is what we want.
  TensorCopy(*in, place, out);

  Tensor slice_tensor(dtype), pad_tensor(dtype);
  slice_tensor.mutable_data<T>(slice_dims, place);
  pad_tensor.mutable_data<T>(in_dims, place);

  auto pad_e = framework::EigenTensor<T, D>::From(pad_tensor, in_dims);
  auto out_e = framework::EigenTensor<T, D>::From(*out);
  auto slice_e = framework::EigenTensor<T, D>::From(slice_tensor, slice_dims);

  // Step 1: Set the value of out at `_index` to zero
  slice_e.device(eigen_place) = slice_e.constant(T(0));

  auto starts_indices = Eigen::DSizes<Eigen::DenseIndex, D>();
  auto ends_indices = Eigen::DSizes<Eigen::DenseIndex, D>();
  auto strides_indices = Eigen::DSizes<Eigen::DenseIndex, D>();

  for (size_t i = 0; i < D; ++i) {
    starts_indices[i] = 0;
    ends_indices[i] = slice_dims[i];
    strides_indices[i] = 1;
  }
  for (size_t i = 0; i < axes.size(); i++) {
    int axis_index = axes[i];
    starts_indices[axis_index] = (*starts)[i];
    ends_indices[axis_index] = (*ends)[i];
    strides_indices[axis_index] = steps[i];
    if ((*starts)[i] == (*ends)[i]) {
      // slice is empty, data will not be changed
      return;
    }
  }

  out_e.stridedSlice(starts_indices, ends_indices, strides_indices)
      .device(eigen_place) = slice_e;

  // Step 2: Set a tensor with the same shape as out tensor. And its data at
  // '_index' is the same as value_tensor, and data out of '_index' to zero

  // - Step 2.1 Set slice tensor with value

  // NOTE(liym27): [ Why resize slice_tensor here? ]
  // A: When do broadcasting on slice_tensor and value_tensor, the shape of
  // slice_tensor should be decreased dims.
  // e.g.
  //  x[:,0] = value_tensor
  // x's shape = [3, 4], value_tensor's shape = [3]
  // We get slice_dims = [3, 1],  decrease_slice_dims = [3]
  // If do broadcasting on Tensor with shape [3, 1] and [3], the result's
  // shape is [3, 3], which cross the border;
  // If do broadcasting on Tensor with shape [3] and [3], the result's shape
  // is [3], which is right.

  slice_tensor.Resize(slice_dims_for_assign);
  if (value_tensor != nullptr) {
    CheckIsDimsMatch(slice_dims_for_assign, value_tensor->dims());
    // ElementwiseComputeEx can do broadcasting
    ElementwiseComputeEx<SubFunctor<T>, DeviceContext, T>(
        ctx, &slice_tensor, value_tensor, -1, SubFunctor<T>(), &slice_tensor);
  } else {
    Tensor value_t(dtype);
    auto value_dims = framework::make_ddim(shape);
    CheckIsDimsMatch(slice_dims_for_assign, value_dims);

    value_t.mutable_data<T>(value_dims, place);
    auto value_name = GetValueName(dtype);
    CopyVecotorToTensor<T>(value_name.c_str(), &value_t, ctx);
    value_t.Resize(value_dims);
    ElementwiseComputeEx<SubFunctor<T>, DeviceContext, T>(
        ctx, &slice_tensor, &value_t, -1, SubFunctor<T>(), &slice_tensor);
  }
  slice_tensor.Resize(slice_dims);

  // - Step 2.2 Pad slice tensor with 0
  pad_e.device(eigen_place) = pad_e.constant(T(0));
  pad_e.stridedSlice(starts_indices, ends_indices, strides_indices)
      .device(eigen_place) = slice_e;

  // Step 3: Set out tensor with value_tensor
  out_e.device(eigen_place) = out_e - pad_e;
}

template <typename DeviceContext, typename T>
void SetValueCompute_dispatch(
    const framework::ExecutionContext& ctx, framework::Tensor* in,
    framework::Tensor* value_tensor, framework::Tensor* out,
    const std::vector<int64_t>& axes, std::vector<int64_t>* starts,
    std::vector<int64_t>* ends, const std::vector<int64_t>& shape, int rank) {
  switch (rank) {
    case 1:
      SetValueCompute<DeviceContext, T, 1>(ctx, in, value_tensor, out, axes,
                                           starts, ends, shape);
      break;
    case 2:
      SetValueCompute<DeviceContext, T, 2>(ctx, in, value_tensor, out, axes,
                                           starts, ends, shape);
      break;
    case 3:
      SetValueCompute<DeviceContext, T, 3>(ctx, in, value_tensor, out, axes,
                                           starts, ends, shape);
      break;
    case 4:
      SetValueCompute<DeviceContext, T, 4>(ctx, in, value_tensor, out, axes,
                                           starts, ends, shape);
      break;
    case 5:
      SetValueCompute<DeviceContext, T, 5>(ctx, in, value_tensor, out, axes,
                                           starts, ends, shape);
      break;
    case 6:
      SetValueCompute<DeviceContext, T, 6>(ctx, in, value_tensor, out, axes,
                                           starts, ends, shape);
      break;
    default:
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The rank of input should be less than 7, but received %d.", rank));
  }
}

template <typename DeviceContext, typename T>
void Tensor_Conj(const DeviceContext& dev_ctx, const framework::Tensor& tensor,
                 framework::Tensor* out) {
  out->Resize(tensor.dims());
  platform::ForRange<DeviceContext> out_for_range(dev_ctx, tensor.numel());
  math::ConjFunctor<T> out_functor(tensor.data<T>(), tensor.numel(),
                                   out->mutable_data<T>(dev_ctx.GetPlace()));
  out_for_range(out_functor);
}

template <typename DeviceContext, typename T>
void Tensor_Add(const DeviceContext& dev_ctx, const framework::Tensor& src1,
                const framework::Tensor& src2, framework::Tensor* out) {
  out->Resize(src1.dims());
  out->mutable_data<T>(dev_ctx.GetPlace());
  auto pt_x = paddle::experimental::MakePtenDenseTensor(src1);
  auto pt_y = paddle::experimental::MakePtenDenseTensor(src2);
  auto pt_z = paddle::experimental::MakePtenDenseTensor(*out);
  pten::Add<T>(dev_ctx, *pt_x.get(), *pt_y.get(), -1, pt_z.get());
}

template <typename DeviceContext, typename T>
void Tensor_Sub(const DeviceContext& dev_ctx, const framework::Tensor& src1,
                const framework::Tensor& src2, framework::Tensor* out) {
  out->Resize(src1.dims());
  out->mutable_data<T>(dev_ctx.GetPlace());
  auto pt_x = paddle::experimental::MakePtenDenseTensor(src1);
  auto pt_y = paddle::experimental::MakePtenDenseTensor(src2);
  auto pt_z = paddle::experimental::MakePtenDenseTensor(*out);
  pten::Subtract<T>(dev_ctx, *pt_x.get(), *pt_y.get(), -1, pt_z.get());
}

template <typename DeviceContext, typename T, size_t D>
void SliceCompute(const framework::ExecutionContext& ctx,
                  const framework::Tensor* in, framework::Tensor* out,
                  const std::vector<int>& axes_int,
                  const std::vector<int>& starts_int,
                  const std::vector<int>& ends_int) {
  std::vector<int64_t> axes(axes_int.begin(), axes_int.end());
  std::vector<int64_t> starts(starts_int.begin(), starts_int.end());
  std::vector<int64_t> ends(ends_int.begin(), ends_int.end());

  std::vector<int> decrease_axis = {};
  std::vector<int> infer_flags = {};

  PADDLE_ENFORCE_EQ(
      starts.size(), axes.size(),
      platform::errors::InvalidArgument(
          "The size of starts must be equal to the size of axes."));
  PADDLE_ENFORCE_EQ(ends.size(), axes.size(),
                    platform::errors::InvalidArgument(
                        "The size of ends must be equal to the size of axes."));

  // Step 2: Compute output
  auto in_dims = in->dims();
  auto out_dims = out->dims();
  auto slice_dims = out_dims;

  // 2.1 Infer output dims
  for (size_t i = 0; i < axes.size(); ++i) {
    // when start == -1 && end == start+1
    if (starts[i] == -1 && ends[i] == 0 && infer_flags[i] == -1) {
      auto ret =
          std::find(decrease_axis.begin(), decrease_axis.end(), axes[i]);
      if (ret != decrease_axis.end()) {
        ends[i] = in_dims[axes[i]];
      }
    }
  }

  CheckAndUpdateSliceAttrs(in_dims, axes, &starts, &ends);
  slice_dims =
      GetSliceDims<int64_t>(in_dims, axes, starts, ends, nullptr, nullptr);
  out_dims = GetDecreasedDims(slice_dims, decrease_axis);

  // 2.2 Get output
  auto offsets = Eigen::DSizes<Eigen::DenseIndex, D>();
  auto extents = Eigen::DSizes<Eigen::DenseIndex, D>();

  for (size_t i = 0; i < D; ++i) {
    offsets[i] = 0;
    extents[i] = slice_dims[i];
  }
  for (size_t i = 0; i < axes.size(); ++i) {
    offsets[axes[i]] = starts[i];
  }

  out->Resize(slice_dims);
  out->mutable_data<T>(ctx.GetPlace());

  auto in_t = framework::EigenTensor<T, D>::From(*in, in_dims);
  auto out_t = framework::EigenTensor<T, D>::From(*out, slice_dims);
  auto& eigen_place =
      *ctx.template device_context<DeviceContext>().eigen_device();

  if (in->numel() <= Eigen::NumTraits<int>::highest()) {
    // similar to tf.slice:
    // if element number less than INT_MAX, change the type of index to int
    Eigen::DSizes<int, D> offsets_32bit, extents_32bit;
    for (size_t i = 0; i < D; i++) {
      offsets_32bit[i] = offsets[i];
      extents_32bit[i] = extents[i];
    }
    EigenSlice<std::decay_t<decltype(eigen_place)>, T, D>::Eval(
        eigen_place, framework::To32BitIndex(out_t),
        framework::To32BitIndex(in_t), offsets_32bit, extents_32bit);
  } else {
    EigenSlice<std::decay_t<decltype(eigen_place)>, T, D>::Eval(
        eigen_place, out_t, in_t, offsets, extents);
  }

  out->Resize(out_dims);
  out->mutable_data<T>(ctx.GetPlace());
}

template <typename DeviceContext, typename T>
void Tensor_narrow(const framework::ExecutionContext& ctx,
                   const framework::Tensor* src, framework::Tensor* out,
                   int row_s, int row_e, int col_s, int col_e) {
  auto rank = src->dims().size();
  std::vector<int> axes_int = {rank - 2, rank - 1};
  std::vector<int> starts_int = {row_s, col_s};
  std::vector<int> ends_int = {row_e, col_e};
  switch (rank) {
    case 1:
      SliceCompute<DeviceContext, T, 1>(ctx, src, out, axes_int, starts_int,
                                        ends_int);
      break;
    case 2:
      SliceCompute<DeviceContext, T, 2>(ctx, src, out, axes_int, starts_int,
                                        ends_int);
      break;
    case 3:
      SliceCompute<DeviceContext, T, 3>(ctx, src, out, axes_int, starts_int,
                                        ends_int);
      break;
    case 4:
      SliceCompute<DeviceContext, T, 4>(ctx, src, out, axes_int, starts_int,
                                        ends_int);
      break;
    case 5:
      SliceCompute<DeviceContext, T, 5>(ctx, src, out, axes_int, starts_int,
                                        ends_int);
      break;
    case 6:
      SliceCompute<DeviceContext, T, 6>(ctx, src, out, axes_int, starts_int,
                                        ends_int);
      break;
    default:
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The rank of input should be less than 7, but received %d.", rank));
  }
}

template <typename DeviceContext>
void arange(const DeviceContext& dev_ctx, framework::Tensor* tmp, int w,
            int batchsize = 1, int h = 1) {
  tmp->Resize(framework::make_ddim({batchsize * w}));
  platform::CPUPlace cpu;
  auto tmpdata = tmp->mutable_data<int32_t>(cpu);
  for (int b = 0; b < batchsize; b++) {
    for (int i = 0; i < w; i++) {
      tmpdata[b * w + i] = static_cast<int32_t>(b * h + i);
    }
  }
}

template <typename T>
struct OneFunctor {
  OneFunctor(T* output, int* idtptr, int w, int dim)
      : output_(output), idtptr_(idtptr), w_(w), dim_(dim) {}

  HOSTDEVICE void operator()(size_t idx) const {
    output_[w_ * idtptr_[idx] + idx % dim_] = static_cast<T>(1);
  }

  T* output_;
  int* idtptr_;
  int w_;
  int dim_;
};

template <typename DeviceContext, typename T>
void LU_Unpack(const DeviceContext& dev_ctx, const framework::Tensor* LU,
               framework::Tensor* L, framework::Tensor* U) {
  const auto udims = LU->dims();
  L->Resize(udims);
  U->Resize(udims);
  const auto H = udims[udims.size() - 2];
  const auto W = udims[udims.size() - 1];
  auto L_dataptr = L->mutable_data<T>(dev_ctx.GetPlace());
  platform::ForRange<DeviceContext> x_for_range(dev_ctx, LU->numel());
  TrilTriuCompute<T> tril_computer(LU->data<T>(), -1, true, H, W, L_dataptr);
  x_for_range(tril_computer);

  TrilTriuCompute<T> triu_computer(LU->data<T>(), 0, false, H, W,
                                   U->mutable_data<T>(dev_ctx.GetPlace()));
  x_for_range(triu_computer);

  // set L's diagonal 1
  auto dim = std::min(H, W);
  framework::Tensor rowtensor, rt_dev;
  auto batchsize = product(framework::slice_ddim(udims, 0, udims.size() - 2));
  batchsize = std::max(static_cast<int>(batchsize), 1);
  arange<DeviceContext>(dev_ctx, &rowtensor, dim, batchsize, H);
  auto idtptr = rowtensor.data<int32_t>();
  if (is_gpu_place(dev_ctx.GetPlace())) {
    framework::TensorCopy(rowtensor, dev_ctx.GetPlace(), &rt_dev);
    idtptr = rt_dev.data<int32_t>();
  }

  platform::ForRange<DeviceContext> for_range(dev_ctx, rowtensor.numel());
  OneFunctor<T> functor(L_dataptr, idtptr, W, dim);
  for_range(functor);
}

template <typename DeviceContext, typename T>
void scatterpivot(const DeviceContext& dev_ctx, T* out_data,
                  framework::Tensor* idlst, int w, int dim) {
  framework::Tensor idlst_tmp;
  idlst_tmp.Resize(idlst->dims());
  idlst_tmp.mutable_data<int32_t>(dev_ctx.GetPlace());
  framework::TensorCopy(*idlst, dev_ctx.GetPlace(), &idlst_tmp);
  auto idtptr = idlst_tmp.data<int32_t>();

  platform::ForRange<DeviceContext> for_range(dev_ctx, idlst_tmp.numel());
  OneFunctor<T> functor(out_data, idtptr, w, dim);
  for_range(functor);
}

template <typename DeviceContext, typename T>
void Unpack_Pivot(const DeviceContext& dev_ctx, const framework::Tensor& Pivot,
                  framework::Tensor* P, int h, int w) {
  auto dims = Pivot.dims();
  auto Pdimvec = vectorize(dims);
  auto prank = Pdimvec.size();
  auto Pnum = dims[prank - 1];
  framework::Tensor Pivot_cpu;
  platform::CPUPlace cpu;
  framework::TensorCopy(Pivot, cpu, &Pivot_cpu);
  auto pdataptr = Pivot_cpu.data<int32_t>();
  Pdimvec[prank - 1] = h;
  Pdimvec.emplace_back(h);
  auto Pdim = framework::make_ddim(Pdimvec);
  P->Resize(Pdim);
  auto pdata = P->mutable_data<T>(dev_ctx.GetPlace());
  math::SetConstant<DeviceContext, T> setter;
  setter(dev_ctx, P, static_cast<T>(0));

  auto batchsize = product(framework::slice_ddim(dims, 0, prank - 1));
  batchsize = std::max(static_cast<int>(batchsize), 1);
  framework::Tensor idt;
  for (int i = 0; i < batchsize; i++) {
    arange<DeviceContext>(dev_ctx, &idt, h);
    auto idlst = idt.data<int32_t>();
    for (int j = 0; j < Pnum; j++) {
      if (idlst[pdataptr[i * Pnum + j] - 1] == idlst[j]) continue;
      auto temp = idlst[j];
      idlst[j] = idlst[pdataptr[i * Pnum + j] - 1];
      idlst[pdataptr[i * Pnum + j] - 1] = temp;
    }
    scatterpivot(dev_ctx, &(pdata[i * h * h]), &idt, h, h);
  }
}

}  // namespace operators
}  // namespace paddle
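LU_Unpack and Unpack_Pivot recover the individual factors from a packed result: L is the strictly lower triangle with a unit diagonal filled in, U is the upper triangle, and the pivot sequence is expanded into a permutation matrix by replaying the recorded row swaps on an arange index vector. A NumPy sketch of the same logic for a single square matrix (illustration only, assuming getrf-style inputs as described earlier):

import numpy as np
from scipy.linalg import lu_factor

def unpack_lu_reference(lu_packed, pivots_one_based):
    """Mirror LU_Unpack / Unpack_Pivot for one square packed LU factorization."""
    m = lu_packed.shape[0]
    L = np.tril(lu_packed, -1) + np.eye(m)   # unit lower-triangular factor
    U = np.triu(lu_packed)                   # upper-triangular factor
    # Replay the row interchanges on 0..m-1, as Unpack_Pivot does with arange().
    idlst = np.arange(m)
    for j, p in enumerate(pivots_one_based - 1):   # pivots are 1-based
        idlst[j], idlst[p] = idlst[p], idlst[j]
    # Scatter ones at (idlst[c], c), mirroring OneFunctor.
    P = np.zeros((m, m))
    P[idlst, np.arange(m)] = 1.0
    return P, L, U

A = np.random.rand(5, 5)
lu_packed, piv = lu_factor(A)                 # 0-based pivots from SciPy
P, L, U = unpack_lu_reference(lu_packed, piv + 1)
assert np.allclose(A, P @ L @ U)              # same convention as scipy.linalg.lu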
paddle/fluid/platform/dynload/cusolver.h

...
@@ -71,6 +71,10 @@ CUSOLVER_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP);
  __macro(cusolverDnSpotrsBatched);       \
  __macro(cusolverDnDpotrsBatched);       \
  __macro(cusolverDnSgesvdj_bufferSize);  \
  __macro(cusolverDnSgetrf_bufferSize);   \
  __macro(cusolverDnDgetrf_bufferSize);   \
  __macro(cusolverDnCgetrf_bufferSize);   \
  __macro(cusolverDnZgetrf_bufferSize);   \
  __macro(cusolverDnSgeqrf_bufferSize);   \
  __macro(cusolverDnDgeqrf_bufferSize);   \
  __macro(cusolverDnCgeqrf_bufferSize);   \
...
@@ -84,6 +88,10 @@ CUSOLVER_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP);
  __macro(cusolverDnDgesvdj_bufferSize);  \
  __macro(cusolverDnSgesvdj);             \
  __macro(cusolverDnDgesvdj);             \
  __macro(cusolverDnSgetrf);              \
  __macro(cusolverDnDgetrf);              \
  __macro(cusolverDnCgetrf);              \
  __macro(cusolverDnZgetrf);              \
  __macro(cusolverDnSgeqrf);              \
  __macro(cusolverDnDgeqrf);              \
  __macro(cusolverDnCgeqrf);              \
...
python/paddle/fluid/tests/unittests/test_lu_op.py (new file, mode 100644)
#   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

from op_test import OpTest
import unittest
import itertools
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.core as core
import scipy
import scipy.linalg
import copy


def scipy_lu(A, pivot):
    shape = A.shape
    if len(shape) == 2:
        return scipy.linalg.lu(A, permute_l=not pivot)
    else:
        preshape = shape[:-2]
        batchsize = np.product(shape) // (shape[-2] * shape[-1])
        PP = []
        PL = []
        PU = []
        NA = A.reshape((-1, shape[-2], shape[-1]))
        for b in range(batchsize):
            P, L, U = scipy.linalg.lu(NA[b], permute_l=not pivot)
            pshape = P.shape
            lshape = L.shape
            ushape = U.shape
            PP.append(P)
            PL.append(L)
            PU.append(U)
        return np.array(PP).reshape(preshape + pshape), np.array(PL).reshape(
            preshape + lshape), np.array(PU).reshape(preshape + ushape)


def Pmat_to_perm(Pmat_org, cut):
    Pmat = copy.deepcopy(Pmat_org)
    shape = Pmat.shape
    rows = shape[-2]
    cols = shape[-1]
    batchsize = max(1, np.product(shape[:-2]))
    P = Pmat.reshape(batchsize, rows, cols)
    permmat = []
    for b in range(batchsize):
        permlst = []
        sP = P[b]
        for c in range(min(rows, cols)):
            idx = np.argmax(sP[:, c])
            permlst.append(idx)
            tmp = copy.deepcopy(sP[c, :])
            sP[c, :] = sP[idx, :]
            sP[idx, :] = tmp

        permmat.append(permlst)
    Pivot = np.array(permmat).reshape(list(shape[:-2]) + [rows, ]) + 1
    return Pivot[..., :cut]


def perm_to_Pmat(perm, dim):
    pshape = perm.shape
    bs = int(np.product(perm.shape[:-1]).item())
    perm = perm.reshape((bs, pshape[-1]))
    oneslst = []
    for i in range(bs):
        idlst = np.arange(dim)
        perm_item = perm[i, :]
        for idx, p in enumerate(perm_item - 1):
            temp = idlst[idx]
            idlst[idx] = idlst[p]
            idlst[p] = temp

        ones = paddle.eye(dim)
        nmat = paddle.scatter(ones, paddle.to_tensor(idlst), ones)
        oneslst.append(nmat)
    return np.array(oneslst).reshape(list(pshape[:-1]) + [dim, dim])


# m < n
class TestLUOp(OpTest):
    """
    case 1
    """

    def config(self):
        self.x_shape = [3, 10, 12]
        self.pivot = True
        self.get_infos = True
        self.dtype = "float64"

    def set_output(self):
        X = self.inputs['X']
        sP, sl, sU = scipy_lu(X, self.pivot)
        sL = np.tril(sl, -1)
        ashape = np.array(X.shape)
        lshape = np.array(sL.shape)
        ushape = np.array(sU.shape)

        lpad = (len(sL.shape) - 2) * [(0, 0)] + list((
            (0, (ashape - lshape)[-2]), (0, (ashape - lshape)[-1])))
        upad = (len(sU.shape) - 2) * [(0, 0)] + list((
            (0, (ashape - ushape)[-2]), (0, (ashape - ushape)[-1])))

        NsL = np.pad(sL, lpad)
        NsU = np.pad(sU, upad)
        NLU = NsL + NsU
        self.output = NLU
        self.Pivots = Pmat_to_perm(sP, min(ashape[-2], ashape[-1]))
        self.Infos = np.zeros(self.x_shape[:-2]) if len(
            X.shape) > 2 else np.array([0])

    def setUp(self):
        self.op_type = "lu"
        self.config()

        self.inputs = {'X': np.random.random(self.x_shape).astype(self.dtype)}
        self.attrs = {'pivots': self.pivot}
        self.set_output()
        self.outputs = {
            'Out': self.output,
            'Pivots': self.Pivots,
            'Infos': self.Infos
        }

    def test_check_output(self):
        self.check_output()


# m = n 2D
class TestLUOp2(TestLUOp):
    """
    case 2
    """

    def config(self):
        self.x_shape = [10, 10]
        self.pivot = True
        self.get_infos = True
        self.dtype = "float64"


# m > n
class TestLUOp3(TestLUOp):
    """
    case 3
    """

    def config(self):
        self.x_shape = [2, 12, 10]
        self.pivot = True
        self.get_infos = True
        self.dtype = "float64"


if __name__ == "__main__":
    unittest.main()
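In set_output, scipy_lu supplies the reference factors, the padded tril/triu sum rebuilds the packed Out tensor, and Pmat_to_perm converts SciPy's permutation matrices into the 1-based pivot sequence the operator emits. A small worked example of that conversion (values traced by hand, for illustration only):

import numpy as np

# A 3x3 permutation matrix that interchanges rows 1 and 3 (1-based numbering).
Pmat = np.array([[0., 0., 1.],
                 [0., 1., 0.],
                 [1., 0., 0.]])

# Using Pmat_to_perm from the test above:
#   Pmat_to_perm(Pmat, 3)  ->  array([3, 2, 3])
# i.e. step 1 interchanges rows 1 and 3, steps 2 and 3 are no-ops,
# which is the getrf-style ipiv encoding of this permutation.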
tools/static_mode_white_list.py

...
@@ -727,6 +727,7 @@ STATIC_MODE_TESTING_LIST = [
    'test_class_center_sample_op',
    'test_fill_diagonal_tensor_op',
    'test_fill_any_op',
    'test_lu_op',
    'test_margin_cross_entropy_op',
    'test_pull_gpups_sparse_op',
]