Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
1c0120e2
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
1c0120e2
编写于
7月 22, 2022
作者:
F
fwenguang
提交者:
GitHub
7月 22, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[MLU] add floor kernel and grid_sampler kernel (#44498)
上级
5ee4a21a
变更
8
显示空白变更内容
内联
并排
Showing
8 changed file
with
508 addition
and
13 deletion
+508
-13
paddle/fluid/operators/activation_op_mlu.cc
paddle/fluid/operators/activation_op_mlu.cc
+23
-0
paddle/fluid/operators/grid_sampler_op_mlu.cc
paddle/fluid/operators/grid_sampler_op_mlu.cc
+112
-0
paddle/fluid/operators/mlu/mlu_baseop.cc
paddle/fluid/operators/mlu/mlu_baseop.cc
+55
-0
paddle/fluid/operators/mlu/mlu_baseop.h
paddle/fluid/operators/mlu/mlu_baseop.h
+23
-0
paddle/fluid/platform/device/mlu/mlu_info.h
paddle/fluid/platform/device/mlu/mlu_info.h
+1
-1
python/paddle/fluid/tests/unittests/mlu/test_floor_op_mlu.py
python/paddle/fluid/tests/unittests/mlu/test_floor_op_mlu.py
+59
-0
python/paddle/fluid/tests/unittests/mlu/test_grid_sampler_op_mlu.py
...dle/fluid/tests/unittests/mlu/test_grid_sampler_op_mlu.py
+223
-0
tools/dockerfile/Dockerfile.mlu
tools/dockerfile/Dockerfile.mlu
+12
-12
未找到文件。
paddle/fluid/operators/activation_op_mlu.cc
浏览文件 @
1c0120e2
...
@@ -399,6 +399,25 @@ class HardSigmoidGradMLUKernel : public framework::OpKernel<T> {
...
@@ -399,6 +399,25 @@ class HardSigmoidGradMLUKernel : public framework::OpKernel<T> {
}
}
};
};
/// MLU forward kernel for the `floor` activation op.
///
/// Fetches input "X" and output "Out" from the execution context, calls
/// mutable_data on the output for the context's place, and hands both
/// tensors (with their CNNL descriptors) to MLUCnnl::Floor.
template <typename T>
class FloorMLUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<Tensor>("X");
    auto* out = ctx.Output<Tensor>("Out");

    // The output buffer must exist before its base pointer is queried below.
    out->mutable_data<T>(ctx.GetPlace());

    MLUCnnlTensorDesc x_desc(*x);
    MLUCnnlTensorDesc out_desc(*out);

    MLUCnnl::Floor(ctx,
                   x_desc.get(),
                   GetBasePtr(x),
                   out_desc.get(),
                   GetBasePtr(out));
  }
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
class
ReciprocalMLUKernel
:
public
framework
::
OpKernel
<
T
>
{
class
ReciprocalMLUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
...
@@ -589,3 +608,7 @@ REGISTER_OP_MLU_KERNEL(
...
@@ -589,3 +608,7 @@ REGISTER_OP_MLU_KERNEL(
hard_sigmoid_grad
,
hard_sigmoid_grad
,
ops
::
HardSigmoidGradMLUKernel
<
float
>
,
ops
::
HardSigmoidGradMLUKernel
<
float
>
,
ops
::
HardSigmoidGradMLUKernel
<
paddle
::
platform
::
float16
>
);
ops
::
HardSigmoidGradMLUKernel
<
paddle
::
platform
::
float16
>
);
// Register the MLU `floor` kernel for float and float16 element types.
REGISTER_OP_MLU_KERNEL(floor,
                       ops::FloorMLUKernel<float>,
                       ops::FloorMLUKernel<paddle::platform::float16>);
paddle/fluid/operators/grid_sampler_op_mlu.cc
0 → 100644
浏览文件 @
1c0120e2
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
/// MLU forward kernel for the `grid_sampler` op.
///
/// Inputs : "X" (indexed here as N, C, ...) and "Grid" (dims 1 and 2 are used
///          as the output height and width).
/// Output : "Output", allocated as {N, C, out_h, out_w}.
/// Attrs  : "mode", "padding_mode", "align_corners".
///
/// Only mode == "bilinear" and padding_mode == "zeros" are accepted; any other
/// value fails a PADDLE_ENFORCE_EQ check with an Unavailable error.
/// The input is transposed to NHWC before the CNNL call and the result is
/// transposed back to NCHW afterwards.
template <typename T>
class GridSamplerMLUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE_EQ(
        platform::is_mlu_place(ctx.GetPlace()),
        true,
        platform::errors::Unavailable("This kernel only runs on MLU."));

    // input and output data
    const Tensor* input = ctx.Input<Tensor>("X");
    const Tensor* grid = ctx.Input<Tensor>("Grid");
    Tensor* output = ctx.Output<Tensor>("Output");

    int n = input->dims()[0];
    int c = input->dims()[1];
    int out_h = grid->dims()[1];
    int out_w = grid->dims()[2];

    output->mutable_data<T>({n, c, out_h, out_w}, ctx.GetPlace());

    // attrs
    // paddle.nn.functional.grid_sample(x, grid, mode='bilinear',
    // padding_mode='zeros', align_corners=True, name=None)
    const std::string mode = ctx.Attr<std::string>("mode");
    const std::string padding_mode = ctx.Attr<std::string>("padding_mode");
    bool align_corners = ctx.Attr<bool>("align_corners");

    PADDLE_ENFORCE_EQ(
        mode == "bilinear",
        true,
        platform::errors::Unavailable(
            "Only support bilinear mode in mlu grid_sample kernel."));
    PADDLE_ENFORCE_EQ(
        padding_mode == "zeros",
        true,
        platform::errors::Unavailable(
            "Only support zeros padding_mode in mlu grid_sample kernel."));

    Tensor trans_input(input->dtype());
    // transpose input from NCHW to NHWC
    const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
    TransposeFromMLUTensor<T>(ctx,
                              perm_to_nhwc,
                              input,
                              &trans_input,
                              true /*need_reshape_or_alloc*/);

    // Scratch output in NHWC; transposed into `output` at the end.
    Tensor tmp_output(output->dtype());
    tmp_output.mutable_data<T>({n, out_h, out_w, c}, ctx.GetPlace());

    MLUCnnlGridSampleDesc grid_sample_desc(mode, padding_mode, align_corners);
    MLUCnnlTensorDesc input_desc(
        trans_input, CNNL_LAYOUT_NHWC, ToCnnlDataType<T>());
    MLUCnnlTensorDesc grid_desc(*grid, CNNL_LAYOUT_NHWC, ToCnnlDataType<T>());
    MLUCnnlTensorDesc tmp_output_desc(
        tmp_output, CNNL_LAYOUT_NHWC, ToCnnlDataType<T>());

    MLUCnnl::GridSample(ctx,
                        grid_sample_desc.get(),
                        input_desc.get(),
                        GetBasePtr(&trans_input),
                        grid_desc.get(),
                        GetBasePtr(grid),
                        tmp_output_desc.get(),
                        GetBasePtr(&tmp_output));

    // transpose output from NHWC to NCHW
    const std::vector<int> perm_to_nchw = {
        0,
        3,
        1,
        2,
    };
    TransposeFromMLUTensor<T>(ctx,
                              perm_to_nchw,
                              &tmp_output,
                              output,
                              false /*need_reshape_or_alloc*/);
  }
};
}
// namespace operators
}
// namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;

// Register the MLU `grid_sampler` kernel for float and float16 element types.
REGISTER_OP_MLU_KERNEL(grid_sampler,
                       ops::GridSamplerMLUKernel<float>,
                       ops::GridSamplerMLUKernel<plat::float16>);
paddle/fluid/operators/mlu/mlu_baseop.cc
浏览文件 @
1c0120e2
...
@@ -622,6 +622,29 @@ MLUCnnlDCNDesc::~MLUCnnlDCNDesc() {
...
@@ -622,6 +622,29 @@ MLUCnnlDCNDesc::~MLUCnnlDCNDesc() {
}
}
}
}
// Creates a CNNL grid-sample descriptor and configures it with bilinear
// interpolation, zeros padding and the given align_corners flag.
//
// NOTE(review): interp_mode_str and padding_mode_str are never read here; the
// descriptor is always set to CNNL_INTERP_BILINEAR and
// CNNL_GRIDSAMPLE_PADDING_ZEROS regardless of what the caller passes.
MLUCnnlGridSampleDesc::MLUCnnlGridSampleDesc(
    const std::string& interp_mode_str,
    const std::string& padding_mode_str,
    bool align_corners) {
  const cnnlInterpMode_t bilinear_interp = CNNL_INTERP_BILINEAR;
  const cnnlGridSamplePaddingMode_t zeros_padding =
      CNNL_GRIDSAMPLE_PADDING_ZEROS;

  PADDLE_ENFORCE_MLU_SUCCESS(
      cnnlCreateGridSampleDescriptor(&grid_sample_desc_));
  PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetGridSampleDescriptor(
      grid_sample_desc_, bilinear_interp, zeros_padding, align_corners));
}
// Returns the descriptor handle held by this object.
const cnnlGridSampleDescriptor_t MLUCnnlGridSampleDesc::get() const {
  return grid_sample_desc_;
}
// Destroys the held descriptor, if one is set.
MLUCnnlGridSampleDesc::~MLUCnnlGridSampleDesc() {
  if (!grid_sample_desc_) {
    return;  // nothing to release
  }
  PADDLE_ENFORCE_MLU_SUCCESS(
      cnnlDestroyGridSampleDescriptor(grid_sample_desc_));
}
MLUSeqDataDesc
::
MLUSeqDataDesc
(
cnnlSeqDataLayout_t
layout
,
MLUSeqDataDesc
::
MLUSeqDataDesc
(
cnnlSeqDataLayout_t
layout
,
cnnlDataType_t
dtype
,
cnnlDataType_t
dtype
,
int
dimNb
,
int
dimNb
,
...
@@ -4918,6 +4941,38 @@ MLURNNDesc::~MLURNNDesc() {
...
@@ -4918,6 +4941,38 @@ MLURNNDesc::~MLURNNDesc() {
grads_image
));
grads_image
));
}
}
/* static */
void
MLUCnnl
::
GridSample
(
const
ExecutionContext
&
ctx
,
const
cnnlGridSampleDescriptor_t
grid_sample_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
grid_desc
,
const
void
*
grid
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
)
{
cnnlHandle_t
handle
=
GetHandleFromCTX
(
ctx
);
size_t
workspace_size
;
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlGetGridSampleForwardWorkspaceSize
(
handle
,
input_desc
,
grid_desc
,
output_desc
,
&
workspace_size
));
auto
&
dev_ctx
=
GetDevCtxFromCTX
(
ctx
);
Tensor
workspace
=
ctx
.
AllocateTmpTensor
<
int8_t
,
MLUDeviceContext
>
(
{
static_cast
<
int64_t
>
(
workspace_size
)},
dev_ctx
);
void
*
workspace_ptr
=
workspace
.
mutable_data
(
ctx
.
GetPlace
());
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlGridSampleForward
(
handle
,
grid_sample_desc
,
input_desc
,
input
,
grid_desc
,
grid
,
output_desc
,
output
,
workspace_ptr
,
workspace_size
));
}
/* static */
void
MLUCnnl
::
SyncBatchNormStats
(
/* static */
void
MLUCnnl
::
SyncBatchNormStats
(
const
ExecutionContext
&
ctx
,
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
x_desc
,
const
cnnlTensorDescriptor_t
x_desc
,
...
...
paddle/fluid/operators/mlu/mlu_baseop.h
浏览文件 @
1c0120e2
...
@@ -495,6 +495,20 @@ class MLUCnnlDCNDesc {
...
@@ -495,6 +495,20 @@ class MLUCnnlDCNDesc {
cnnlDCNDescriptor_t
dcn_desc_
=
nullptr
;
cnnlDCNDescriptor_t
dcn_desc_
=
nullptr
;
};
};
// Owning wrapper around a cnnlGridSampleDescriptor_t.
//
// The constructor takes the interpolation mode, padding mode and
// align_corners flag; get() exposes the raw handle; the destructor releases
// it. Copying is disabled because two copies would hold the same raw handle
// and each would try to release it on destruction.
class MLUCnnlGridSampleDesc {
 public:
  MLUCnnlGridSampleDesc(const std::string& interp_mode_str,
                        const std::string& padding_mode_str,
                        bool align_corners);

  MLUCnnlGridSampleDesc(const MLUCnnlGridSampleDesc& desc) = delete;
  MLUCnnlGridSampleDesc& operator=(const MLUCnnlGridSampleDesc& desc) = delete;

  // Returns the underlying descriptor handle (still owned by this object).
  const cnnlGridSampleDescriptor_t get() const;

  ~MLUCnnlGridSampleDesc();

 private:
  cnnlGridSampleDescriptor_t grid_sample_desc_ = nullptr;
};
class
MLUSeqDataDesc
{
class
MLUSeqDataDesc
{
public:
public:
MLUSeqDataDesc
(
const
MLUSeqDataDesc
&
desc
)
=
delete
;
MLUSeqDataDesc
(
const
MLUSeqDataDesc
&
desc
)
=
delete
;
...
@@ -2040,6 +2054,15 @@ class MLUCnnl {
...
@@ -2040,6 +2054,15 @@ class MLUCnnl {
const
cnnlTensorDescriptor_t
grads_image_desc
,
const
cnnlTensorDescriptor_t
grads_image_desc
,
void
*
grads_image
);
void
*
grads_image
);
static
void
GridSample
(
const
ExecutionContext
&
ctx
,
const
cnnlGridSampleDescriptor_t
grid_sample_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
grid_desc
,
const
void
*
grid
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
static
void
SyncBatchNormStats
(
const
ExecutionContext
&
ctx
,
static
void
SyncBatchNormStats
(
const
ExecutionContext
&
ctx
,
const
cnnlTensorDescriptor_t
x_desc
,
const
cnnlTensorDescriptor_t
x_desc
,
const
void
*
x
,
const
void
*
x
,
...
...
paddle/fluid/platform/device/mlu/mlu_info.h
浏览文件 @
1c0120e2
...
@@ -16,9 +16,9 @@ limitations under the License. */
...
@@ -16,9 +16,9 @@ limitations under the License. */
#ifdef PADDLE_WITH_MLU
#ifdef PADDLE_WITH_MLU
#include <cn_api.h>
#include <cn_api.h>
#include <cndrv_id.h>
#include <cnnl.h>
#include <cnnl.h>
#include <cnpapi.h>
#include <cnpapi.h>
#include <cnpapi_cndrv_id.h>
#include <cnrt.h>
#include <cnrt.h>
#ifdef PADDLE_WITH_CNCL
#ifdef PADDLE_WITH_CNCL
#include <cncl.h>
#include <cncl.h>
...
...
python/paddle/fluid/tests/unittests/mlu/test_floor_op_mlu.py
0 → 100644
浏览文件 @
1c0120e2
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
sys
sys
.
path
.
append
(
'..'
)
from
op_test
import
OpTest
import
paddle
paddle
.
enable_static
()
class TestFloor(OpTest):
    """Checks the MLU `floor` op output against np.floor (float32 by default)."""

    def setUp(self):
        # Op/test configuration.
        self.op_type = "floor"
        self.place = paddle.device.MLUPlace(0)
        self.__class__.use_mlu = True
        self.init_dtype()
        self.__class__.no_need_check_grad = True
        self.python_api = paddle.floor

        # Fixed seed so the sampled input is reproducible.
        np.random.seed(1024)
        samples = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype)
        expected = np.floor(samples)

        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(samples)}
        self.outputs = {'Out': expected}

    def test_check_output(self):
        self.check_output_with_place(self.place, check_eager=False)

    def init_dtype(self):
        # Overridden by subclasses to test other element types.
        self.dtype = np.float32
class TestFloorFP16(TestFloor):
    """Same check as TestFloor, with float16 input data."""

    def init_dtype(self):
        self.dtype = np.float16
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/mlu/test_grid_sampler_op_mlu.py
0 → 100644
浏览文件 @
1c0120e2
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
paddle
import
unittest
import
numpy
as
np
import
paddle.fluid.core
as
core
import
sys
sys
.
path
.
append
(
'..'
)
from
op_test
import
OpTest
paddle
.
enable_static
()
def AffineGrid(theta, grid_shape):
    """Build a sampling grid by applying per-batch affine matrices.

    Args:
        theta: array indexed as [batch, 2, 3]; each 2x3 matrix is applied to
            homogeneous (x, y, 1) coordinates.
        grid_shape: sequence whose first three entries are (n, h, w).

    Returns:
        float32 array of shape [n, h, w, 2]. Base coordinates are
        np.linspace(-1, 1, w) along the width and np.linspace(-1, 1, h) along
        the height.
    """
    n, h, w = grid_shape[0], grid_shape[1], grid_shape[2]

    # col_coords[r, c] is the width coordinate, row_coords[r, c] the height one.
    col_coords, row_coords = np.meshgrid(np.linspace(-1, 1, w),
                                         np.linspace(-1, 1, h))
    base = np.stack([col_coords, row_coords, np.ones([h, w])],
                    axis=2)  # h * w * 3
    flat_base = base.reshape([h * w, 3])

    ret = np.zeros([n, h * w, 2])
    theta_t = theta.transpose([0, 2, 1])
    # The base grid is the same for every batch entry; only theta differs.
    for i in range(len(theta_t)):
        ret[i] = np.dot(flat_base, theta_t[i])

    return ret.reshape([n, h, w, 2]).astype("float32")
def getGridPointValue(data, x, y):
    """Gather data[i, :, y, x] for every integer grid point.

    Args:
        data: array indexed as [N, C, in_H, in_W].
        x: integer column indices, indexed as [N, out_H, out_W].
        y: integer row indices, indexed the same way as x.

    Returns:
        float32 array of shape [N, C, out_H, out_W]; positions whose (y, x)
        falls outside [0, in_H - 1] x [0, in_W - 1] are left as zero.
    """
    shape = data.shape
    batch, channels = shape[0], shape[1]
    max_row, max_col = shape[2] - 1, shape[3] - 1
    out_H, out_W = x.shape[1], x.shape[2]

    sampled = np.zeros([batch, channels, out_H, out_W], dtype='float32')
    for b, r, c in np.ndindex(batch, out_H, out_W):
        row = y[b, r, c]
        col = x[b, r, c]
        if row < 0 or row > max_row or col < 0 or col > max_col:
            continue  # out of bounds: the entry keeps its zero value
        sampled[b, :, r, c] = data[b, :, row, col]

    return sampled
def clip(x, min_n, max_n):
    """Cap x at max_n, then raise the result to at least min_n (elementwise)."""
    capped = np.minimum(x, max_n)
    return np.maximum(capped, min_n)
def unnormalizeAndClip(grid_slice, max_val, align_corners, padding_mode):
    """Map normalized grid coordinates to pixel coordinates and apply padding.

    Args:
        grid_slice: array of normalized coordinates (cast to float32 here).
        max_val: largest valid index along the axis.
        align_corners: if true, scale by max_val; otherwise scale by
            max_val + 1 and shift by -0.5.
        padding_mode: "border" clips to [0, max_val]; "reflection" folds the
            coordinates back into range; any other value leaves them as is.

    Returns:
        Array of unnormalized coordinates.
    """
    shifted = grid_slice.astype('float32') + 1.0
    if align_corners:
        coords = 0.5 * (shifted * max_val)
    else:
        coords = 0.5 * (shifted * (max_val + 1)) - 0.5

    if padding_mode == "border":
        return clip(coords, 0, max_val)
    if padding_mode != "reflection":
        return coords

    # Reflection: fold coordinates over a period of `span`.
    if align_corners:
        span = 2 * max_val
        magnitude = np.abs(coords)
    else:
        span = (max_val + 1) * 2
        magnitude = np.abs(coords + 0.5)
    remainder = magnitude - np.floor(magnitude / span) * span
    folded = np.minimum(remainder, span - remainder)
    if align_corners:
        return folded
    return clip(folded - 0.5, 0, max_val)
def GridSampler(data,
                grid,
                align_corners=True,
                mode="bilinear",
                padding_mode="zeros"):
    """NumPy reference implementation of grid sampling.

    Args:
        data: array indexed as [N, C, in_H, in_W].
        grid: array indexed as [N, out_H, out_W, 2]; the last axis holds the
            normalized (x, y) sampling coordinates.
        align_corners: forwarded to unnormalizeAndClip.
        mode: "bilinear" or "nearest".
        padding_mode: forwarded to unnormalizeAndClip.

    Returns:
        Array of shape [N, C, out_H, out_W] with the sampled values.

    Raises:
        ValueError: if mode is neither "bilinear" nor "nearest".
    """
    # Reject unknown modes up front instead of failing later on an unbound
    # result variable.
    if mode not in ("bilinear", "nearest"):
        raise ValueError(
            "Unsupported mode %r; expected 'bilinear' or 'nearest'." % (mode, ))

    dims = data.shape
    N = dims[0]
    in_C = dims[1]
    in_H = dims[2]
    in_W = dims[3]

    out_H = grid.shape[1]
    out_W = grid.shape[2]

    x = grid[:, :, :, 0]
    y = grid[:, :, :, 1]
    y_max = in_H - 1
    x_max = in_W - 1

    x = unnormalizeAndClip(x, x_max, align_corners, padding_mode)
    y = unnormalizeAndClip(y, y_max, align_corners, padding_mode)

    if mode == "nearest":
        x = np.round(x).astype('int32')
        y = np.round(y).astype('int32')
        return getGridPointValue(data, x, y)

    # Bilinear: blend the four integer neighbours of every sample point.
    x0 = np.floor(x).astype('int32')
    x1 = x0 + 1
    y0 = np.floor(y).astype('int32')
    y1 = y0 + 1

    def per_channel(weight):
        # Replicate a per-pixel weight across all channels.
        return np.tile(weight.reshape((N, 1, out_H, out_W)), (1, in_C, 1, 1))

    wa = per_channel((x1 - x) * (y1 - y))
    wb = per_channel((x1 - x) * (y - y0))
    wc = per_channel((x - x0) * (y1 - y))
    wd = per_channel((x - x0) * (y - y0))

    va = getGridPointValue(data, x0, y0)
    vb = getGridPointValue(data, x0, y1)
    vc = getGridPointValue(data, x1, y0)
    vd = getGridPointValue(data, x1, y1)

    return (wa * va + wb * vb + wc * vc + wd * vd).astype('float32')
class TestGridSamplerOp(OpTest):
    """Checks the MLU `grid_sampler` op against the NumPy GridSampler reference.

    Subclasses override initTestCase to vary shapes and attributes.
    """

    def setUp(self):
        self.place = paddle.device.MLUPlace(0)
        self.__class__.use_mlu = True
        self.__class__.no_need_check_grad = True
        self.op_type = 'grid_sampler'

        # Defaults; initTestCase may override any of them.
        self.align_corners = True
        self.padding_mode = "zeros"
        self.mode = "bilinear"
        self.initTestCase()

        x = np.random.randint(0, 255, self.x_shape).astype('float32')

        # Fill the leading 2x3 block of every theta matrix with random values,
        # drawn one at a time in row-major order.
        theta = np.zeros(self.theta_shape).astype('float32')
        for idx in np.ndindex(self.theta_shape[0], 2, 3):
            theta[idx] = np.random.rand(1)[0]
        grid = AffineGrid(theta, self.grid_shape)

        expected = GridSampler(x, grid, self.align_corners, self.mode,
                               self.padding_mode)

        self.inputs = {'X': x, 'Grid': grid}
        self.attrs = {
            'use_cudnn': False,
            "align_corners": self.align_corners,
            "padding_mode": self.padding_mode,
            "mode": self.mode
        }
        self.outputs = {'Output': expected}

    def test_check_output(self):
        self.check_output_with_place(self.place)

    def initTestCase(self):
        self.x_shape = (2, 3, 8, 8)
        self.grid_shape = (2, 7, 9, 2)
        self.theta_shape = (2, 2, 3)
        self.align_corners = False
        self.padding_mode = "zeros"
        self.mode = "bilinear"
class Case1(TestGridSamplerOp):
    """Non-square 5x6 input, 8x9 grid, align_corners=True."""

    def initTestCase(self):
        # Shapes
        self.x_shape = (2, 3, 5, 6)
        self.grid_shape = (2, 8, 9, 2)
        self.theta_shape = (2, 2, 3)
        # Attributes
        self.mode = "bilinear"
        self.padding_mode = "zeros"
        self.align_corners = True
class LargeInputCase(TestGridSamplerOp):
    """128x128 input, 130x130 grid, align_corners=False."""

    def initTestCase(self):
        # Shapes
        self.x_shape = (2, 3, 128, 128)
        self.grid_shape = (2, 130, 130, 2)
        self.theta_shape = (2, 2, 3)
        # Attributes
        self.mode = "bilinear"
        self.padding_mode = "zeros"
        self.align_corners = False
class Case2(LargeInputCase):
    """128x128 input, 130x130 grid, align_corners=True."""

    def initTestCase(self):
        # Shapes
        self.x_shape = (2, 3, 128, 128)
        self.grid_shape = (2, 130, 130, 2)
        self.theta_shape = (2, 2, 3)
        # Attributes
        self.mode = "bilinear"
        self.padding_mode = "zeros"
        self.align_corners = True
if
__name__
==
"__main__"
:
unittest
.
main
()
tools/dockerfile/Dockerfile.mlu
浏览文件 @
1c0120e2
...
@@ -2,14 +2,14 @@
...
@@ -2,14 +2,14 @@
# Update CNTOOLKIT_VERSION, CNNL_VERSION and CNCL_VERSION if using other versions
# Update CNTOOLKIT_VERSION, CNNL_VERSION and CNCL_VERSION if using other versions
#
#
# Build:
# Build:
# - CNTOOLKIT_VERSION
2.8.1
-1
# - CNTOOLKIT_VERSION
3.0.0
-1
# - CNNL_VERSION 1.
9.3
-1
# - CNNL_VERSION 1.
11.0
-1
# - CNCL_VERSION 1.
0.4
-1
# - CNCL_VERSION 1.
2.0
-1
#
#
# Download three packages from FTP (need to connect cambricon AE to get FTP url)
# Download three packages from FTP (need to connect cambricon AE to get FTP url)
# - cntoolkit_
2.6.5
-1.ubuntu18.04_amd64.deb
# - cntoolkit_
3.0.0
-1.ubuntu18.04_amd64.deb
# - cnnl_1.
8.3
-1.ubuntu18.04_amd64.deb
# - cnnl_1.
11.0
-1.ubuntu18.04_amd64.deb
# - cncl_1.
0.2
-1.ubuntu18.04_amd64.deb
# - cncl_1.
2.0
-1.ubuntu18.04_amd64.deb
# copy them to current directory first, then run build commands
# copy them to current directory first, then run build commands
#
#
# For example:
# For example:
...
@@ -21,9 +21,9 @@
...
@@ -21,9 +21,9 @@
# (get cncl pkg)
# (get cncl pkg)
#
#
# docker build -f Dockerfile.mlu \
# docker build -f Dockerfile.mlu \
# --build-arg CNTOOLKIT_VERSION=
2.8.1
-1 \
# --build-arg CNTOOLKIT_VERSION=
3.0.0
-1 \
# --build-arg CNNL_VERSION=1.
9.3
-1 \
# --build-arg CNNL_VERSION=1.
11.0
-1 \
# --build-arg CNCL_VERSION=1.
0.4
-1 \
# --build-arg CNCL_VERSION=1.
2.0
-1 \
# -t paddlepaddle/paddle:latest-dev-mlu .
# -t paddlepaddle/paddle:latest-dev-mlu .
#
#
# without mlu device:
# without mlu device:
...
@@ -40,9 +40,9 @@ MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
...
@@ -40,9 +40,9 @@ MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
ENV WITH_GPU=OFF
ENV WITH_GPU=OFF
ARG CNTOOLKIT_VERSION=
2.8.1
-1
ARG CNTOOLKIT_VERSION=
3.0.0
-1
ARG CNNL_VERSION=1.
9.3
-1
ARG CNNL_VERSION=1.
11.0
-1
ARG CNCL_VERSION=1.
0.4
-1
ARG CNCL_VERSION=1.
2.0
-1
ARG CNTOOLKIT_PKG=cntoolkit_$CNTOOLKIT_VERSION.ubuntu18.04_amd64.deb
ARG CNTOOLKIT_PKG=cntoolkit_$CNTOOLKIT_VERSION.ubuntu18.04_amd64.deb
ARG CNNL_PKG=cnnl_$CNNL_VERSION.ubuntu18.04_amd64.deb
ARG CNNL_PKG=cnnl_$CNNL_VERSION.ubuntu18.04_amd64.deb
ARG CNCL_PKG=cncl_$CNCL_VERSION.ubuntu18.04_amd64.deb
ARG CNCL_PKG=cncl_$CNCL_VERSION.ubuntu18.04_amd64.deb
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录