Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
dfbac603
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
dfbac603
编写于
11月 22, 2018
作者:
P
peizhilin
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'upstream/develop' into windows/build
上级
7c8c9dc9
dd6fd4c7
变更
16
隐藏空白更改
内联
并排
Showing
16 changed file
with
1447 addition
and
24 deletion
+1447
-24
AUTHORS.md
AUTHORS.md
+1
-0
paddle/fluid/API.spec
paddle/fluid/API.spec
+1
-0
paddle/fluid/operators/group_norm_op.cc
paddle/fluid/operators/group_norm_op.cc
+162
-0
paddle/fluid/operators/group_norm_op.cu
paddle/fluid/operators/group_norm_op.cu
+292
-0
paddle/fluid/operators/group_norm_op.h
paddle/fluid/operators/group_norm_op.h
+197
-0
paddle/fluid/operators/math/detail/activation_functions.h
paddle/fluid/operators/math/detail/activation_functions.h
+1
-0
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+4
-0
python/paddle/fluid/contrib/utils/__init__.py
python/paddle/fluid/contrib/utils/__init__.py
+20
-0
python/paddle/fluid/contrib/utils/hdfs_utils.py
python/paddle/fluid/contrib/utils/hdfs_utils.py
+505
-0
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+79
-0
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+7
-5
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+5
-5
python/paddle/fluid/tests/unittests/test_group_norm_op.py
python/paddle/fluid/tests/unittests/test_group_norm_op.py
+143
-0
tools/manylinux1/Dockerfile.x64
tools/manylinux1/Dockerfile.x64
+6
-2
tools/manylinux1/build_scripts/build.sh
tools/manylinux1/build_scripts/build.sh
+10
-9
tools/manylinux1/build_scripts/build_utils.sh
tools/manylinux1/build_scripts/build_utils.sh
+14
-3
未找到文件。
AUTHORS.md
浏览文件 @
dfbac603
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
| kexinzhao | Ke-Xin Zhao |
| kexinzhao | Ke-Xin Zhao |
| kuke | Yi-Bing Liu |
| kuke | Yi-Bing Liu |
| lcy-seso | Ying Cao |
| lcy-seso | Ying Cao |
| cjld | Dun Liang |
| lipeng-unisound | Peng Li |
| lipeng-unisound | Peng Li |
| liuyuan | Yuan Liu |
| liuyuan | Yuan Liu |
| livc | Zhao Li |
| livc | Zhao Li |
...
...
paddle/fluid/API.spec
浏览文件 @
dfbac603
...
@@ -103,6 +103,7 @@ paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 's
...
@@ -103,6 +103,7 @@ paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 's
paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.layer_norm ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None))
paddle.fluid.layers.layer_norm ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None))
paddle.fluid.layers.group_norm ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None))
paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, False, False))
paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, False, False))
paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None)
...
...
paddle/fluid/operators/group_norm_op.cc
0 → 100644
浏览文件 @
dfbac603
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/group_norm_op.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
using
DataLayout
=
framework
::
DataLayout
;
class
GroupNormOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of GroupNormOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Y"
),
"Output(Y) of GroupNormOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Mean"
),
"Output(Mean) of GroupNormOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Variance"
),
"Output(Variance) of GroupNormOp should not be null."
);
auto
x_dim
=
ctx
->
GetInputDim
(
"X"
);
auto
channel_num
=
x_dim
[
1
];
auto
batch_size
=
x_dim
[
0
];
auto
groups
=
ctx
->
Attrs
().
Get
<
int
>
(
"groups"
);
PADDLE_ENFORCE_LE
(
groups
,
channel_num
,
"'groups' must be less equal than the number of channels."
);
PADDLE_ENFORCE_GE
(
groups
,
1
,
"'groups' must be greater equal than 1."
);
if
(
ctx
->
HasInput
(
"Scale"
))
{
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputDim
(
"Scale"
).
size
(),
1UL
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputDim
(
"Scale"
)[
0
],
channel_num
);
}
if
(
ctx
->
HasInput
(
"Bias"
))
{
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputDim
(
"Bias"
).
size
(),
1UL
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputDim
(
"Bias"
)[
0
],
channel_num
);
}
ctx
->
SetOutputDim
(
"Y"
,
ctx
->
GetInputDim
(
"X"
));
ctx
->
SetOutputDim
(
"Mean"
,
{
batch_size
,
groups
});
ctx
->
SetOutputDim
(
"Variance"
,
{
batch_size
,
groups
});
ctx
->
ShareLoD
(
"X"
,
"Y"
);
}
};
class
GroupNormOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddInput
(
"X"
,
"The input tensor."
);
AddInput
(
"Scale"
,
"Scale is a 1-dimensional tensor of size C"
"that is applied to the output."
)
.
AsDispensable
();
AddInput
(
"Bias"
,
"Bias is a 1-dimensional tensor of size C "
"that is applied to the output"
)
.
AsDispensable
();
AddOutput
(
"Y"
,
"Result after normalization."
);
AddOutput
(
"Mean"
,
"Mean of each group."
).
AsIntermediate
();
AddOutput
(
"Variance"
,
"Variance of each group."
).
AsIntermediate
();
AddAttr
<
float
>
(
"epsilon"
,
"Constant for numerical stability [default 1e-5]."
)
.
SetDefault
(
1e-5
)
.
AddCustomChecker
([](
const
float
&
epsilon
)
{
PADDLE_ENFORCE
(
epsilon
>=
0.0
f
&&
epsilon
<=
1.0
f
,
"'epsilon' should be between 0.0 and 1.0."
);
});
AddAttr
<
int
>
(
"groups"
,
"The number of groups that divided from channels."
)
.
AddCustomChecker
([](
const
int
&
groups
)
{
PADDLE_ENFORCE_GT
(
groups
,
0
,
"'groups' should be greater than zero."
);
});
AddComment
(
R"DOC(
Group Normalization
Refer to `Group Normalization <https://arxiv.org/abs/1803.08494>`_
)DOC"
);
}
};
class
GroupNormGradOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
// check input
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of GroupNormOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Mean"
),
"Input(Mean) of GroupNormOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Variance"
),
"Input(Variance) of GroupNormOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Y"
)),
"Input(Y@GRAD) of GroupNormOp should not be null."
);
// check output
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"X"
)))
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"X"
));
}
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"Scale"
)))
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Scale"
),
ctx
->
GetInputDim
(
"Scale"
));
}
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"Bias"
)))
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Bias"
),
ctx
->
GetInputDim
(
"Bias"
));
}
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
auto
*
var
=
ctx
.
InputVar
(
framework
::
GradVarName
(
"Y"
));
if
(
var
==
nullptr
)
{
PADDLE_THROW
(
"can't find Y@GRAD"
);
}
const
Tensor
*
t
=
nullptr
;
if
(
var
->
IsType
<
Tensor
>
())
{
t
=
&
var
->
Get
<
Tensor
>
();
}
else
if
(
var
->
IsType
<
LoDTensor
>
())
{
t
=
&
var
->
Get
<
LoDTensor
>
();
}
if
(
t
==
nullptr
)
{
PADDLE_THROW
(
"can't find Y@GRAD"
);
}
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
t
->
type
()),
ctx
.
GetPlace
());
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
group_norm
,
ops
::
GroupNormOp
,
ops
::
GroupNormOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
group_norm_grad
,
ops
::
GroupNormGradOp
);
REGISTER_OP_CPU_KERNEL
(
group_norm
,
ops
::
GroupNormKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
GroupNormKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
group_norm_grad
,
ops
::
GroupNormGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
GroupNormGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
paddle/fluid/operators/group_norm_op.cu
0 → 100644
浏览文件 @
dfbac603
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cub/cub.cuh>
#include "paddle/fluid/operators/group_norm_op.h"
namespace
paddle
{
namespace
operators
{
template
<
typename
T
>
__global__
void
GroupNormForwardGetMeanAndVar
(
const
T
*
x
,
int
N
,
int
C
,
int
imsize
,
int
groups
,
int
group_size
,
T
*
mean
,
T
*
var
)
{
int
gid
=
blockIdx
.
y
;
int
cid
=
blockIdx
.
x
;
int
bid
=
blockIdx
.
z
;
int
number
=
min
(
group_size
,
static_cast
<
int
>
(
C
-
gid
*
group_size
));
int
ccid
=
gid
*
group_size
+
cid
;
if
(
ccid
>=
C
)
return
;
T
x_mean
=
0
,
x_var
=
0
;
for
(
int
imid
=
threadIdx
.
x
;
imid
<
imsize
;
imid
+=
blockDim
.
x
)
{
T
val
=
x
[(
bid
*
C
+
ccid
)
*
imsize
+
imid
];
x_mean
+=
val
;
x_var
+=
val
*
val
;
}
x_mean
/=
number
*
imsize
;
x_var
/=
number
*
imsize
;
__shared__
T
s_mem
[
2
];
if
(
threadIdx
.
x
==
0
)
{
s_mem
[
0
]
=
s_mem
[
1
]
=
0
;
}
__syncthreads
();
paddle
::
platform
::
CudaAtomicAdd
(
&
s_mem
[
0
],
x_mean
);
paddle
::
platform
::
CudaAtomicAdd
(
&
s_mem
[
1
],
x_var
);
__syncthreads
();
if
(
threadIdx
.
x
==
0
)
{
paddle
::
platform
::
CudaAtomicAdd
(
&
mean
[
bid
*
groups
+
gid
],
s_mem
[
0
]);
paddle
::
platform
::
CudaAtomicAdd
(
&
var
[
bid
*
groups
+
gid
],
s_mem
[
1
]);
}
}
template
<
typename
T
>
__global__
void
GroupNormForward
(
const
T
*
x
,
const
T
*
mean
,
const
T
*
var
,
const
T
*
scale
,
const
T
*
bias
,
int
N
,
int
C
,
int
imsize
,
int
groups
,
int
group_size
,
T
epsilon
,
T
*
y
,
T
*
real_var
)
{
int
gid
=
blockIdx
.
y
;
int
cid
=
blockIdx
.
x
;
int
bid
=
blockIdx
.
z
;
int
ccid
=
gid
*
group_size
+
cid
;
if
(
ccid
>=
C
)
return
;
T
x_mean
=
mean
[
bid
*
groups
+
gid
];
T
x_var
=
var
[
bid
*
groups
+
gid
];
x_var
=
x_var
-
x_mean
*
x_mean
;
T
var_inv
=
1.0
/
sqrt
(
x_var
+
epsilon
);
if
(
cid
==
0
&&
threadIdx
.
x
==
0
)
real_var
[
bid
*
groups
+
gid
]
=
x_var
;
for
(
int
imid
=
threadIdx
.
x
;
imid
<
imsize
;
imid
+=
blockDim
.
x
)
{
T
val
=
x
[(
bid
*
C
+
ccid
)
*
imsize
+
imid
];
val
=
(
val
-
x_mean
)
*
var_inv
;
if
(
scale
)
val
*=
scale
[
gid
*
group_size
+
cid
];
if
(
bias
)
val
+=
bias
[
gid
*
group_size
+
cid
];
y
[(
bid
*
C
+
ccid
)
*
imsize
+
imid
]
=
val
;
}
}
template
<
typename
T
>
class
GroupNormKernel
<
platform
::
CUDADeviceContext
,
T
>
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
float
epsilon
=
ctx
.
Attr
<
float
>
(
"epsilon"
);
auto
*
scale
=
ctx
.
Input
<
Tensor
>
(
"Scale"
);
auto
*
bias
=
ctx
.
Input
<
Tensor
>
(
"Bias"
);
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
y
=
ctx
.
Output
<
Tensor
>
(
"Y"
);
auto
*
mean
=
ctx
.
Output
<
Tensor
>
(
"Mean"
);
auto
*
var
=
ctx
.
Output
<
Tensor
>
(
"Variance"
);
const
auto
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
const
auto
x_dims
=
x
->
dims
();
const
int
group_size
=
(
x_dims
[
1
]
-
1
)
/
groups
+
1
;
y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
mean
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
var
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
math
::
SetConstant
<
platform
::
CUDADeviceContext
,
T
>
set_zero
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
Tensor
temp_var
;
temp_var
.
mutable_data
<
T
>
(
var
->
dims
(),
ctx
.
GetPlace
());
set_zero
(
dev_ctx
,
mean
,
static_cast
<
T
>
(
0
));
set_zero
(
dev_ctx
,
&
temp_var
,
static_cast
<
T
>
(
0
));
auto
*
x_data
=
x
->
data
<
T
>
();
auto
*
y_data
=
y
->
data
<
T
>
();
auto
*
mean_data
=
mean
->
data
<
T
>
();
auto
*
var_data
=
var
->
data
<
T
>
();
auto
*
temp_var_data
=
temp_var
.
data
<
T
>
();
const
T
*
scale_data
=
nullptr
;
if
(
scale
)
scale_data
=
scale
->
data
<
T
>
();
const
T
*
bias_data
=
nullptr
;
if
(
bias
)
bias_data
=
bias
->
data
<
T
>
();
int
imsize
=
x_dims
[
2
]
*
x_dims
[
3
];
int
block_size
=
std
::
min
(
512
,
imsize
);
dim3
grid
(
group_size
,
groups
,
x_dims
[
0
]);
dim3
threads
(
block_size
,
1
,
1
);
GroupNormForwardGetMeanAndVar
<
T
><<<
grid
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
x_data
,
x_dims
[
0
],
x_dims
[
1
],
imsize
,
groups
,
group_size
,
mean_data
,
temp_var_data
);
GroupNormForward
<
T
><<<
grid
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
x_data
,
mean_data
,
temp_var_data
,
scale_data
,
bias_data
,
x_dims
[
0
],
x_dims
[
1
],
imsize
,
groups
,
group_size
,
epsilon
,
y_data
,
var_data
);
}
};
template
<
typename
T
>
__global__
void
GroupNormBackwardGetMeanAndVar
(
const
T
*
x
,
const
T
*
mean
,
const
T
*
var
,
const
T
*
scale
,
const
T
*
d_y
,
int
N
,
int
C
,
int
imsize
,
int
groups
,
int
group_size
,
T
epsilon
,
T
*
d_x
,
T
*
d_mean
,
T
*
d_var
,
T
*
d_scale
,
T
*
d_bias
)
{
int
gid
=
blockIdx
.
y
;
int
cid
=
blockIdx
.
x
;
int
bid
=
blockIdx
.
z
;
int
number
=
min
(
group_size
,
static_cast
<
int
>
(
C
-
gid
*
group_size
));
int
ccid
=
gid
*
group_size
+
cid
;
if
(
ccid
>=
C
)
return
;
T
x_mean
=
mean
[
bid
*
groups
+
gid
];
T
x_var
=
var
[
bid
*
groups
+
gid
];
T
var_inv
=
1.0
/
sqrt
(
x_var
+
epsilon
);
T
d_var_inv
=
0
,
d_x_mean
=
0
;
T
d_mean_data
=
0
,
d_var_data
=
0
,
d_scale_data
=
0
,
d_bias_data
=
0
;
for
(
int
imid
=
threadIdx
.
x
;
imid
<
imsize
;
imid
+=
blockDim
.
x
)
{
T
tmp
=
x
[(
bid
*
C
+
ccid
)
*
imsize
+
imid
];
T
val
=
(
tmp
-
x_mean
)
*
var_inv
;
T
dval
=
d_y
[(
bid
*
C
+
ccid
)
*
imsize
+
imid
];
if
(
d_bias
)
d_bias_data
+=
dval
;
if
(
d_scale
)
d_scale_data
+=
val
*
dval
;
if
(
scale
)
dval
=
dval
*
scale
[
ccid
];
d_var_data
+=
(
tmp
-
x_mean
)
*
dval
;
T
d_tmp
=
dval
*
var_inv
;
if
(
d_x
)
d_x
[(
bid
*
C
+
ccid
)
*
imsize
+
imid
]
=
d_tmp
;
d_mean_data
-=
d_tmp
;
}
__shared__
T
s_mem
[
4
];
if
(
threadIdx
.
x
==
0
)
{
s_mem
[
0
]
=
s_mem
[
1
]
=
0
;
if
(
d_scale
)
s_mem
[
2
]
=
0
;
if
(
d_bias
)
s_mem
[
3
]
=
0
;
}
__syncthreads
();
paddle
::
platform
::
CudaAtomicAdd
(
&
s_mem
[
0
],
d_mean_data
);
paddle
::
platform
::
CudaAtomicAdd
(
&
s_mem
[
1
],
d_var_data
);
if
(
d_scale
)
paddle
::
platform
::
CudaAtomicAdd
(
&
s_mem
[
2
],
d_scale_data
);
if
(
d_bias
)
paddle
::
platform
::
CudaAtomicAdd
(
&
s_mem
[
3
],
d_bias_data
);
__syncthreads
();
if
(
threadIdx
.
x
==
0
)
{
paddle
::
platform
::
CudaAtomicAdd
(
&
d_mean
[
bid
*
groups
+
gid
],
s_mem
[
0
]);
paddle
::
platform
::
CudaAtomicAdd
(
&
d_var
[
bid
*
groups
+
gid
],
s_mem
[
1
]);
if
(
d_scale
)
paddle
::
platform
::
CudaAtomicAdd
(
&
d_scale
[
ccid
],
s_mem
[
2
]);
if
(
d_bias
)
paddle
::
platform
::
CudaAtomicAdd
(
&
d_bias
[
ccid
],
s_mem
[
3
]);
}
}
template
<
typename
T
>
__global__
void
GroupNormBackward
(
const
T
*
x
,
const
T
*
mean
,
const
T
*
var
,
const
T
*
d_mean
,
const
T
*
d_var
,
int
N
,
int
C
,
int
imsize
,
int
groups
,
int
group_size
,
T
epsilon
,
T
*
d_x
)
{
int
gid
=
blockIdx
.
y
;
int
cid
=
blockIdx
.
x
;
int
bid
=
blockIdx
.
z
;
int
number
=
min
(
group_size
,
static_cast
<
int
>
(
C
-
gid
*
group_size
));
int
ccid
=
gid
*
group_size
+
cid
;
if
(
ccid
>=
C
)
return
;
T
x_mean
=
mean
[
bid
*
groups
+
gid
];
T
x_var
=
var
[
bid
*
groups
+
gid
];
T
d_x_mean
=
d_mean
[
bid
*
groups
+
gid
];
T
d_var_inv
=
d_var
[
bid
*
groups
+
gid
];
T
d_x_var
=
-
1.0
/
(
2
*
(
x_var
+
epsilon
)
*
sqrt
(
x_var
+
epsilon
))
*
d_var_inv
;
d_x_mean
-=
2
*
d_x_var
*
x_mean
;
d_x_var
/=
number
*
imsize
;
d_x_mean
/=
number
*
imsize
;
for
(
int
imid
=
threadIdx
.
x
;
imid
<
imsize
;
imid
+=
blockDim
.
x
)
{
T
tmp
=
x
[(
bid
*
C
+
ccid
)
*
imsize
+
imid
];
if
(
d_x
)
d_x
[(
bid
*
C
+
ccid
)
*
imsize
+
imid
]
+=
d_x_mean
+
tmp
*
2
*
d_x_var
;
}
}
template
<
typename
T
>
class
GroupNormGradKernel
<
platform
::
CUDADeviceContext
,
T
>
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
float
epsilon
=
ctx
.
Attr
<
float
>
(
"epsilon"
);
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
mean
=
ctx
.
Input
<
Tensor
>
(
"Mean"
);
auto
*
var
=
ctx
.
Input
<
Tensor
>
(
"Variance"
);
auto
*
scale
=
ctx
.
Input
<
Tensor
>
(
"Scale"
);
auto
*
d_y
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
const
auto
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
// init output
auto
*
d_x
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
d_scale
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Scale"
));
auto
*
d_bias
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Bias"
));
const
auto
&
x_dims
=
x
->
dims
();
const
int
group_size
=
(
x_dims
[
1
]
-
1
)
/
groups
+
1
;
T
*
d_x_data
=
nullptr
;
if
(
d_x
)
{
d_x
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
d_x_data
=
d_x
->
data
<
T
>
();
}
math
::
SetConstant
<
platform
::
CUDADeviceContext
,
T
>
set_zero
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CUDADeviceContext
>();
Tensor
temp_var
;
temp_var
.
mutable_data
<
T
>
(
var
->
dims
(),
ctx
.
GetPlace
());
set_zero
(
dev_ctx
,
&
temp_var
,
static_cast
<
T
>
(
0
));
T
*
temp_var_data
=
temp_var
.
data
<
T
>
();
Tensor
temp_mean
;
temp_mean
.
mutable_data
<
T
>
(
var
->
dims
(),
ctx
.
GetPlace
());
set_zero
(
dev_ctx
,
&
temp_mean
,
static_cast
<
T
>
(
0
));
T
*
temp_mean_data
=
temp_mean
.
data
<
T
>
();
auto
*
x_data
=
x
->
data
<
T
>
();
auto
*
y_data
=
d_y
->
data
<
T
>
();
auto
*
mean_data
=
mean
->
data
<
T
>
();
auto
*
var_data
=
var
->
data
<
T
>
();
T
*
d_scale_data
=
nullptr
;
if
(
d_scale
)
{
d_scale
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
set_zero
(
dev_ctx
,
d_scale
,
static_cast
<
T
>
(
0
));
d_scale_data
=
d_scale
->
data
<
T
>
();
}
T
*
d_bias_data
=
nullptr
;
if
(
d_bias
)
{
d_bias
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
set_zero
(
dev_ctx
,
d_bias
,
static_cast
<
T
>
(
0
));
d_bias_data
=
d_bias
->
data
<
T
>
();
}
const
T
*
scale_data
=
nullptr
;
if
(
scale
)
scale_data
=
scale
->
data
<
T
>
();
int
imsize
=
x_dims
[
2
]
*
x_dims
[
3
];
int
block_size
=
std
::
min
(
512
,
imsize
);
dim3
grid
(
group_size
,
groups
,
x_dims
[
0
]);
dim3
threads
(
block_size
,
1
,
1
);
GroupNormBackwardGetMeanAndVar
<
T
><<<
grid
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
x_data
,
mean_data
,
var_data
,
scale_data
,
y_data
,
x_dims
[
0
],
x_dims
[
1
],
imsize
,
groups
,
group_size
,
epsilon
,
d_x_data
,
temp_mean_data
,
temp_var_data
,
d_scale_data
,
d_bias_data
);
GroupNormBackward
<
T
><<<
grid
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
x_data
,
mean_data
,
var_data
,
temp_mean_data
,
temp_var_data
,
x_dims
[
0
],
x_dims
[
1
],
imsize
,
groups
,
group_size
,
epsilon
,
d_x_data
);
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
group_norm
,
ops
::
GroupNormKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
GroupNormKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
REGISTER_OP_CUDA_KERNEL
(
group_norm_grad
,
ops
::
GroupNormGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
GroupNormGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
paddle/fluid/operators/group_norm_op.h
0 → 100644
浏览文件 @
dfbac603
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
using
DataLayout
=
framework
::
DataLayout
;
template
<
typename
DeviceContext
,
typename
T
>
class
GroupNormKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
float
epsilon
=
ctx
.
Attr
<
float
>
(
"epsilon"
);
auto
*
scale
=
ctx
.
Input
<
Tensor
>
(
"Scale"
);
auto
*
bias
=
ctx
.
Input
<
Tensor
>
(
"Bias"
);
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
y
=
ctx
.
Output
<
Tensor
>
(
"Y"
);
auto
*
mean
=
ctx
.
Output
<
Tensor
>
(
"Mean"
);
auto
*
var
=
ctx
.
Output
<
Tensor
>
(
"Variance"
);
const
auto
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
const
auto
x_dims
=
x
->
dims
();
const
int
group_size
=
(
x_dims
[
1
]
-
1
)
/
groups
+
1
;
y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
mean
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
var
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
*
x_data
=
x
->
data
<
T
>
();
auto
*
y_data
=
y
->
data
<
T
>
();
auto
*
mean_data
=
mean
->
data
<
T
>
();
auto
*
var_data
=
var
->
data
<
T
>
();
const
T
*
scale_data
=
nullptr
;
if
(
scale
)
scale_data
=
scale
->
data
<
T
>
();
const
T
*
bias_data
=
nullptr
;
if
(
bias
)
bias_data
=
bias
->
data
<
T
>
();
int
imsize
=
x_dims
[
2
]
*
x_dims
[
3
];
auto
*
iter_x_data
=
x_data
;
auto
*
iter_y_data
=
y_data
;
for
(
int
bid
=
0
;
bid
<
x_dims
[
0
];
bid
++
)
for
(
int
gid
=
0
;
gid
<
groups
;
gid
++
)
{
T
x_mean
=
0
,
x_var
=
0
;
int
number
=
std
::
min
(
group_size
,
static_cast
<
int
>
(
x_dims
[
1
]
-
gid
*
group_size
));
auto
*
tmp
=
iter_x_data
;
for
(
int
cid
=
0
;
cid
<
number
;
cid
++
)
{
for
(
int
imid
=
0
;
imid
<
imsize
;
imid
++
,
iter_x_data
++
)
{
x_mean
+=
iter_x_data
[
0
];
x_var
+=
iter_x_data
[
0
]
*
iter_x_data
[
0
];
}
}
x_mean
/=
number
*
imsize
;
x_var
/=
number
*
imsize
;
x_var
=
x_var
-
x_mean
*
x_mean
;
T
var_inv
=
1.0
/
sqrt
(
x_var
+
epsilon
);
mean_data
[
bid
*
groups
+
gid
]
=
x_mean
;
var_data
[
bid
*
groups
+
gid
]
=
x_var
;
for
(
int
cid
=
0
;
cid
<
number
;
cid
++
)
{
for
(
int
imid
=
0
;
imid
<
imsize
;
imid
++
,
tmp
++
,
iter_y_data
++
)
{
T
val
=
(
tmp
[
0
]
-
x_mean
)
*
var_inv
;
if
(
scale_data
)
val
*=
scale_data
[
gid
*
group_size
+
cid
];
if
(
bias_data
)
val
+=
bias_data
[
gid
*
group_size
+
cid
];
iter_y_data
[
0
]
=
val
;
}
}
}
}
};
template
<
typename
DeviceContext
,
typename
T
>
class
GroupNormGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
float
epsilon
=
ctx
.
Attr
<
float
>
(
"epsilon"
);
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
mean
=
ctx
.
Input
<
Tensor
>
(
"Mean"
);
auto
*
var
=
ctx
.
Input
<
Tensor
>
(
"Variance"
);
auto
*
scale
=
ctx
.
Input
<
Tensor
>
(
"Scale"
);
auto
*
d_y
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
const
auto
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
// init output
auto
*
d_x
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
d_scale
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Scale"
));
auto
*
d_bias
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Bias"
));
const
auto
&
x_dims
=
x
->
dims
();
const
int
group_size
=
(
x_dims
[
1
]
-
1
)
/
groups
+
1
;
// TODO(liangdun): need to check d_x is null
math
::
SetConstant
<
DeviceContext
,
T
>
set_zero
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
T
*
d_x_data
=
nullptr
;
if
(
d_x
)
{
d_x
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
set_zero
(
dev_ctx
,
d_x
,
static_cast
<
T
>
(
0
));
d_x_data
=
d_x
->
data
<
T
>
();
}
auto
*
x_data
=
x
->
data
<
T
>
();
auto
*
y_data
=
d_y
->
data
<
T
>
();
auto
*
mean_data
=
mean
->
data
<
T
>
();
auto
*
var_data
=
var
->
data
<
T
>
();
T
*
d_scale_data
=
nullptr
;
if
(
d_scale
)
{
d_scale
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
set_zero
(
dev_ctx
,
d_scale
,
static_cast
<
T
>
(
0
));
d_scale_data
=
d_scale
->
data
<
T
>
();
}
T
*
d_bias_data
=
nullptr
;
if
(
d_bias
)
{
d_bias
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
set_zero
(
dev_ctx
,
d_bias
,
static_cast
<
T
>
(
0
));
d_bias_data
=
d_bias
->
data
<
T
>
();
}
const
T
*
scale_data
=
nullptr
;
if
(
scale
)
scale_data
=
scale
->
data
<
T
>
();
int
imsize
=
x_dims
[
2
]
*
x_dims
[
3
];
auto
*
iter_x_data
=
x_data
;
auto
*
iter_d_x_data
=
d_x_data
;
auto
*
iter_y_data
=
y_data
;
for
(
int
bid
=
0
;
bid
<
x_dims
[
0
];
bid
++
)
for
(
int
gid
=
0
;
gid
<
groups
;
gid
++
)
{
T
x_mean
=
mean_data
[
bid
*
groups
+
gid
];
T
x_var
=
var_data
[
bid
*
groups
+
gid
];
T
var_inv
=
1.0
/
sqrt
(
x_var
+
epsilon
);
int
number
=
std
::
min
(
group_size
,
static_cast
<
int
>
(
x_dims
[
1
]
-
gid
*
group_size
));
auto
*
tmp
=
iter_x_data
;
auto
*
tmp2
=
iter_d_x_data
;
T
d_var_inv
=
0
,
d_x_mean
=
0
;
for
(
int
cid
=
0
;
cid
<
number
;
cid
++
)
{
for
(
int
imid
=
0
;
imid
<
imsize
;
imid
++
,
tmp
++
,
iter_y_data
++
,
iter_d_x_data
++
)
{
T
val
=
(
tmp
[
0
]
-
x_mean
)
*
var_inv
;
T
dval
=
iter_y_data
[
0
];
if
(
d_bias_data
)
d_bias_data
[
gid
*
group_size
+
cid
]
+=
dval
;
if
(
d_scale_data
)
d_scale_data
[
gid
*
group_size
+
cid
]
+=
val
*
dval
;
if
(
scale_data
)
dval
=
scale_data
[
gid
*
group_size
+
cid
]
*
dval
;
d_var_inv
+=
(
tmp
[
0
]
-
x_mean
)
*
dval
;
T
d_tmp
=
dval
*
var_inv
;
if
(
d_x_data
)
iter_d_x_data
[
0
]
+=
d_tmp
;
d_x_mean
-=
d_tmp
;
}
}
T
d_x_var
=
-
1.0
/
(
2
*
(
x_var
+
epsilon
)
*
sqrt
(
x_var
+
epsilon
))
*
d_var_inv
;
d_x_mean
-=
2
*
d_x_var
*
x_mean
;
d_x_var
/=
number
*
imsize
;
d_x_mean
/=
number
*
imsize
;
iter_d_x_data
=
tmp2
;
if
(
d_x_data
)
{
for
(
int
cid
=
0
;
cid
<
number
;
cid
++
)
{
for
(
int
imid
=
0
;
imid
<
imsize
;
imid
++
,
iter_x_data
++
,
iter_d_x_data
++
)
{
iter_d_x_data
[
0
]
+=
d_x_mean
;
iter_d_x_data
[
0
]
+=
iter_x_data
[
0
]
*
2
*
d_x_var
;
}
}
}
}
}
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/math/detail/activation_functions.h
浏览文件 @
dfbac603
...
@@ -15,6 +15,7 @@ limitations under the License. */
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <math.h>
#include <math.h>
#include <string>
#include <string>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/hostdevice.h"
#include "paddle/fluid/platform/hostdevice.h"
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
dfbac603
...
@@ -37,6 +37,7 @@ limitations under the License. */
...
@@ -37,6 +37,7 @@ limitations under the License. */
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/place.h"
...
@@ -86,6 +87,9 @@ bool IsCompiledWithDIST() {
...
@@ -86,6 +87,9 @@ bool IsCompiledWithDIST() {
}
}
PYBIND11_PLUGIN
(
core
)
{
PYBIND11_PLUGIN
(
core
)
{
// Not used, just make sure cpu_info.cc is linked.
paddle
::
platform
::
CpuTotalPhysicalMemory
();
paddle
::
memory
::
allocation
::
UseAllocatorStrategyGFlag
();
paddle
::
memory
::
allocation
::
UseAllocatorStrategyGFlag
();
py
::
module
m
(
"core"
,
"C++ core of PaddlePaddle"
);
py
::
module
m
(
"core"
,
"C++ core of PaddlePaddle"
);
...
...
python/paddle/fluid/contrib/utils/__init__.py
0 → 100644
浏览文件 @
dfbac603
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
from
.
import
hdfs_utils
from
.hdfs_utils
import
*
__all__
=
hdfs_utils
.
__all__
python/paddle/fluid/contrib/utils/hdfs_utils.py
0 → 100644
浏览文件 @
dfbac603
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""HDFS Utils"""
import
os
import
subprocess
import
multiprocessing
from
datetime
import
datetime
import
re
import
copy
import
errno
import
logging
__all__
=
[
"HDFSClient"
,
"multi_download"
]
logging
.
basicConfig
(
format
=
'%(asctime)s - %(levelname)s - %(message)s'
)
_logger
=
logging
.
getLogger
(
"hdfs_utils"
)
_logger
.
setLevel
(
logging
.
INFO
)
class
HDFSClient
(
object
):
def
__init__
(
self
,
hadoop_home
,
configs
):
self
.
pre_commands
=
[]
hadoop_bin
=
'%s/bin/hadoop'
%
hadoop_home
self
.
pre_commands
.
append
(
hadoop_bin
)
dfs
=
'fs'
self
.
pre_commands
.
append
(
dfs
)
for
k
,
v
in
configs
.
iteritems
():
config_command
=
'-D%s=%s'
%
(
k
,
v
)
self
.
pre_commands
.
append
(
config_command
)
def
__run_hdfs_cmd
(
self
,
commands
,
retry_times
=
5
):
whole_commands
=
copy
.
deepcopy
(
self
.
pre_commands
)
whole_commands
.
extend
(
commands
)
print
(
'Running system command: {0}'
.
format
(
' '
.
join
(
whole_commands
)))
ret_code
=
0
ret_out
=
None
ret_err
=
None
for
x
in
range
(
retry_times
+
1
):
proc
=
subprocess
.
Popen
(
whole_commands
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
)
(
output
,
errors
)
=
proc
.
communicate
()
ret_code
,
ret_out
,
ret_err
=
proc
.
returncode
,
output
,
errors
if
ret_code
:
_logger
.
warn
(
'Times: %d, Error running command: %s. Return code: %d, Error: %s'
%
(
x
,
' '
.
join
(
whole_commands
),
proc
.
returncode
,
errors
))
else
:
break
return
ret_code
,
ret_out
,
ret_err
def
upload
(
self
,
hdfs_path
,
local_path
,
overwrite
=
False
,
retry_times
=
5
):
"""
upload the local file to hdfs
args:
local_file_path: the local file path
remote_file_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp)
return:
True or False
"""
assert
hdfs_path
is
not
None
assert
local_path
is
not
None
and
os
.
path
.
exists
(
local_path
)
if
os
.
path
.
isdir
(
local_path
):
_logger
.
warn
(
"The Local path: {} is dir and I will support it later, return"
.
format
(
local_path
))
return
base
=
os
.
path
.
basename
(
local_path
)
if
not
self
.
is_exist
(
hdfs_path
):
self
.
makedirs
(
hdfs_path
)
else
:
if
self
.
is_exist
(
os
.
path
.
join
(
hdfs_path
,
base
)):
if
overwrite
:
_logger
.
error
(
"The HDFS path: {} is exist and overwrite is True, delete it"
.
format
(
hdfs_path
))
self
.
delete
(
hdfs_path
)
else
:
_logger
.
error
(
"The HDFS path: {} is exist and overwrite is False, return"
.
format
(
hdfs_path
))
return
False
put_commands
=
[
"-put"
,
local_path
,
hdfs_path
]
returncode
,
output
,
errors
=
self
.
__run_hdfs_cmd
(
put_commands
,
retry_times
)
if
returncode
:
_logger
.
error
(
"Put local path: {} to HDFS path: {} failed"
.
format
(
local_path
,
hdfs_path
))
return
False
else
:
_logger
.
info
(
"Put local path: {} to HDFS path: {} successfully"
.
format
(
local_path
,
hdfs_path
))
return
True
def
download
(
self
,
hdfs_path
,
local_path
,
overwrite
=
False
,
unzip
=
False
):
"""
download from hdfs
args:
local_file_path: the local file path
remote_file_path: remote dir on hdfs
return:
True or False
"""
_logger
.
info
(
'Downloading %r to %r.'
,
hdfs_path
,
local_path
)
_logger
.
info
(
'Download of %s to %r complete.'
,
hdfs_path
,
local_path
)
if
not
self
.
is_exist
(
hdfs_path
):
print
(
"HDFS path: {} do not exist"
.
format
(
hdfs_path
))
return
False
if
self
.
is_dir
(
hdfs_path
):
_logger
.
error
(
"The HDFS path: {} is dir and I will support it later, return"
.
format
(
hdfs_path
))
if
os
.
path
.
exists
(
local_path
):
base
=
os
.
path
.
basename
(
hdfs_path
)
local_file
=
os
.
path
.
join
(
local_path
,
base
)
if
os
.
path
.
exists
(
local_file
):
if
overwrite
:
os
.
remove
(
local_file
)
else
:
_logger
.
error
(
"The Local path: {} is exist and overwrite is False, return"
.
format
(
local_file
))
return
False
self
.
make_local_dirs
(
local_path
)
download_commands
=
[
"-get"
,
hdfs_path
,
local_path
]
returncode
,
output
,
errors
=
self
.
__run_hdfs_cmd
(
download_commands
)
if
returncode
:
_logger
.
error
(
"Get local path: {} from HDFS path: {} failed"
.
format
(
local_path
,
hdfs_path
))
return
False
else
:
_logger
.
info
(
"Get local path: {} from HDFS path: {} successfully"
.
format
(
local_path
,
hdfs_path
))
return
True
def
is_exist
(
self
,
hdfs_path
=
None
):
"""
whether the remote hdfs path exists?
args:
remote_file_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp)
fs_name: The default values are the same as in the job configuration
fs_ugi: The default values are the same as in the job configuration
return:
True or False
"""
exist_cmd
=
[
'-test'
,
'-e'
,
hdfs_path
]
returncode
,
output
,
errors
=
self
.
__run_hdfs_cmd
(
exist_cmd
,
retry_times
=
1
)
if
returncode
:
_logger
.
error
(
"HDFS is_exist HDFS path: {} failed"
.
format
(
hdfs_path
))
return
False
else
:
_logger
.
info
(
"HDFS is_exist HDFS path: {} successfully"
.
format
(
hdfs_path
))
return
True
def
is_dir
(
self
,
hdfs_path
=
None
):
"""
whether the remote hdfs path exists?
args:
remote_file_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp)
fs_name: The default values are the same as in the job configuration
fs_ugi: The default values are the same as in the job configuration
return:
True or False
"""
if
not
self
.
is_exist
(
hdfs_path
):
return
False
dir_cmd
=
[
'-test'
,
'-d'
,
hdfs_path
]
returncode
,
output
,
errors
=
self
.
__run_hdfs_cmd
(
dir_cmd
,
retry_times
=
1
)
if
returncode
:
_logger
.
error
(
"HDFS path: {} failed is not a directory"
.
format
(
hdfs_path
))
return
False
else
:
_logger
.
info
(
"HDFS path: {} successfully is a directory"
.
format
(
hdfs_path
))
return
True
def
delete
(
self
,
hdfs_path
):
"""Remove a file or directory from HDFS.
:param hdfs_path: HDFS path.
:param recursive: Recursively delete files and directories. By default,
this method will raise an :class:`HdfsError` if trying to delete a
non-empty directory.
This function returns `True` if the deletion was successful and `False` if
no file or directory previously existed at `hdfs_path`.
"""
_logger
.
info
(
'Deleting %r.'
,
hdfs_path
)
if
not
self
.
is_exist
(
hdfs_path
):
_logger
.
warn
(
"HDFS path: {} do not exist"
.
format
(
hdfs_path
))
return
True
if
self
.
is_dir
(
hdfs_path
):
del_cmd
=
[
'-rmr'
,
hdfs_path
]
else
:
del_cmd
=
[
'-rm'
,
hdfs_path
]
returncode
,
output
,
errors
=
self
.
__run_hdfs_cmd
(
del_cmd
,
retry_times
=
0
)
if
returncode
:
_logger
.
error
(
"HDFS path: {} delete files failure"
.
format
(
hdfs_path
))
return
False
else
:
_logger
.
info
(
"HDFS path: {} delete files successfully"
.
format
(
hdfs_path
))
return
True
def
rename
(
self
,
hdfs_src_path
,
hdfs_dst_path
,
overwrite
=
False
):
"""Move a file or folder.
:param hdfs_src_path: Source path.
:param hdfs_dst_path: Destination path. If the path already exists and is
a directory, the source will be moved into it. If the path exists and is
a file, or if a parent destination directory is missing, this method will
raise an :class:`HdfsError`.
"""
assert
hdfs_src_path
is
not
None
assert
hdfs_dst_path
is
not
None
if
not
self
.
is_exist
(
hdfs_src_path
):
_logger
.
info
(
"HDFS path do not exist: {}"
.
format
(
hdfs_src_path
))
if
self
.
is_exist
(
hdfs_dst_path
)
and
not
overwrite
:
_logger
.
error
(
"HDFS path is exist: {} and overwrite=False"
.
format
(
hdfs_dst_path
))
rename_command
=
[
'-mv'
,
hdfs_src_path
,
hdfs_dst_path
]
returncode
,
output
,
errors
=
self
.
__run_hdfs_cmd
(
rename_command
,
retry_times
=
1
)
if
returncode
:
_logger
.
error
(
"HDFS rename path: {} to {} failed"
.
format
(
hdfs_src_path
,
hdfs_dst_path
))
return
False
else
:
_logger
.
info
(
"HDFS rename path: {} to {} successfully"
.
format
(
hdfs_src_path
,
hdfs_dst_path
))
return
True
@
staticmethod
def
make_local_dirs
(
local_path
):
try
:
os
.
makedirs
(
local_path
)
except
OSError
as
e
:
if
e
.
errno
!=
errno
.
EEXIST
:
raise
def
makedirs
(
self
,
hdfs_path
):
"""Create a remote directory, recursively if necessary.
:param hdfs_path: Remote path. Intermediate directories will be created
appropriately.
"""
_logger
.
info
(
'Creating directories to %r.'
,
hdfs_path
)
assert
hdfs_path
is
not
None
if
self
.
is_exist
(
hdfs_path
):
return
mkdirs_commands
=
[
'-mkdir'
,
hdfs_path
]
returncode
,
output
,
errors
=
self
.
__run_hdfs_cmd
(
mkdirs_commands
,
retry_times
=
1
)
if
returncode
:
_logger
.
error
(
"HDFS mkdir path: {} failed"
.
format
(
hdfs_path
))
return
False
else
:
_logger
.
error
(
"HDFS mkdir path: {} successfully"
.
format
(
hdfs_path
))
return
True
def
ls
(
self
,
hdfs_path
):
assert
hdfs_path
is
not
None
if
not
self
.
is_exist
(
hdfs_path
):
return
[]
ls_commands
=
[
'-ls'
,
hdfs_path
]
returncode
,
output
,
errors
=
self
.
__run_hdfs_cmd
(
ls_commands
,
retry_times
=
1
)
if
returncode
:
_logger
.
error
(
"HDFS list path: {} failed"
.
format
(
hdfs_path
))
return
[]
else
:
_logger
.
info
(
"HDFS list path: {} successfully"
.
format
(
hdfs_path
))
ret_lines
=
[]
regex
=
re
.
compile
(
'\s+'
)
out_lines
=
output
.
strip
().
split
(
"
\n
"
)
for
line
in
out_lines
:
re_line
=
regex
.
split
(
line
)
if
len
(
re_line
)
==
8
:
ret_lines
.
append
(
re_line
[
7
])
return
ret_lines
def
lsr
(
self
,
hdfs_path
,
only_file
=
True
,
sort
=
True
):
def
sort_by_time
(
v1
,
v2
):
v1_time
=
datetime
.
strptime
(
v1
[
1
],
'%Y-%m-%d %H:%M'
)
v2_time
=
datetime
.
strptime
(
v2
[
1
],
'%Y-%m-%d %H:%M'
)
return
v1_time
>
v2_time
assert
hdfs_path
is
not
None
if
not
self
.
is_exist
(
hdfs_path
):
return
[]
ls_commands
=
[
'-lsr'
,
hdfs_path
]
returncode
,
output
,
errors
=
self
.
__run_hdfs_cmd
(
ls_commands
,
retry_times
=
1
)
if
returncode
:
_logger
.
error
(
"HDFS list all files: {} failed"
.
format
(
hdfs_path
))
return
[]
else
:
_logger
.
info
(
"HDFS list all files: {} successfully"
.
format
(
hdfs_path
))
lines
=
[]
regex
=
re
.
compile
(
'\s+'
)
out_lines
=
output
.
strip
().
split
(
"
\n
"
)
for
line
in
out_lines
:
re_line
=
regex
.
split
(
line
)
if
len
(
re_line
)
==
8
:
if
only_file
and
re_line
[
0
][
0
]
==
"d"
:
continue
else
:
lines
.
append
(
(
re_line
[
7
],
re_line
[
5
]
+
" "
+
re_line
[
6
]))
if
sort
:
sorted
(
lines
,
cmp
=
sort_by_time
)
ret_lines
=
[
ret
[
0
]
for
ret
in
lines
]
return
ret_lines
def
multi_upload
(
client
,
hdfs_path
,
local_path
,
multi_processes
=
5
,
overwrite
=
False
):
"""
:param overwrite: will overwrite hdfs file or not
:param multi_processes: the upload data process at the same time, default=5
:param client: instance of HDFSClient
:param hdfs_path: path on hdfs
:param local_path: path on local
:return:
"""
def
__subprocess_upload
(
datas
):
for
data
in
datas
:
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
local_path
)
hdfs_re_path
=
os
.
path
.
join
(
hdfs_path
,
re_path
)
client
.
upload
(
hdfs_re_path
,
data
,
overwrite
,
retry_times
=
5
)
def
get_local_files
(
path
):
rlist
=
[]
if
not
os
.
path
.
isdir
(
path
):
return
rlist
for
dirname
,
folder
,
files
in
os
.
walk
(
path
):
for
i
in
files
:
t
=
os
.
path
.
join
(
dirname
,
i
)
rlist
.
append
(
t
)
return
rlist
assert
isinstance
(
client
,
HDFSClient
)
all_files
=
get_local_files
(
local_path
)
if
not
all_files
:
_logger
.
info
(
"there are nothing need to upload, exit"
)
return
_logger
.
info
(
"Start {} multi process to upload datas"
.
format
(
multi_processes
))
procs
=
[]
for
i
in
range
(
multi_processes
):
process_datas
=
all_files
[
i
::
multi_processes
]
p
=
multiprocessing
.
Process
(
target
=
__subprocess_upload
,
args
=
(
process_datas
,
))
procs
.
append
(
p
)
p
.
start
()
# complete the processes
for
proc
in
procs
:
proc
.
join
()
_logger
.
info
(
"Finish {} multi process to upload datas"
.
format
(
multi_processes
))
def
multi_download
(
client
,
hdfs_path
,
local_path
,
trainer_id
,
trainers
,
multi_processes
=
5
):
"""
multi_download
:param client: instance of HDFSClient
:param hdfs_path: path on hdfs
:param local_path: path on local
:param trainer_id: current trainer id
:param trainers: all trainers number
:param multi_processes: the download data process at the same time, default=5
:return: None
"""
def
__subprocess_download
(
datas
):
for
data
in
datas
:
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
hdfs_path
)
local_re_path
=
os
.
path
.
join
(
local_path
,
re_path
)
client
.
download
(
data
,
local_re_path
)
assert
isinstance
(
client
,
HDFSClient
)
client
.
make_local_dirs
(
local_path
)
_logger
.
info
(
"Make local dir {} successfully"
.
format
(
local_path
))
all_need_download
=
client
.
lsr
(
hdfs_path
,
sort
=
True
)
need_download
=
all_need_download
[
trainer_id
::
trainers
]
_logger
.
info
(
"Get {} files From all {} files need to be download from {}"
.
format
(
len
(
need_download
),
len
(
all_need_download
),
hdfs_path
))
_logger
.
info
(
"Start {} multi process to download datas"
.
format
(
multi_processes
))
procs
=
[]
for
i
in
range
(
multi_processes
):
process_datas
=
need_download
[
i
::
multi_processes
]
p
=
multiprocessing
.
Process
(
target
=
__subprocess_download
,
args
=
(
process_datas
,
))
procs
.
append
(
p
)
p
.
start
()
# complete the processes
for
proc
in
procs
:
proc
.
join
()
_logger
.
info
(
"Finish {} multi process to download datas"
.
format
(
multi_processes
))
local_downloads
=
[]
for
data
in
need_download
:
data_name
=
os
.
path
.
basename
(
data
)
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
hdfs_path
)
local_re_path
=
os
.
path
.
join
(
local_path
,
re_path
,
data_name
)
local_downloads
.
append
(
local_re_path
)
return
local_downloads
if
__name__
==
"__main__"
:
hadoop_home
=
"/home/client/hadoop-client/hadoop/"
configs
=
{
"fs.default.name"
:
"hdfs://xxx.hadoop.com:54310"
,
"hadoop.job.ugi"
:
"hello,hello123"
}
client
=
HDFSClient
(
hadoop_home
,
configs
)
client
.
ls
(
"/user/com/train-25"
)
files
=
client
.
lsr
(
"/user/com/train-25/models"
)
downloads
=
multi_download
(
client
,
"/user/com/train-25/model"
,
"/home/xx/data1"
,
1
,
5
,
multi_processes
=
5
)
multi_upload
(
client
,
"/user/com/train-25/model"
,
"/home/xx/data1"
)
python/paddle/fluid/layers/nn.py
浏览文件 @
dfbac603
...
@@ -85,6 +85,7 @@ __all__ = [
...
@@ -85,6 +85,7 @@ __all__ = [
'row_conv'
,
'row_conv'
,
'multiplex'
,
'multiplex'
,
'layer_norm'
,
'layer_norm'
,
'group_norm'
,
'softmax_with_cross_entropy'
,
'softmax_with_cross_entropy'
,
'smooth_l1'
,
'smooth_l1'
,
'one_hot'
,
'one_hot'
,
...
@@ -2547,6 +2548,84 @@ def layer_norm(input,
...
@@ -2547,6 +2548,84 @@ def layer_norm(input,
return
helper
.
append_activation
(
layer_norm_out
)
return
helper
.
append_activation
(
layer_norm_out
)
@
templatedoc
()
def
group_norm
(
input
,
groups
,
epsilon
=
1e-05
,
param_attr
=
None
,
bias_attr
=
None
,
act
=
None
,
data_layout
=
'NCHW'
,
name
=
None
):
"""
**Group Normalization Layer**
Refer to `Group Normalization <https://arxiv.org/abs/1803.08494>`
Args:
input(Variable): The input tensor variable.
groups(int): The number of groups that divided from channels.
epsilon(float): The small value added to the variance to prevent
division by zero.
param_attr(ParamAttr|None): The parameter attribute for the learnable
scale :math:`g`. If it is set to False, no scale will be added to the output units.
If it is set to None, the bias is initialized one. Default: None.
bias_attr(ParamAttr|None): The parameter attribute for the learnable
bias :math:`b`. If it is set to False, no bias will be added to the output units.
If it is set to None, the bias is initialized zero. Default: None.
act(str): Activation to be applied to the output of group normalizaiton.
data_layout(string|NCHW): Only NCHW is supported.
name (str): The name of this layer. It is optional.
Returns:
Variable: A tensor variable which is the result after applying group normalization on the input.
Examples:
>>> data = fluid.layers.data(name='data', shape=[8, 32, 32],
>>> dtype='float32')
>>> x = fluid.layers.group_norm(input=data, groups=4)
"""
helper
=
LayerHelper
(
'group_norm'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
# create intput and parameters
inputs
=
{
'X'
:
input
}
input_shape
=
input
.
shape
if
data_layout
!=
'NCHW'
:
raise
ValueError
(
"unsupported data layout:"
+
data_layout
)
param_shape
=
[
input_shape
[
1
]]
if
param_attr
:
scale
=
helper
.
create_parameter
(
attr
=
helper
.
param_attr
,
shape
=
param_shape
,
dtype
=
dtype
,
default_initializer
=
Constant
(
1.0
))
inputs
[
'Scale'
]
=
scale
if
bias_attr
:
bias
=
helper
.
create_parameter
(
attr
=
helper
.
bias_attr
,
shape
=
param_shape
,
dtype
=
dtype
,
is_bias
=
True
)
inputs
[
'Bias'
]
=
bias
# create output
mean_out
=
helper
.
create_tmp_variable
(
dtype
=
dtype
,
stop_gradient
=
True
)
variance_out
=
helper
.
create_tmp_variable
(
dtype
=
dtype
,
stop_gradient
=
True
)
group_norm_out
=
helper
.
create_tmp_variable
(
dtype
)
helper
.
append_op
(
type
=
"group_norm"
,
inputs
=
inputs
,
outputs
=
{
"Y"
:
group_norm_out
,
"Mean"
:
mean_out
,
"Variance"
:
variance_out
,
},
attrs
=
{
"epsilon"
:
epsilon
,
"groups"
:
groups
})
return
helper
.
append_activation
(
group_norm_out
)
def
conv2d_transpose
(
input
,
def
conv2d_transpose
(
input
,
num_filters
,
num_filters
,
output_size
=
None
,
output_size
=
None
,
...
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
dfbac603
...
@@ -23,11 +23,11 @@ if(NOT WITH_DISTRIBUTE)
...
@@ -23,11 +23,11 @@ if(NOT WITH_DISTRIBUTE)
LIST
(
REMOVE_ITEM TEST_OPS test_dist_text_classification
)
LIST
(
REMOVE_ITEM TEST_OPS test_dist_text_classification
)
endif
(
NOT WITH_DISTRIBUTE
)
endif
(
NOT WITH_DISTRIBUTE
)
if
(
WITH_GPU
)
if
(
NOT
${
WITH_GPU
}
)
if
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
LIST
(
REMOVE_ITEM TEST_OPS test_conv2d_fusion_op
)
LIST
(
REMOVE_ITEM TEST_OPS test_conv2d_fusion_op
)
elseif
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
endif
(
)
LIST
(
REMOVE_ITEM TEST_OPS test_conv2d_fusion_op
)
endif
(
WITH_GPU
)
endif
()
list
(
REMOVE_ITEM TEST_OPS test_seq_concat_op
)
# FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290
list
(
REMOVE_ITEM TEST_OPS test_seq_concat_op
)
# FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290
list
(
REMOVE_ITEM TEST_OPS test_modified_huber_loss_op
)
# FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5184
list
(
REMOVE_ITEM TEST_OPS test_modified_huber_loss_op
)
# FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5184
...
@@ -81,10 +81,12 @@ list(REMOVE_ITEM TEST_OPS test_dist_se_resnext)
...
@@ -81,10 +81,12 @@ list(REMOVE_ITEM TEST_OPS test_dist_se_resnext)
list
(
REMOVE_ITEM TEST_OPS test_dist_transformer
)
list
(
REMOVE_ITEM TEST_OPS test_dist_transformer
)
list
(
REMOVE_ITEM TEST_OPS test_parallel_executor_transformer
)
list
(
REMOVE_ITEM TEST_OPS test_parallel_executor_transformer
)
list
(
REMOVE_ITEM TEST_OPS test_image_classification_resnet
)
list
(
REMOVE_ITEM TEST_OPS test_image_classification_resnet
)
list
(
REMOVE_ITEM TEST_OPS test_interpolate_op
)
foreach
(
TEST_OP
${
TEST_OPS
}
)
foreach
(
TEST_OP
${
TEST_OPS
}
)
py_test_modules
(
${
TEST_OP
}
MODULES
${
TEST_OP
}
)
py_test_modules
(
${
TEST_OP
}
MODULES
${
TEST_OP
}
)
endforeach
(
TEST_OP
)
endforeach
(
TEST_OP
)
py_test_modules
(
test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=
${
WARPCTC_LIB_DIR
}
SERIAL
)
py_test_modules
(
test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=
${
WARPCTC_LIB_DIR
}
SERIAL
)
py_test_modules
(
test_interpolate_op MODULES test_interpolate_op SERIAL
)
if
(
WITH_DISTRIBUTE
)
if
(
WITH_DISTRIBUTE
)
py_test_modules
(
test_dist_train MODULES test_dist_train SERIAL
)
py_test_modules
(
test_dist_train MODULES test_dist_train SERIAL
)
set_tests_properties
(
test_listen_and_serv_op PROPERTIES TIMEOUT 20
)
set_tests_properties
(
test_listen_and_serv_op PROPERTIES TIMEOUT 20
)
...
...
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
dfbac603
...
@@ -381,8 +381,8 @@ class OpTest(unittest.TestCase):
...
@@ -381,8 +381,8 @@ class OpTest(unittest.TestCase):
outs
.
sort
(
key
=
len
)
outs
.
sort
(
key
=
len
)
checker
(
outs
)
checker
(
outs
)
def
_
_
assert_is_close
(
self
,
numeric_grads
,
analytic_grads
,
names
,
def
_assert_is_close
(
self
,
numeric_grads
,
analytic_grads
,
names
,
max_relative_error
,
msg_prefix
):
max_relative_error
,
msg_prefix
):
for
a
,
b
,
name
in
six
.
moves
.
zip
(
numeric_grads
,
analytic_grads
,
names
):
for
a
,
b
,
name
in
six
.
moves
.
zip
(
numeric_grads
,
analytic_grads
,
names
):
abs_a
=
np
.
abs
(
a
)
abs_a
=
np
.
abs
(
a
)
...
@@ -451,9 +451,9 @@ class OpTest(unittest.TestCase):
...
@@ -451,9 +451,9 @@ class OpTest(unittest.TestCase):
analytic_grads
=
self
.
_get_gradient
(
inputs_to_check
,
place
,
analytic_grads
=
self
.
_get_gradient
(
inputs_to_check
,
place
,
output_names
,
no_grad_set
)
output_names
,
no_grad_set
)
self
.
_
_
assert_is_close
(
numeric_grads
,
analytic_grads
,
inputs_to_check
,
self
.
_assert_is_close
(
numeric_grads
,
analytic_grads
,
inputs_to_check
,
max_relative_error
,
max_relative_error
,
"Gradient Check On %s"
%
str
(
place
))
"Gradient Check On %s"
%
str
(
place
))
@
staticmethod
@
staticmethod
def
_numpy_to_lod_tensor
(
np_value
,
lod
,
place
):
def
_numpy_to_lod_tensor
(
np_value
,
lod
,
place
):
...
...
python/paddle/fluid/tests/unittests/test_group_norm_op.py
0 → 100644
浏览文件 @
dfbac603
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
from
operator
import
mul
import
paddle.fluid.core
as
core
import
paddle.fluid
as
fluid
from
op_test
import
OpTest
from
testsuite
import
create_op
def
group_norm_naive
(
x
,
scale
,
bias
,
epsilon
,
groups
):
N
,
C
,
H
,
W
=
x
.
shape
G
=
groups
x
=
x
.
reshape
((
N
*
G
,
-
1
))
mean
=
np
.
mean
(
x
,
axis
=
1
,
keepdims
=
True
)
var
=
np
.
var
(
x
,
axis
=
1
,
keepdims
=
True
)
output
=
(
x
-
mean
)
/
np
.
sqrt
(
var
+
epsilon
)
output
=
output
.
reshape
((
N
,
C
,
H
,
W
))
*
scale
.
reshape
(
(
-
1
,
1
,
1
))
+
bias
.
reshape
((
-
1
,
1
,
1
))
return
output
,
mean
.
reshape
((
N
,
G
)),
var
.
reshape
((
N
,
G
))
class
TestGroupNormOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"group_norm"
self
.
data_format
=
"NCHW"
self
.
dtype
=
np
.
float32
self
.
shape
=
(
2
,
4
,
3
,
3
)
self
.
attrs
=
{
'epsilon'
:
1e-5
,
'groups'
:
2
}
self
.
compare_between_place
=
False
self
.
init_test_case
()
input
=
np
.
random
.
random
(
self
.
shape
).
astype
(
self
.
dtype
)
scale
=
np
.
random
.
random
([
self
.
shape
[
1
]]).
astype
(
self
.
dtype
)
bias
=
np
.
random
.
random
([
self
.
shape
[
1
]]).
astype
(
self
.
dtype
)
output
,
mean
,
var
=
group_norm_naive
(
input
,
scale
,
bias
,
self
.
attrs
[
'epsilon'
],
self
.
attrs
[
'groups'
])
self
.
inputs
=
{
'X'
:
OpTest
.
np_dtype_to_fluid_dtype
(
input
),
'Scale'
:
OpTest
.
np_dtype_to_fluid_dtype
(
scale
),
'Bias'
:
OpTest
.
np_dtype_to_fluid_dtype
(
bias
)
}
self
.
outputs
=
{
'Y'
:
output
,
'Mean'
:
mean
,
'Variance'
:
var
}
def
test_check_output
(
self
):
atol
=
1e-4
place
=
core
.
CPUPlace
()
self
.
check_output_with_place
(
place
,
atol
=
atol
)
if
core
.
is_compiled_with_cuda
():
place
=
core
.
CUDAPlace
(
0
)
self
.
check_output_with_place
(
place
,
atol
=
atol
)
def
do_compare_between_place
(
self
):
if
not
core
.
is_compiled_with_cuda
():
return
place
=
core
.
CPUPlace
()
place2
=
core
.
CUDAPlace
(
0
)
self
.
scope
=
core
.
Scope
()
op_inputs
=
self
.
inputs
if
hasattr
(
self
,
"inputs"
)
else
dict
()
op_outputs
=
self
.
outputs
if
hasattr
(
self
,
"outputs"
)
else
dict
()
op_attrs
=
self
.
attrs
if
hasattr
(
self
,
"attrs"
)
else
dict
()
self
.
op
=
create_op
(
self
.
scope
,
self
.
op_type
,
op_inputs
,
op_outputs
,
op_attrs
)
inputs_to_check
=
set
([
'X'
,
'Scale'
,
'Bias'
])
output_names
=
'Y'
cpu_grads
=
self
.
_get_gradient
(
inputs_to_check
,
place
,
output_names
,
None
)
gpu_grads
=
self
.
_get_gradient
(
inputs_to_check
,
place2
,
output_names
,
None
)
self
.
_assert_is_close
(
cpu_grads
,
gpu_grads
,
inputs_to_check
,
0.005
,
"Gradient Check On %s"
%
str
(
place
))
def
test_check_grad
(
self
):
if
self
.
compare_between_place
:
self
.
do_compare_between_place
()
return
place
=
core
.
CPUPlace
()
self
.
check_grad_with_place
(
place
,
set
([
'X'
,
'Scale'
,
'Bias'
]),
'Y'
,
max_relative_error
=
0.01
)
if
core
.
is_compiled_with_cuda
():
place
=
core
.
CUDAPlace
(
0
)
self
.
check_grad_with_place
(
place
,
set
([
'X'
,
'Scale'
,
'Bias'
]),
'Y'
,
max_relative_error
=
0.01
)
def
init_test_case
(
self
):
pass
class
TestGroupNormOp1
(
TestGroupNormOp
):
def
init_test_case
(
self
):
self
.
attrs
[
'groups'
]
=
1
class
TestGroupNormOp2
(
TestGroupNormOp
):
def
init_test_case
(
self
):
self
.
attrs
[
'groups'
]
=
4
class
TestGroupNormOpBigEps1
(
TestGroupNormOp
):
def
init_test_case
(
self
):
self
.
attrs
[
'groups'
]
=
1
self
.
attrs
[
'epsilon'
]
=
0.5
class
TestGroupNormOpBigEps2
(
TestGroupNormOp
):
def
init_test_case
(
self
):
self
.
attrs
[
'groups'
]
=
4
self
.
attrs
[
'epsilon'
]
=
0.5
class
TestGroupNormOpBigEps3
(
TestGroupNormOp
):
def
init_test_case
(
self
):
self
.
attrs
[
'epsilon'
]
=
0.5
class
TestGroupNormOpLargeData
(
TestGroupNormOp
):
def
init_test_case
(
self
):
self
.
shape
=
(
2
,
32
,
64
,
64
)
self
.
attrs
[
'groups'
]
=
8
self
.
compare_between_place
=
True
if
__name__
==
'__main__'
:
unittest
.
main
()
tools/manylinux1/Dockerfile.x64
浏览文件 @
dfbac603
...
@@ -36,17 +36,21 @@ RUN cd /opt && wget -q --no-check-certificate https://github.com/google/protobuf
...
@@ -36,17 +36,21 @@ RUN cd /opt && wget -q --no-check-certificate https://github.com/google/protobuf
tar xzf protobuf-cpp-3.1.0.tar.gz && \
tar xzf protobuf-cpp-3.1.0.tar.gz && \
cd protobuf-3.1.0 && ./configure && make -j4 && make install && cd .. && rm -f protobuf-cpp-3.1.0.tar.gz
cd protobuf-3.1.0 && ./configure && make -j4 && make install && cd .. && rm -f protobuf-cpp-3.1.0.tar.gz
RUN wget
-O /root/requirements.txt https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python
/requirements.txt
RUN wget
https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt -O /root
/requirements.txt
RUN LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27mu/bin/pip install -r /root/requirements.txt && \
RUN LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27mu/bin/pip install -r /root/requirements.txt && \
LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27m/bin/pip install -r /root/requirements.txt && \
LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27m/bin/pip install -r /root/requirements.txt && \
LD_LIBRARY_PATH=/opt/_internal/cpython-3.5.1/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.5.1/bin/pip3 install -r /root/requirements.txt && \
LD_LIBRARY_PATH=/opt/_internal/cpython-3.5.1/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.5.1/bin/pip3 install -r /root/requirements.txt && \
LD_LIBRARY_PATH=/opt/_internal/cpython-3.6.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.6.0/bin/pip3 install -r /root/requirements.txt && \
LD_LIBRARY_PATH=/opt/_internal/cpython-3.7.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.7.0/bin/pip3 install -r /root/requirements.txt && \
go get github.com/Masterminds/glide && \
go get github.com/Masterminds/glide && \
rm -rf /root/requirements.txt
rm -rf /root/requirements.txt
RUN LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27mu/bin/pip install pre-commit 'ipython==5.3.0' opencv-python && \
RUN LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27mu/bin/pip install pre-commit 'ipython==5.3.0' opencv-python && \
LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27m/bin/pip install pre-commit 'ipython==5.3.0' opencv-python && \
LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27m/bin/pip install pre-commit 'ipython==5.3.0' opencv-python && \
LD_LIBRARY_PATH=/opt/_internal/cpython-3.5.1/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.5.1/bin/pip3 install pre-commit 'ipython==5.3.0' opencv-python
LD_LIBRARY_PATH=/opt/_internal/cpython-3.5.1/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.5.1/bin/pip3 install pre-commit 'ipython==5.3.0' opencv-python && \
LD_LIBRARY_PATH=/opt/_internal/cpython-3.6.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.6.0/bin/pip3 install pre-commit 'ipython==5.3.0' opencv-python && \
LD_LIBRARY_PATH=/opt/_internal/cpython-3.7.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.7.0/bin/pip3 install pre-commit 'ipython==5.3.0' opencv-python
RUN wget -O /opt/swig-2.0.12.tar.gz https://cytranet.dl.sourceforge.net/project/swig/swig/swig-2.0.12/swig-2.0.12.tar.gz && \
RUN wget -O /opt/swig-2.0.12.tar.gz https://cytranet.dl.sourceforge.net/project/swig/swig/swig-2.0.12/swig-2.0.12.tar.gz && \
cd /opt && tar xzf swig-2.0.12.tar.gz && cd /opt/swig-2.0.12 && ./configure && make && make install && cd /opt && rm swig-2.0.12.tar.gz
cd /opt && tar xzf swig-2.0.12.tar.gz && cd /opt/swig-2.0.12 && ./configure && make && make install && cd /opt && rm swig-2.0.12.tar.gz
...
...
tools/manylinux1/build_scripts/build.sh
浏览文件 @
dfbac603
...
@@ -9,12 +9,12 @@ set -ex
...
@@ -9,12 +9,12 @@ set -ex
# remove others to expedite build and reduce docker image size. The original
# remove others to expedite build and reduce docker image size. The original
# manylinux docker image project builds many python versions.
# manylinux docker image project builds many python versions.
# NOTE We added back 3.5.1, since auditwheel requires python 3.3+
# NOTE We added back 3.5.1, since auditwheel requires python 3.3+
CPYTHON_VERSIONS
=
"
2.7.11 3.5.
1"
CPYTHON_VERSIONS
=
"
3.7.0 3.6.0 3.5.1 2.7.1
1"
# openssl version to build, with expected sha256 hash of .tar.gz
# openssl version to build, with expected sha256 hash of .tar.gz
# archive
# archive
OPENSSL_ROOT
=
openssl-1.
0.2l
OPENSSL_ROOT
=
openssl-1.
1.0i
OPENSSL_HASH
=
ce07195b659e75f4e1db43552860070061f156a98bb37b672b101ba6e3ddf30c
OPENSSL_HASH
=
ebbfc844a8c8cc0ea5dc10b86c9ce97f401837f3fa08c17b2cdadc118253cf99
EPEL_RPM_HASH
=
e5ed9ecf22d0c4279e92075a64c757ad2b38049bcf5c16c4f2b75d5f6860dc0d
EPEL_RPM_HASH
=
e5ed9ecf22d0c4279e92075a64c757ad2b38049bcf5c16c4f2b75d5f6860dc0d
DEVTOOLS_HASH
=
a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc
DEVTOOLS_HASH
=
a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc
PATCHELF_HASH
=
d9afdff4baeacfbc64861454f368b7f2c15c44d245293f7587bbf726bfe722fb
PATCHELF_HASH
=
d9afdff4baeacfbc64861454f368b7f2c15c44d245293f7587bbf726bfe722fb
...
@@ -25,7 +25,7 @@ AUTOCONF_HASH=954bd69b391edc12d6a4a51a2dd1476543da5c6bbf05a95b59dc0dd6fd4c2969
...
@@ -25,7 +25,7 @@ AUTOCONF_HASH=954bd69b391edc12d6a4a51a2dd1476543da5c6bbf05a95b59dc0dd6fd4c2969
# Dependencies for compiling Python that we want to remove from
# Dependencies for compiling Python that we want to remove from
# the final image after compiling Python
# the final image after compiling Python
PYTHON_COMPILE_DEPS
=
"zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel"
PYTHON_COMPILE_DEPS
=
"zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel
libffi-devel
"
# Libraries that are allowed as part of the manylinux1 profile
# Libraries that are allowed as part of the manylinux1 profile
MANYLINUX1_DEPS
=
"glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel mesa-libGL-devel libICE-devel libSM-devel ncurses-devel freetype-devel libpng-devel"
MANYLINUX1_DEPS
=
"glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel mesa-libGL-devel libICE-devel libSM-devel ncurses-devel freetype-devel libpng-devel"
...
@@ -61,7 +61,7 @@ yum -y install bzip2 make git patch unzip bison yasm diffutils \
...
@@ -61,7 +61,7 @@ yum -y install bzip2 make git patch unzip bison yasm diffutils \
wget
-q
https://cmake.org/files/v3.5/cmake-3.5.2.tar.gz
&&
tar
xzf cmake-3.5.2.tar.gz
&&
\
wget
-q
https://cmake.org/files/v3.5/cmake-3.5.2.tar.gz
&&
tar
xzf cmake-3.5.2.tar.gz
&&
\
cd
cmake-3.5.2
&&
./bootstrap
&&
\
cd
cmake-3.5.2
&&
./bootstrap
&&
\
make
-j
4
&&
make
install
&&
cd
..
&&
rm
cmake-3.5.2.tar.gz
make
-j
8
&&
make
install
&&
cd
..
&&
rm
cmake-3.5.2.tar.gz
# Install newest autoconf
# Install newest autoconf
...
@@ -77,11 +77,13 @@ mkdir -p /opt/python
...
@@ -77,11 +77,13 @@ mkdir -p /opt/python
build_cpythons
$CPYTHON_VERSIONS
build_cpythons
$CPYTHON_VERSIONS
PY35_BIN
=
/opt/python/cp35-cp35m/bin
PY35_BIN
=
/opt/python/cp35-cp35m/bin
PY36_BIN
=
/opt/python/cp36-cp36m/bin
PY37_BIN
=
/opt/python/cp37-cp37m/bin
# NOTE Since our custom manylinux image builds pythons with shared
# NOTE Since our custom manylinux image builds pythons with shared
# libpython, we need to add libpython's dir to LD_LIBRARY_PATH before running
# libpython, we need to add libpython's dir to LD_LIBRARY_PATH before running
# python.
# python.
ORIGINAL_LD_LIBRARY_PATH
=
"
${
LD_LIBRARY_PATH
}
"
ORIGINAL_LD_LIBRARY_PATH
=
"
${
LD_LIBRARY_PATH
}
"
LD_LIBRARY_PATH
=
"
${
ORIGINAL_LD_LIBRARY_PATH
}
:
$(
dirname
${
PY35_BIN
}
)
/lib"
LD_LIBRARY_PATH
=
"
${
ORIGINAL_LD_LIBRARY_PATH
}
:
$(
dirname
${
PY35_BIN
}
)
/lib
:
$(
dirname
${
PY36_BIN
}
)
/lib:
$(
dirname
${
PY37_BIN
}
)
/lib
"
# Our openssl doesn't know how to find the system CA trust store
# Our openssl doesn't know how to find the system CA trust store
# (https://github.com/pypa/manylinux/issues/53)
# (https://github.com/pypa/manylinux/issues/53)
...
@@ -119,9 +121,8 @@ ln -s $PY35_BIN/auditwheel /usr/local/bin/auditwheel
...
@@ -119,9 +121,8 @@ ln -s $PY35_BIN/auditwheel /usr/local/bin/auditwheel
# final image
# final image
yum
-y
erase wireless-tools gtk2 libX11 hicolor-icon-theme
\
yum
-y
erase wireless-tools gtk2 libX11 hicolor-icon-theme
\
avahi freetype bitstream-vera-fonts
\
avahi freetype bitstream-vera-fonts
\
${
PYTHON_COMPILE_DEPS
}
>
/dev/null 2>&1
${
PYTHON_COMPILE_DEPS
}
>
/dev/null 2>&1
||
true
yum
-y
install
${
MANYLINUX1_DEPS
}
yum
-y
install
${
MANYLINUX1_DEPS
}
&&
yum
-y
clean all
>
/dev/null 2>&1
||
true
yum
-y
clean all
>
/dev/null 2>&1
yum list installed
yum list installed
# we don't need libpython*.a, and they're many megabytes
# we don't need libpython*.a, and they're many megabytes
find /opt/_internal
-name
'*.a'
-print0
| xargs
-0
rm
-f
find /opt/_internal
-name
'*.a'
-print0
| xargs
-0
rm
-f
...
...
tools/manylinux1/build_scripts/build_utils.sh
浏览文件 @
dfbac603
...
@@ -52,9 +52,17 @@ function do_cpython_build {
...
@@ -52,9 +52,17 @@ function do_cpython_build {
# NOTE --enable-shared for generating libpython shared library needed for
# NOTE --enable-shared for generating libpython shared library needed for
# linking of some of the nupic.core test executables.
# linking of some of the nupic.core test executables.
CFLAGS
=
"-Wformat"
./configure
--prefix
=
${
prefix
}
--enable-shared
$unicode_flags
>
/dev/null
if
[
$(
lex_pyver
$py_ver
)
-ge
$(
lex_pyver 3.7
)
]
;
then
make
-j2
>
/dev/null
# NOTE python 3.7 should be installed via make altinstall rather than
make
install
>
/dev/null
# make install, and we should specify the location of ssl
CFLAGS
=
"-Wformat"
./configure
--prefix
=
${
prefix
}
--with-openssl
=
/usr/local/ssl
--enable-shared
$unicode_flags
>
/dev/null
make
-j8
>
/dev/null
make altinstall
>
/dev/null
else
CFLAGS
=
"-Wformat"
./configure
--prefix
=
${
prefix
}
--enable-shared
$unicode_flags
>
/dev/null
make
-j8
>
/dev/null
make
install
>
/dev/null
fi
popd
popd
echo
"ZZZ looking for libpython"
echo
"ZZZ looking for libpython"
find /
-name
'libpython*.so*'
find /
-name
'libpython*.so*'
...
@@ -64,6 +72,9 @@ function do_cpython_build {
...
@@ -64,6 +72,9 @@ function do_cpython_build {
if
[
-e
${
prefix
}
/bin/python3
]
;
then
if
[
-e
${
prefix
}
/bin/python3
]
;
then
ln
-s
python3
${
prefix
}
/bin/python
ln
-s
python3
${
prefix
}
/bin/python
fi
fi
if
[
-e
${
prefix
}
/bin/python3.7
]
;
then
ln
-s
python3.7
${
prefix
}
/bin/python
fi
# NOTE Make libpython shared library visible to python calls below
# NOTE Make libpython shared library visible to python calls below
LD_LIBRARY_PATH
=
"
${
prefix
}
/lib"
${
prefix
}
/bin/python get-pip.py
LD_LIBRARY_PATH
=
"
${
prefix
}
/lib"
${
prefix
}
/bin/python get-pip.py
LD_LIBRARY_PATH
=
"
${
prefix
}
/lib"
${
prefix
}
/bin/pip
install
wheel
LD_LIBRARY_PATH
=
"
${
prefix
}
/lib"
${
prefix
}
/bin/pip
install
wheel
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录