Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
5ccab2dc
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5ccab2dc
编写于
2月 11, 2018
作者:
C
chengduoZH
浏览文件
操作
浏览文件
下载
差异文件
remove conflict
上级
bbff442e
1dceb99e
变更
54
显示空白变更内容
内联
并排
Showing
54 changed file
with
934 addition
and
610 deletion
+934
-610
AUTHORS.md
AUTHORS.md
+1
-1
doc/design/kernel_selection.md
doc/design/kernel_selection.md
+0
-0
doc/templates/conf.py.cn.in
doc/templates/conf.py.cn.in
+1
-1
doc/templates/conf.py.en.in
doc/templates/conf.py.en.in
+1
-1
paddle/CMakeLists.txt
paddle/CMakeLists.txt
+0
-1
paddle/fluid/CMakeLists.txt
paddle/fluid/CMakeLists.txt
+1
-0
paddle/fluid/framework/ddim.cc
paddle/fluid/framework/ddim.cc
+10
-0
paddle/fluid/framework/ddim.h
paddle/fluid/framework/ddim.h
+2
-0
paddle/fluid/framework/init.cc
paddle/fluid/framework/init.cc
+1
-1
paddle/fluid/framework/mixed_vector.h
paddle/fluid/framework/mixed_vector.h
+9
-4
paddle/fluid/framework/mixed_vector_test.cu
paddle/fluid/framework/mixed_vector_test.cu
+11
-4
paddle/fluid/framework/scope.cc
paddle/fluid/framework/scope.cc
+1
-1
paddle/fluid/operators/concat_op.h
paddle/fluid/operators/concat_op.h
+19
-19
paddle/fluid/operators/listen_and_serv_op.cc
paddle/fluid/operators/listen_and_serv_op.cc
+26
-9
paddle/fluid/operators/multiclass_nms_op.cc
paddle/fluid/operators/multiclass_nms_op.cc
+19
-10
paddle/fluid/operators/send_op.cc
paddle/fluid/operators/send_op.cc
+22
-2
paddle/fluid/operators/send_recv_op_test.cc
paddle/fluid/operators/send_recv_op_test.cc
+1
-1
paddle/fluid/operators/sequence_expand_op.cc
paddle/fluid/operators/sequence_expand_op.cc
+3
-1
paddle/fluid/operators/split_op.h
paddle/fluid/operators/split_op.h
+10
-9
paddle/fluid/operators/split_selected_rows_op.cc
paddle/fluid/operators/split_selected_rows_op.cc
+1
-22
paddle/fluid/operators/split_selected_rows_op.h
paddle/fluid/operators/split_selected_rows_op.h
+1
-0
paddle/fluid/operators/strided_memcpy.h
paddle/fluid/operators/strided_memcpy.h
+57
-0
paddle/fluid/operators/sum_op.h
paddle/fluid/operators/sum_op.h
+3
-1
paddle/fluid/operators/target_assign_op.cc
paddle/fluid/operators/target_assign_op.cc
+76
-117
paddle/fluid/operators/target_assign_op.cu
paddle/fluid/operators/target_assign_op.cu
+22
-20
paddle/fluid/operators/target_assign_op.h
paddle/fluid/operators/target_assign_op.h
+71
-98
paddle/fluid/platform/cpu_info_test.cc
paddle/fluid/platform/cpu_info_test.cc
+1
-1
paddle/fluid/platform/enforce.h
paddle/fluid/platform/enforce.h
+2
-2
paddle/fluid/platform/enforce_test.cc
paddle/fluid/platform/enforce_test.cc
+1
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+1
-1
paddle/fluid/string/.clang-format
paddle/fluid/string/.clang-format
+0
-0
paddle/fluid/string/CMakeLists.txt
paddle/fluid/string/CMakeLists.txt
+0
-0
paddle/fluid/string/piece.cc
paddle/fluid/string/piece.cc
+1
-1
paddle/fluid/string/piece.h
paddle/fluid/string/piece.h
+2
-2
paddle/fluid/string/piece_test.cc
paddle/fluid/string/piece_test.cc
+1
-1
paddle/fluid/string/printf.h
paddle/fluid/string/printf.h
+1
-1
paddle/fluid/string/printf_test.cc
paddle/fluid/string/printf_test.cc
+3
-3
paddle/fluid/string/tinyformat/tinyformat.h
paddle/fluid/string/tinyformat/tinyformat.h
+41
-65
paddle/fluid/string/to_string.h
paddle/fluid/string/to_string.h
+0
-0
paddle/fluid/string/to_string_test.cc
paddle/fluid/string/to_string_test.cc
+2
-2
paddle/scripts/docker/build.sh
paddle/scripts/docker/build.sh
+2
-2
paddle/scripts/travis/build_doc.sh
paddle/scripts/travis/build_doc.sh
+3
-3
python/paddle/v2/fluid/distribute_transpiler.py
python/paddle/v2/fluid/distribute_transpiler.py
+176
-74
python/paddle/v2/fluid/layers/__init__.py
python/paddle/v2/fluid/layers/__init__.py
+3
-0
python/paddle/v2/fluid/layers/detection.py
python/paddle/v2/fluid/layers/detection.py
+107
-9
python/paddle/v2/fluid/layers/math_op_patch.py
python/paddle/v2/fluid/layers/math_op_patch.py
+1
-0
python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
...le/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
+1
-1
python/paddle/v2/fluid/tests/test_cpp_reader.py
python/paddle/v2/fluid/tests/test_cpp_reader.py
+1
-3
python/paddle/v2/fluid/tests/test_detection.py
python/paddle/v2/fluid/tests/test_detection.py
+135
-0
python/paddle/v2/fluid/tests/test_multiclass_nms_op.py
python/paddle/v2/fluid/tests/test_multiclass_nms_op.py
+5
-5
python/paddle/v2/fluid/tests/test_prior_boxes.py
python/paddle/v2/fluid/tests/test_prior_boxes.py
+0
-87
python/paddle/v2/fluid/tests/test_sequence_expand.py
python/paddle/v2/fluid/tests/test_sequence_expand.py
+15
-0
python/paddle/v2/fluid/tests/test_split_op.py
python/paddle/v2/fluid/tests/test_split_op.py
+4
-4
python/paddle/v2/fluid/tests/test_target_assign_op.py
python/paddle/v2/fluid/tests/test_target_assign_op.py
+56
-19
未找到文件。
AUTHORS.md
浏览文件 @
5ccab2dc
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
|---|---|
|---|---|
| backyes | Yan-Fei Wang |
| backyes | Yan-Fei Wang |
| beckett1124 | Bin Qi |
| beckett1124 | Bin Qi |
|
Canpio
| Jia-Yi Feng |
|
JiayiFeng
| Jia-Yi Feng |
| chengxiaohua1105 | Xiao-Hua Cheng |
| chengxiaohua1105 | Xiao-Hua Cheng |
| cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang |
| cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang |
| cxysteven | Xing-Yi Cheng |
| cxysteven | Xing-Yi Cheng |
...
...
doc/design/
switch_kernel
.md
→
doc/design/
kernel_selection
.md
浏览文件 @
5ccab2dc
文件已移动
doc/templates/conf.py.cn.in
浏览文件 @
5ccab2dc
...
@@ -82,7 +82,7 @@ language = 'zh_CN'
...
@@ -82,7 +82,7 @@ language = 'zh_CN'
# List of patterns, relative to source directory, that match files and
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# directories to ignore when looking for source files.
exclude_patterns = ['_build', '**/*_en*', '*_en*']
exclude_patterns = ['_build', '**/*_en*', '*_en*'
, 'api/*'
]
# The reST default role (used for this markup: `text`) to use for all
# The reST default role (used for this markup: `text`) to use for all
# documents.
# documents.
...
...
doc/templates/conf.py.en.in
浏览文件 @
5ccab2dc
...
@@ -82,7 +82,7 @@ language = None
...
@@ -82,7 +82,7 @@ language = None
# List of patterns, relative to source directory, that match files and
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# directories to ignore when looking for source files.
exclude_patterns = ['_build', '**/*_cn*', '*_cn*']
exclude_patterns = ['_build', '**/*_cn*', '*_cn*'
, 'api/*'
]
# The reST default role (used for this markup: `text`) to use for all
# The reST default role (used for this markup: `text`) to use for all
# documents.
# documents.
...
...
paddle/CMakeLists.txt
浏览文件 @
5ccab2dc
...
@@ -11,7 +11,6 @@ if(MOBILE_INFERENCE)
...
@@ -11,7 +11,6 @@ if(MOBILE_INFERENCE)
else
()
else
()
add_subdirectory
(
pserver
)
add_subdirectory
(
pserver
)
add_subdirectory
(
trainer
)
add_subdirectory
(
trainer
)
add_subdirectory
(
string
)
add_subdirectory
(
scripts
)
add_subdirectory
(
scripts
)
if
(
WITH_C_API
)
if
(
WITH_C_API
)
...
...
paddle/fluid/CMakeLists.txt
浏览文件 @
5ccab2dc
...
@@ -4,3 +4,4 @@ add_subdirectory(framework)
...
@@ -4,3 +4,4 @@ add_subdirectory(framework)
add_subdirectory
(
operators
)
add_subdirectory
(
operators
)
add_subdirectory
(
pybind
)
add_subdirectory
(
pybind
)
add_subdirectory
(
inference
)
add_subdirectory
(
inference
)
add_subdirectory
(
string
)
paddle/fluid/framework/ddim.cc
浏览文件 @
5ccab2dc
...
@@ -314,5 +314,15 @@ DDim stride(const DDim& ddim) {
...
@@ -314,5 +314,15 @@ DDim stride(const DDim& ddim) {
}
}
return
framework
::
make_ddim
(
strides
);
return
framework
::
make_ddim
(
strides
);
}
}
DDim
stride_numel
(
const
framework
::
DDim
&
ddim
)
{
std
::
vector
<
int64_t
>
strides
(
ddim
.
size
());
strides
[
ddim
.
size
()
-
1
]
=
ddim
[
ddim
.
size
()
-
1
];
for
(
int
i
=
ddim
.
size
()
-
2
;
i
>=
0
;
--
i
)
{
strides
[
i
]
=
strides
[
i
+
1
]
*
ddim
[
i
];
}
return
framework
::
make_ddim
(
strides
);
}
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/ddim.h
浏览文件 @
5ccab2dc
...
@@ -125,6 +125,8 @@ DDim flatten_to_2d(const DDim& src, int num_col_dims);
...
@@ -125,6 +125,8 @@ DDim flatten_to_2d(const DDim& src, int num_col_dims);
DDim
flatten_to_1d
(
const
DDim
&
src
);
DDim
flatten_to_1d
(
const
DDim
&
src
);
DDim
stride
(
const
DDim
&
ddim
);
DDim
stride
(
const
DDim
&
ddim
);
DDim
stride_numel
(
const
DDim
&
ddim
);
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
...
...
paddle/fluid/framework/init.cc
浏览文件 @
5ccab2dc
...
@@ -20,7 +20,7 @@ limitations under the License. */
...
@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/string/piece.h"
#include "paddle/
fluid/
string/piece.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
...
...
paddle/fluid/framework/mixed_vector.h
浏览文件 @
5ccab2dc
...
@@ -37,9 +37,8 @@ class Vector {
...
@@ -37,9 +37,8 @@ class Vector {
// Fill vector with value. The vector size is `count`.
// Fill vector with value. The vector size is `count`.
explicit
Vector
(
size_t
count
,
const
T
&
value
=
T
())
{
explicit
Vector
(
size_t
count
,
const
T
&
value
=
T
())
{
if
(
count
==
0
)
{
InitEmpty
();
InitEmpty
();
}
else
{
if
(
count
!=
0
)
{
resize
(
count
);
resize
(
count
);
T
*
ptr
=
begin
();
T
*
ptr
=
begin
();
for
(
size_t
i
=
0
;
i
<
count
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
count
;
++
i
)
{
...
@@ -122,6 +121,10 @@ class Vector {
...
@@ -122,6 +121,10 @@ class Vector {
const
T
*
begin
()
const
{
return
&
this
->
operator
[](
0
);
}
const
T
*
begin
()
const
{
return
&
this
->
operator
[](
0
);
}
const
T
*
end
()
const
{
return
&
this
->
operator
[](
size
());
}
const
T
*
end
()
const
{
return
&
this
->
operator
[](
size
());
}
const
T
*
cbegin
()
const
{
return
begin
();
}
const
T
*
cend
()
const
{
return
end
();
}
const
T
&
back
()
const
{
const
T
&
back
()
const
{
auto
it
=
end
();
auto
it
=
end
();
--
it
;
--
it
;
...
@@ -244,7 +247,9 @@ class Vector {
...
@@ -244,7 +247,9 @@ class Vector {
bool
operator
==
(
const
Vector
<
T
>&
other
)
const
{
bool
operator
==
(
const
Vector
<
T
>&
other
)
const
{
if
(
size
()
!=
other
.
size
())
return
false
;
if
(
size
()
!=
other
.
size
())
return
false
;
for
(
auto
it1
=
begin
(),
it2
=
other
.
begin
();
it1
<
end
();
++
it1
,
++
it2
)
{
auto
it1
=
cbegin
();
auto
it2
=
other
.
cbegin
();
for
(;
it1
<
cend
();
++
it1
,
++
it2
)
{
if
(
*
it1
!=
*
it2
)
{
if
(
*
it1
!=
*
it2
)
{
return
false
;
return
false
;
}
}
...
...
paddle/fluid/framework/mixed_vector_test.cu
浏览文件 @
5ccab2dc
...
@@ -26,10 +26,10 @@ TEST(mixed_vector, CPU_VECTOR) {
...
@@ -26,10 +26,10 @@ TEST(mixed_vector, CPU_VECTOR) {
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
tmp
.
push_back
(
i
);
tmp
.
push_back
(
i
);
}
}
ASSERT_EQ
(
tmp
.
size
(),
10
);
ASSERT_EQ
(
tmp
.
size
(),
10
UL
);
vec
<
int
>
tmp2
;
vec
<
int
>
tmp2
;
tmp2
=
tmp
;
tmp2
=
tmp
;
ASSERT_EQ
(
tmp2
.
size
(),
10
);
ASSERT_EQ
(
tmp2
.
size
(),
10
UL
);
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
ASSERT_EQ
(
tmp2
[
i
],
i
);
ASSERT_EQ
(
tmp2
[
i
],
i
);
ASSERT_EQ
(
tmp2
[
i
],
tmp
[
i
]);
ASSERT_EQ
(
tmp2
[
i
],
tmp
[
i
]);
...
@@ -58,7 +58,7 @@ TEST(mixed_vector, GPU_VECTOR) {
...
@@ -58,7 +58,7 @@ TEST(mixed_vector, GPU_VECTOR) {
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
tmp
.
push_back
(
i
);
tmp
.
push_back
(
i
);
}
}
ASSERT_EQ
(
tmp
.
size
(),
10
);
ASSERT_EQ
(
tmp
.
size
(),
10
UL
);
paddle
::
platform
::
CUDAPlace
gpu
(
0
);
paddle
::
platform
::
CUDAPlace
gpu
(
0
);
multiply_10
<<<
1
,
1
,
0
,
GetCUDAStream
(
gpu
)
>>>
(
tmp
.
MutableData
(
gpu
));
multiply_10
<<<
1
,
1
,
0
,
GetCUDAStream
(
gpu
)
>>>
(
tmp
.
MutableData
(
gpu
));
...
@@ -79,7 +79,7 @@ TEST(mixed_vector, MultiGPU) {
...
@@ -79,7 +79,7 @@ TEST(mixed_vector, MultiGPU) {
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
tmp
.
push_back
(
i
);
tmp
.
push_back
(
i
);
}
}
ASSERT_EQ
(
tmp
.
size
(),
10
);
ASSERT_EQ
(
tmp
.
size
(),
10
UL
);
paddle
::
platform
::
CUDAPlace
gpu0
(
0
);
paddle
::
platform
::
CUDAPlace
gpu0
(
0
);
paddle
::
platform
::
SetDeviceId
(
0
);
paddle
::
platform
::
SetDeviceId
(
0
);
multiply_10
<<<
1
,
1
,
0
,
GetCUDAStream
(
gpu0
)
>>>
(
tmp
.
MutableData
(
gpu0
));
multiply_10
<<<
1
,
1
,
0
,
GetCUDAStream
(
gpu0
)
>>>
(
tmp
.
MutableData
(
gpu0
));
...
@@ -91,3 +91,10 @@ TEST(mixed_vector, MultiGPU) {
...
@@ -91,3 +91,10 @@ TEST(mixed_vector, MultiGPU) {
ASSERT_EQ
(
tmp
[
i
],
i
*
100
);
ASSERT_EQ
(
tmp
[
i
],
i
*
100
);
}
}
}
}
TEST
(
mixed_vector
,
InitWithCount
)
{
paddle
::
framework
::
Vector
<
int
>
vec
(
10
,
10
);
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
ASSERT_EQ
(
vec
[
i
],
10
);
}
}
paddle/fluid/framework/scope.cc
浏览文件 @
5ccab2dc
...
@@ -18,7 +18,7 @@ limitations under the License. */
...
@@ -18,7 +18,7 @@ limitations under the License. */
#include <mutex> // for call_once
#include <mutex> // for call_once
#include "glog/logging.h"
#include "glog/logging.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/string/printf.h"
#include "paddle/
fluid/
string/printf.h"
DEFINE_bool
(
benchmark
,
false
,
DEFINE_bool
(
benchmark
,
false
,
"Doing memory benchmark. It will make deleting scope synchronized, "
"Doing memory benchmark. It will make deleting scope synchronized, "
...
...
paddle/fluid/operators/concat_op.h
浏览文件 @
5ccab2dc
...
@@ -28,17 +28,18 @@ class ConcatKernel : public framework::OpKernel<T> {
...
@@ -28,17 +28,18 @@ class ConcatKernel : public framework::OpKernel<T> {
auto
ins
=
ctx
.
MultiInput
<
framework
::
Tensor
>
(
"X"
);
auto
ins
=
ctx
.
MultiInput
<
framework
::
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
int64_t
axis
=
static_cast
<
int64_t
>
(
ctx
.
Attr
<
int
>
(
"axis"
));
int64_t
axis
=
static_cast
<
int64_t
>
(
ctx
.
Attr
<
int
>
(
"axis"
));
const
size_t
n
=
ins
.
size
();
auto
place
=
ctx
.
GetPlace
();
out
->
mutable_data
<
T
>
(
place
);
auto
out_stride
=
framework
::
stride_numel
(
out
->
dims
());
size_t
output_offset
=
0
;
size_t
output_offset
=
0
;
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
for
(
auto
*
in
:
ins
)
{
auto
out_stride
=
framework
::
stride
(
out
->
dims
());
auto
in_stride
=
framework
::
stride_numel
(
in
->
dims
());
for
(
size_t
i
=
0
;
i
<
n
;
i
++
)
{
StridedNumelCopyWithAxis
<
T
>
(
ctx
.
device_context
(),
axis
,
auto
&
in
=
ins
[
i
];
out
->
data
<
T
>
()
+
output_offset
,
out_stride
,
auto
axis_dim
=
in
->
dims
()[
axis
];
in
->
data
<
T
>
(),
in_stride
);
auto
in_stride
=
framework
::
stride
(
in
->
dims
());
output_offset
+=
in_stride
[
axis
];
StridedMemcpy
<
T
>
(
ctx
.
device_context
(),
in
->
data
<
T
>
(),
in_stride
,
in
->
dims
(),
out_stride
,
out
->
data
<
T
>
()
+
output_offset
);
output_offset
+=
axis_dim
*
in_stride
[
axis
];
}
}
}
}
};
};
...
@@ -50,17 +51,16 @@ class ConcatGradKernel : public framework::OpKernel<T> {
...
@@ -50,17 +51,16 @@ class ConcatGradKernel : public framework::OpKernel<T> {
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
outs
=
ctx
.
MultiOutput
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
outs
=
ctx
.
MultiOutput
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
int64_t
axis
=
static_cast
<
int64_t
>
(
ctx
.
Attr
<
int
>
(
"axis"
));
int64_t
axis
=
static_cast
<
int64_t
>
(
ctx
.
Attr
<
int
>
(
"axis"
));
const
size_t
n
=
outs
.
size
();
size_t
input_offset
=
0
;
size_t
input_offset
=
0
;
auto
in_stride
=
framework
::
stride
(
in
->
dims
());
auto
in_stride
=
framework
::
stride
_numel
(
in
->
dims
());
for
(
size_t
i
=
0
;
i
<
n
;
i
++
)
{
auto
&
out
=
outs
[
i
];
for
(
auto
&
out
:
outs
)
{
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
size_t
axis_dim
=
out
->
dims
()[
axis
]
;
auto
out_stride
=
framework
::
stride_numel
(
out
->
dims
())
;
auto
out_stride
=
framework
::
stride
(
out
->
dims
());
StridedNumelCopyWithAxis
<
T
>
(
ctx
.
device_context
(),
axis
,
out
->
data
<
T
>
(),
StridedMemcpy
<
T
>
(
ctx
.
device_context
()
,
in
->
data
<
T
>
()
+
input_offset
,
out_stride
,
in
->
data
<
T
>
()
+
input_offset
,
in_stride
,
out
->
dims
(),
out_stride
,
out
->
data
<
T
>
()
);
in_stride
);
input_offset
+=
axis_dim
*
in
_stride
[
axis
];
input_offset
+=
out
_stride
[
axis
];
}
}
}
}
};
};
...
...
paddle/fluid/operators/listen_and_serv_op.cc
浏览文件 @
5ccab2dc
...
@@ -27,7 +27,7 @@ limitations under the License. */
...
@@ -27,7 +27,7 @@ limitations under the License. */
#include "paddle/fluid/operators/detail/grpc_server.h"
#include "paddle/fluid/operators/detail/grpc_server.h"
#include "paddle/fluid/operators/detail/sendrecvop_utils.h"
#include "paddle/fluid/operators/detail/sendrecvop_utils.h"
#include "paddle/fluid/operators/detail/simple_block_queue.h"
#include "paddle/fluid/operators/detail/simple_block_queue.h"
#include "paddle/string/printf.h"
#include "paddle/
fluid/
string/printf.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -101,11 +101,15 @@ class ListenAndServOp : public framework::OperatorBase {
...
@@ -101,11 +101,15 @@ class ListenAndServOp : public framework::OperatorBase {
// TODO(typhoonzero): change this to a while_op for every cluster-batch.
// TODO(typhoonzero): change this to a while_op for every cluster-batch.
bool
exit_flag
=
false
;
bool
exit_flag
=
false
;
// Record received sparse variables, so that
// we could reset those after execute optimize program
std
::
vector
<
framework
::
Variable
*>
sparse_vars
;
while
(
!
exit_flag
)
{
while
(
!
exit_flag
)
{
// Get from multiple trainers, we don't care about the order in which
// Get from multiple trainers, we don't care about the order in which
// the gradients arrives, just add suffix 0~n and merge the gradient.
// the gradients arrives, just add suffix 0~n and merge the gradient.
rpc_service_
->
SetCond
(
0
);
rpc_service_
->
SetCond
(
0
);
size_t
recv_var_cnt
=
0
;
size_t
recv_var_cnt
=
0
;
size_t
update_param_cnt
=
0
;
int
batch_barrier
=
0
;
int
batch_barrier
=
0
;
while
(
batch_barrier
!=
fan_in
)
{
while
(
batch_barrier
!=
fan_in
)
{
const
detail
::
MessageWithName
&
v
=
rpc_service_
->
Get
();
const
detail
::
MessageWithName
&
v
=
rpc_service_
->
Get
();
...
@@ -126,13 +130,14 @@ class ListenAndServOp : public framework::OperatorBase {
...
@@ -126,13 +130,14 @@ class ListenAndServOp : public framework::OperatorBase {
std
::
string
param_var_name
;
std
::
string
param_var_name
;
if
(
it
!=
grad_list
.
end
())
{
if
(
it
!=
grad_list
.
end
())
{
param_var_name
=
param_list
[
it
-
grad_list
.
begin
()];
param_var_name
=
param_list
[
it
-
grad_list
.
begin
()];
}
else
{
update_param_cnt
++
;
LOG
(
ERROR
)
<<
"grad has no paired param:"
<<
grad_var_name
;
}
VLOG
(
3
)
<<
"received grad: "
<<
grad_var_name
VLOG
(
3
)
<<
"received grad: "
<<
grad_var_name
<<
" updating param: "
<<
param_var_name
;
<<
" updating param: "
<<
param_var_name
;
}
else
{
if
(
fan_in
>
1
)
{
VLOG
(
3
)
<<
"received variable: "
<<
grad_var_name
<<
" no need to update param"
;
}
if
(
fan_in
>
1
&&
!
param_var_name
.
empty
())
{
grad_var_name
=
this
->
GetGradVarNameForTrainer
(
grad_var_name
);
grad_var_name
=
this
->
GetGradVarNameForTrainer
(
grad_var_name
);
}
}
auto
*
var
=
recv_scope
.
FindVar
(
grad_var_name
);
auto
*
var
=
recv_scope
.
FindVar
(
grad_var_name
);
...
@@ -141,23 +146,35 @@ class ListenAndServOp : public framework::OperatorBase {
...
@@ -141,23 +146,35 @@ class ListenAndServOp : public framework::OperatorBase {
PADDLE_THROW
(
"Can not find server side var"
);
PADDLE_THROW
(
"Can not find server side var"
);
}
}
detail
::
DeserializeFromMessage
(
v
.
second
,
dev_ctx
,
var
);
detail
::
DeserializeFromMessage
(
v
.
second
,
dev_ctx
,
var
);
if
(
var
->
IsType
<
framework
::
SelectedRows
>
())
{
sparse_vars
.
push_back
(
var
);
}
}
}
}
}
VLOG
(
3
)
<<
"recv "
<<
recv_var_cnt
<<
" parmeters for one barrier."
;
VLOG
(
3
)
<<
"recv "
<<
recv_var_cnt
<<
" parmeters for one barrier."
;
// TODO(Yancey1989): merge SelectedRows variables here
if
(
exit_flag
)
{
if
(
exit_flag
)
{
rpc_service_
->
ShutDown
();
rpc_service_
->
ShutDown
();
}
}
VLOG
(
3
)
<<
"run optimize graph..."
;
try
{
try
{
executor
.
Run
(
*
program
,
&
recv_scope
,
block
->
ID
(),
/*global_block*/
executor
.
Run
(
*
program
,
&
recv_scope
,
block
->
ID
(),
/*global_block*/
false
/*create_local_scope*/
,
false
/*create_vars*/
);
false
/*create_local_scope*/
,
false
/*create_vars*/
);
}
catch
(
std
::
exception
&
e
)
{
}
catch
(
std
::
exception
&
e
)
{
LOG
(
ERROR
)
<<
"run sub program error "
<<
e
.
what
();
LOG
(
ERROR
)
<<
"run sub program error "
<<
e
.
what
();
}
}
// Reset the received sparse variables, the sum operator would not
// sum the input sparse variables which rows is empty at the next
// mini-batch.
// TOOD(Yancey1989): move the reset action into an operator, we couldn't
// have any hide logic in the operator.
for
(
auto
&
var
:
sparse_vars
)
{
var
->
GetMutable
<
framework
::
SelectedRows
>
()
->
mutable_rows
()
->
clear
();
}
rpc_service_
->
SetCond
(
1
);
rpc_service_
->
SetCond
(
1
);
rpc_service_
->
WaitClientGet
(
recv_var
_cnt
);
rpc_service_
->
WaitClientGet
(
update_param
_cnt
);
grads_counter_
.
clear
();
grads_counter_
.
clear
();
sparse_vars
.
clear
();
}
// while(true)
}
// while(true)
}
}
...
...
paddle/fluid/operators/multiclass_nms_op.cc
浏览文件 @
5ccab2dc
...
@@ -38,22 +38,22 @@ class MultiClassNMSOp : public framework::OperatorWithKernel {
...
@@ -38,22 +38,22 @@ class MultiClassNMSOp : public framework::OperatorWithKernel {
auto
box_dims
=
ctx
->
GetInputDim
(
"BBoxes"
);
auto
box_dims
=
ctx
->
GetInputDim
(
"BBoxes"
);
auto
score_dims
=
ctx
->
GetInputDim
(
"Scores"
);
auto
score_dims
=
ctx
->
GetInputDim
(
"Scores"
);
PADDLE_ENFORCE_EQ
(
box_dims
.
size
(),
2
,
PADDLE_ENFORCE_EQ
(
box_dims
.
size
(),
3
,
"The rank of Input(BBoxes) must be
2
."
);
"The rank of Input(BBoxes) must be
3
."
);
PADDLE_ENFORCE_EQ
(
score_dims
.
size
(),
3
,
PADDLE_ENFORCE_EQ
(
score_dims
.
size
(),
3
,
"The rank of Input(Scores) must be 3."
);
"The rank of Input(Scores) must be 3."
);
PADDLE_ENFORCE_EQ
(
box_dims
[
1
],
4
,
PADDLE_ENFORCE_EQ
(
box_dims
[
2
],
4
,
"The 2nd dimension of Input(BBoxes) must be 4, "
"The 2nd dimension of Input(BBoxes) must be 4, "
"represents the layout of coordinate "
"represents the layout of coordinate "
"[xmin, ymin, xmax, ymax]"
);
"[xmin, ymin, xmax, ymax]"
);
PADDLE_ENFORCE_EQ
(
box_dims
[
0
],
score_dims
[
2
],
PADDLE_ENFORCE_EQ
(
box_dims
[
1
],
score_dims
[
2
],
"The 1st dimensiong of Input(BBoxes) must be equal to "
"The 1st dimensiong of Input(BBoxes) must be equal to "
"3rd dimension of Input(Scores), which represents the "
"3rd dimension of Input(Scores), which represents the "
"predicted bboxes."
);
"predicted bboxes."
);
// Here the box_dims[0] is not the real dimension of output.
// Here the box_dims[0] is not the real dimension of output.
// It will be rewritten in the computing kernel.
// It will be rewritten in the computing kernel.
ctx
->
SetOutputDim
(
"Out"
,
{
box_dims
[
0
],
6
});
ctx
->
SetOutputDim
(
"Out"
,
{
box_dims
[
1
],
6
});
}
}
protected:
protected:
...
@@ -260,15 +260,20 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
...
@@ -260,15 +260,20 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
int64_t
batch_size
=
score_dims
[
0
];
int64_t
batch_size
=
score_dims
[
0
];
int64_t
class_num
=
score_dims
[
1
];
int64_t
class_num
=
score_dims
[
1
];
int64_t
predict_dim
=
score_dims
[
2
];
int64_t
predict_dim
=
score_dims
[
2
];
int64_t
box_dim
=
boxes
->
dims
()[
2
];
std
::
vector
<
std
::
map
<
int
,
std
::
vector
<
int
>>>
all_indices
;
std
::
vector
<
std
::
map
<
int
,
std
::
vector
<
int
>>>
all_indices
;
std
::
vector
<
size_t
>
batch_starts
=
{
0
};
std
::
vector
<
size_t
>
batch_starts
=
{
0
};
for
(
int64_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
for
(
int64_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
Tensor
ins_score
=
scores
->
Slice
(
i
,
i
+
1
);
Tensor
ins_score
=
scores
->
Slice
(
i
,
i
+
1
);
ins_score
.
Resize
({
class_num
,
predict_dim
});
ins_score
.
Resize
({
class_num
,
predict_dim
});
Tensor
ins_boxes
=
boxes
->
Slice
(
i
,
i
+
1
);
ins_boxes
.
Resize
({
predict_dim
,
box_dim
});
std
::
map
<
int
,
std
::
vector
<
int
>>
indices
;
std
::
map
<
int
,
std
::
vector
<
int
>>
indices
;
int
num_nmsed_out
=
0
;
int
num_nmsed_out
=
0
;
MultiClassNMS
(
ctx
,
ins_score
,
*
boxes
,
indices
,
num_nmsed_out
);
MultiClassNMS
(
ctx
,
ins_score
,
ins_
boxes
,
indices
,
num_nmsed_out
);
all_indices
.
push_back
(
indices
);
all_indices
.
push_back
(
indices
);
batch_starts
.
push_back
(
batch_starts
.
back
()
+
num_nmsed_out
);
batch_starts
.
push_back
(
batch_starts
.
back
()
+
num_nmsed_out
);
}
}
...
@@ -282,11 +287,15 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
...
@@ -282,11 +287,15 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
for
(
int64_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
for
(
int64_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
Tensor
ins_score
=
scores
->
Slice
(
i
,
i
+
1
);
Tensor
ins_score
=
scores
->
Slice
(
i
,
i
+
1
);
ins_score
.
Resize
({
class_num
,
predict_dim
});
ins_score
.
Resize
({
class_num
,
predict_dim
});
Tensor
ins_boxes
=
boxes
->
Slice
(
i
,
i
+
1
);
ins_boxes
.
Resize
({
predict_dim
,
box_dim
});
int64_t
s
=
batch_starts
[
i
];
int64_t
s
=
batch_starts
[
i
];
int64_t
e
=
batch_starts
[
i
+
1
];
int64_t
e
=
batch_starts
[
i
+
1
];
if
(
e
>
s
)
{
if
(
e
>
s
)
{
Tensor
out
=
outs
->
Slice
(
s
,
e
);
Tensor
out
=
outs
->
Slice
(
s
,
e
);
MultiClassOutput
(
ins_score
,
*
boxes
,
all_indices
[
i
],
&
out
);
MultiClassOutput
(
ins_score
,
ins_
boxes
,
all_indices
[
i
],
&
out
);
}
}
}
}
}
}
...
@@ -303,9 +312,9 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -303,9 +312,9 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
MultiClassNMSOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
MultiClassNMSOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"BBoxes"
,
AddInput
(
"BBoxes"
,
"(Tensor) A
2-D Tensor with shape [
M, 4] represents the "
"(Tensor) A
3-D Tensor with shape [N,
M, 4] represents the "
"predicted locations of M bounding bboxes
. Each bounding box
"
"predicted locations of M bounding bboxes
, N is the batch size.
"
"has four coordinate values and the layout is "
"
Each bounding box
has four coordinate values and the layout is "
"[xmin, ymin, xmax, ymax]."
);
"[xmin, ymin, xmax, ymax]."
);
AddInput
(
"Scores"
,
AddInput
(
"Scores"
,
"(Tensor) A 3-D Tensor with shape [N, C, M] represents the "
"(Tensor) A 3-D Tensor with shape [N, C, M] represents the "
...
...
paddle/fluid/operators/send_op.cc
浏览文件 @
5ccab2dc
...
@@ -24,6 +24,22 @@ limitations under the License. */
...
@@ -24,6 +24,22 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
static
bool
IsVariableInitialized
(
const
framework
::
Scope
&
scope
,
const
std
::
string
&
varname
)
{
auto
*
var
=
scope
.
FindVar
(
varname
);
PADDLE_ENFORCE_NOT_NULL
(
var
,
"Can not find variable '%s' in the send side."
,
varname
);
if
(
var
->
IsType
<
framework
::
LoDTensor
>
())
{
return
var
->
Get
<
framework
::
LoDTensor
>
().
IsInitialized
();
}
else
if
(
var
->
IsType
<
framework
::
SelectedRows
>
())
{
return
var
->
Get
<
framework
::
SelectedRows
>
().
value
().
IsInitialized
();
}
else
{
PADDLE_THROW
(
"Variable type in send side should be in "
"[LodTensor, SelectedRows]"
);
}
return
false
;
}
class
SendOp
:
public
framework
::
OperatorBase
{
class
SendOp
:
public
framework
::
OperatorBase
{
public:
public:
...
@@ -51,8 +67,12 @@ class SendOp : public framework::OperatorBase {
...
@@ -51,8 +67,12 @@ class SendOp : public framework::OperatorBase {
detail
::
RPCClient
*
rpc_client
=
client_var
->
GetMutable
<
detail
::
RPCClient
>
();
detail
::
RPCClient
*
rpc_client
=
client_var
->
GetMutable
<
detail
::
RPCClient
>
();
for
(
size_t
i
=
0
;
i
<
ins
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
ins
.
size
();
i
++
)
{
if
(
IsVariableInitialized
(
scope
,
ins
[
i
]))
{
VLOG
(
3
)
<<
"sending "
<<
ins
[
i
]
<<
" to "
<<
epmap
[
i
];
VLOG
(
3
)
<<
"sending "
<<
ins
[
i
]
<<
" to "
<<
epmap
[
i
];
rpc_client
->
AsyncSendVariable
(
epmap
[
i
],
ctx
,
scope
,
ins
[
i
]);
rpc_client
->
AsyncSendVariable
(
epmap
[
i
],
ctx
,
scope
,
ins
[
i
]);
}
else
{
VLOG
(
3
)
<<
"don't send no-initialied variable: "
<<
ins
[
i
];
}
}
}
PADDLE_ENFORCE
(
rpc_client
->
Wait
());
PADDLE_ENFORCE
(
rpc_client
->
Wait
());
...
...
paddle/fluid/operators/send_recv_op_test.cc
浏览文件 @
5ccab2dc
...
@@ -22,7 +22,7 @@ limitations under the License. */
...
@@ -22,7 +22,7 @@ limitations under the License. */
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/string/printf.h"
#include "paddle/
fluid/
string/printf.h"
USE_NO_KERNEL_OP
(
send
);
USE_NO_KERNEL_OP
(
send
);
USE_NO_KERNEL_OP
(
listen_and_serv
);
USE_NO_KERNEL_OP
(
listen_and_serv
);
...
...
paddle/fluid/operators/sequence_expand_op.cc
浏览文件 @
5ccab2dc
...
@@ -29,7 +29,9 @@ class SequenceExpandOp : public framework::OperatorWithKernel {
...
@@ -29,7 +29,9 @@ class SequenceExpandOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
));
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
));
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Y"
));
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Y"
));
framework
::
DDim
out_dim
;
framework
::
DDim
out_dim
;
out_dim
=
ctx
->
GetInputDim
(
"Y"
);
auto
y_dim
=
ctx
->
GetInputDim
(
"Y"
);
out_dim
=
ctx
->
GetInputDim
(
"X"
);
out_dim
[
0
]
=
y_dim
[
0
];
ctx
->
ShareLoD
(
"Y"
,
"Out"
);
ctx
->
ShareLoD
(
"Y"
,
"Out"
);
ctx
->
SetOutputDim
(
"Out"
,
out_dim
);
ctx
->
SetOutputDim
(
"Out"
,
out_dim
);
}
}
...
...
paddle/fluid/operators/split_op.h
浏览文件 @
5ccab2dc
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <chrono>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/fluid/operators/strided_memcpy.h"
...
@@ -27,18 +28,18 @@ class SplitOpKernel : public framework::OpKernel<T> {
...
@@ -27,18 +28,18 @@ class SplitOpKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
outs
=
ctx
.
MultiOutput
<
framework
::
Tensor
>
(
"Out"
);
auto
outs
=
ctx
.
MultiOutput
<
framework
::
Tensor
>
(
"Out"
);
auto
in_stride
=
framework
::
stride
(
in
->
dims
());
auto
in_stride
=
framework
::
stride
_numel
(
in
->
dims
());
int64_t
axis
=
static_cast
<
int64_t
>
(
ctx
.
Attr
<
int
>
(
"axis"
));
int64_t
axis
=
static_cast
<
int64_t
>
(
ctx
.
Attr
<
int
>
(
"axis"
));
const
size_t
n
=
outs
.
size
();
auto
place
=
ctx
.
GetPlace
();
size_t
input_offset
=
0
;
size_t
input_offset
=
0
;
for
(
size_t
i
=
0
;
i
<
n
;
i
++
)
{
for
(
auto
&
out
:
outs
)
{
auto
&
out
=
outs
[
i
];
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
size_t
axis_dim
=
out
->
dims
()[
axis
]
;
auto
out_stride
=
framework
::
stride_numel
(
out
->
dims
())
;
auto
out_stride
=
framework
::
stride
(
out
->
dims
());
StridedNumelCopyWithAxis
<
T
>
(
ctx
.
device_context
(),
axis
,
out
->
data
<
T
>
(),
StridedMemcpy
<
T
>
(
ctx
.
device_context
()
,
in
->
data
<
T
>
()
+
input_offset
,
out_stride
,
in
->
data
<
T
>
()
+
input_offset
,
in_stride
,
out
->
dims
(),
out_stride
,
out
->
data
<
T
>
()
);
in_stride
);
input_offset
+=
axis_dim
*
in
_stride
[
axis
];
input_offset
+=
out
_stride
[
axis
];
}
}
}
}
};
};
...
...
paddle/fluid/operators/split_selected_rows_op.cc
浏览文件 @
5ccab2dc
...
@@ -22,7 +22,7 @@ class SplitSelectedRowsOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -22,7 +22,7 @@ class SplitSelectedRowsOpMaker : public framework::OpProtoAndCheckerMaker {
SplitSelectedRowsOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
SplitSelectedRowsOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"The input SelectedRows."
);
AddInput
(
"X"
,
"The input SelectedRows."
);
AddOutput
(
"Out"
,
"The outputs of input SelectedRows."
).
AsDuplicable
();
AddOutput
(
"Out"
,
"The outputs of
the
input SelectedRows."
).
AsDuplicable
();
AddAttr
<
std
::
vector
<
int
>>
(
"height_sections"
,
AddAttr
<
std
::
vector
<
int
>>
(
"height_sections"
,
"Height for each output SelectedRows."
)
"Height for each output SelectedRows."
)
.
SetDefault
(
std
::
vector
<
int
>
({}));
.
SetDefault
(
std
::
vector
<
int
>
({}));
...
@@ -56,27 +56,6 @@ class SplitSelectedRowsOp : public framework::OperatorWithKernel {
...
@@ -56,27 +56,6 @@ class SplitSelectedRowsOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"SplitSelectedRowsOp must has input X."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"SplitSelectedRowsOp must has input X."
);
PADDLE_ENFORCE
(
ctx
->
HasOutputs
(
"Out"
),
PADDLE_ENFORCE
(
ctx
->
HasOutputs
(
"Out"
),
"SplitSelectedRowsOp must has output Out."
);
"SplitSelectedRowsOp must has output Out."
);
std
::
vector
<
int
>
height_sections
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"height_sections"
);
int64_t
n
=
ctx
->
Outputs
(
"Out"
).
size
();
std
::
vector
<
framework
::
DDim
>
outs_dims
;
outs_dims
.
reserve
(
n
);
// make output dims
for
(
int64_t
i
=
0
;
i
<
n
;
++
i
)
{
auto
dims
=
ctx
->
GetInputDim
(
"X"
);
if
(
height_sections
.
size
())
{
PADDLE_ENFORCE_EQ
(
height_sections
.
size
(),
static_cast
<
size_t
>
(
n
),
"The size of height section should be the same with height"
" section size."
);
dims
[
0
]
=
height_sections
[
i
];
}
outs_dims
.
push_back
(
dims
);
}
ctx
->
SetOutputsDim
(
"Out"
,
outs_dims
);
}
}
};
};
...
...
paddle/fluid/operators/split_selected_rows_op.h
浏览文件 @
5ccab2dc
...
@@ -55,6 +55,7 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel<T> {
...
@@ -55,6 +55,7 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel<T> {
for
(
size_t
i
=
0
;
i
<
outs_rows_idx
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
outs_rows_idx
.
size
();
++
i
)
{
auto
rows_idx
=
outs_rows_idx
[
i
];
auto
rows_idx
=
outs_rows_idx
[
i
];
outs
[
i
]
->
set_height
(
height_sections
[
i
]);
if
(
rows_idx
.
size
()
>
0
)
{
if
(
rows_idx
.
size
()
>
0
)
{
auto
dims
=
x
->
GetCompleteDims
();
auto
dims
=
x
->
GetCompleteDims
();
dims
[
0
]
=
rows_idx
.
size
();
dims
[
0
]
=
rows_idx
.
size
();
...
...
paddle/fluid/operators/strided_memcpy.h
浏览文件 @
5ccab2dc
...
@@ -41,5 +41,62 @@ inline void StridedMemcpy(const platform::DeviceContext& dev_ctx, const T* src,
...
@@ -41,5 +41,62 @@ inline void StridedMemcpy(const platform::DeviceContext& dev_ctx, const T* src,
StridedCopyDimVisitor
<
T
>
func
(
dev_ctx
,
src
,
src_stride
,
dst_stride
,
dst
);
StridedCopyDimVisitor
<
T
>
func
(
dev_ctx
,
src
,
src_stride
,
dst_stride
,
dst
);
boost
::
apply_visitor
(
func
,
dst_dim
);
boost
::
apply_visitor
(
func
,
dst_dim
);
}
}
// Strided numel memory copy from src to dst by the specified axis
//
// For example, for a tensor dims [4, 20, 100], the strieded numel is
// [8000, 2000, 100]
//
// NOTE: The src and dst tensor should have the same elements
// except the specified axis.
template
<
typename
T
>
inline
void
StridedNumelCopyWithAxis
(
const
platform
::
DeviceContext
&
ctx
,
int64_t
axis
,
T
*
dst
,
const
framework
::
DDim
&
dst_stride_numel
,
const
T
*
src
,
const
framework
::
DDim
&
src_stride_numel
)
{
int64_t
before
=
dst_stride_numel
[
0
]
/
dst_stride_numel
[
axis
];
int64_t
src_after
=
src_stride_numel
[
axis
];
int64_t
dst_after
=
dst_stride_numel
[
axis
];
auto
place
=
ctx
.
GetPlace
();
PADDLE_ENFORCE_EQ
(
src_stride_numel
.
size
(),
dst_stride_numel
.
size
(),
"src and dst tensor should have the same dims size."
);
for
(
int64_t
i
=
0
;
i
<
axis
;
++
i
)
{
if
(
i
<
axis
)
{
PADDLE_ENFORCE_EQ
(
src_stride_numel
[
i
]
/
src_stride_numel
[
axis
],
dst_stride_numel
[
i
]
/
dst_stride_numel
[
axis
],
"src and dst should have the same elements "
"except the specified axis."
);
}
else
if
(
i
==
axis
)
{
continue
;
}
else
{
PADDLE_ENFORCE_EQ
(
src_stride_numel
[
i
],
dst_stride_numel
[
i
],
"src and dst should have the same elements "
"except the specified axis."
);
}
}
for
(
int64_t
i
=
0
;
i
<
before
;
++
i
)
{
if
(
platform
::
is_cpu_place
(
place
))
{
auto
&
cpu_place
=
boost
::
get
<
platform
::
CPUPlace
>
(
place
);
memory
::
Copy
(
cpu_place
,
dst
+
i
*
dst_after
,
cpu_place
,
src
+
i
*
src_after
,
sizeof
(
T
)
*
src_after
);
}
else
{
#ifdef PADDLE_WITH_CUDA
auto
&
gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
);
auto
&
cuda_ctx
=
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
ctx
);
memory
::
Copy
(
gpu_place
,
dst
+
i
*
dst_after
,
gpu_place
,
src
+
i
*
src_after
,
sizeof
(
T
)
*
src_after
,
cuda_ctx
.
stream
());
#else
PADDLE_THROW
(
"Paddle is not compiled with GPU"
);
#endif
}
}
}
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
paddle/fluid/operators/sum_op.h
浏览文件 @
5ccab2dc
...
@@ -116,7 +116,9 @@ class SumKernel : public framework::OpKernel<T> {
...
@@ -116,7 +116,9 @@ class SumKernel : public framework::OpKernel<T> {
int64_t
offset
=
0
;
int64_t
offset
=
0
;
for
(
int
i
=
0
;
i
<
N
;
i
++
)
{
for
(
int
i
=
0
;
i
<
N
;
i
++
)
{
auto
&
sel_row
=
get_selected_row
(
i
);
auto
&
sel_row
=
get_selected_row
(
i
);
if
(
!
sel_row
.
value
().
IsInitialized
()
||
sel_row
.
rows
().
size
()
==
0
)
{
continue
;
}
PADDLE_ENFORCE_EQ
(
out
->
height
(),
sel_row
.
height
());
PADDLE_ENFORCE_EQ
(
out
->
height
(),
sel_row
.
height
());
functor
(
context
.
template
device_context
<
DeviceContext
>(),
sel_row
,
functor
(
context
.
template
device_context
<
DeviceContext
>(),
sel_row
,
offset
,
out
);
offset
,
out
);
...
...
paddle/fluid/operators/target_assign_op.cc
浏览文件 @
5ccab2dc
...
@@ -22,69 +22,43 @@ class TargetAssignOp : public framework::OperatorWithKernel {
...
@@ -22,69 +22,43 @@ class TargetAssignOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
// checkout inputs
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"EncodedGTBBox"
),
"Input(X) of TargetAssignOp should not be null"
);
"Input(EncodedGTBBox) of TargetAssignOp should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"GTScoreLabel"
),
"Input(GTScoreLabel) of TargetAssignOp should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"MatchIndices"
),
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"MatchIndices"
),
"Input(MatchIndices) of TargetAssignOp should not be null"
);
"Input(MatchIndices) of TargetAssignOp should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"NegIndices"
),
"Input(NegIndices) of TargetAssignOp should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of TargetAssignOp should not be null."
);
// checkout outputs
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"OutWeight"
),
PADDLE_ENFORCE
(
"Output(OutWeight) of TargetAssignOp should not be null."
);
ctx
->
HasOutput
(
"PredBBoxLabel"
),
"Output(PredBBoxLabel) of TargetAssignOp should not be null."
);
auto
in_dims
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"PredBBoxWeight"
),
"Output(PredBBoxWeight) of TargetAssignOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"PredScoreLabel"
),
"Output(PredScoreLabel) of TargetAssignOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"PredScoreWeight"
),
"Output(PredScoreWeight) of TargetAssignOp should not be null."
);
auto
blabel_dims
=
ctx
->
GetInputDim
(
"EncodedGTBBox"
);
auto
slabel_dims
=
ctx
->
GetInputDim
(
"GTScoreLabel"
);
auto
mi_dims
=
ctx
->
GetInputDim
(
"MatchIndices"
);
auto
mi_dims
=
ctx
->
GetInputDim
(
"MatchIndices"
);
auto
neg_dims
=
ctx
->
GetInputDim
(
"NegIndices"
);
PADDLE_ENFORCE_EQ
(
blabel_dims
.
size
(),
3UL
,
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
3
,
"The rank of Input(X) must be 3."
);
"The rank of Input(EncodedGTBBox) must be 3."
);
PADDLE_ENFORCE_EQ
(
mi_dims
.
size
(),
2
,
PADDLE_ENFORCE_EQ
(
slabel_dims
.
size
(),
2UL
,
"The rank of Input(GTScoreLabel) must be 2."
);
PADDLE_ENFORCE_EQ
(
mi_dims
.
size
(),
2UL
,
"The rank of Input(MatchIndices) must be 2."
);
"The rank of Input(MatchIndices) must be 2."
);
PADDLE_ENFORCE_EQ
(
neg_dims
.
size
(),
2UL
,
"The rank of Input(NegIndices) must be 2."
);
PADDLE_ENFORCE_EQ
(
blabel_dims
[
0
],
slabel_dims
[
0
],
if
(
ctx
->
HasInput
(
"NegIndices"
))
{
"The 1st dimension (means the total number of "
auto
neg_dims
=
ctx
->
GetInputDim
(
"NegIndices"
);
"ground-truth bounding boxes) of Input(EncodedGTBBox) "
PADDLE_ENFORCE_EQ
(
neg_dims
.
size
(),
2
,
"and Input(GTScoreLabel) must be the same."
);
"The rank of Input(NegIndices) must be 2."
);
PADDLE_ENFORCE_EQ
(
blabel_dims
[
1
],
mi_dims
[
1
],
PADDLE_ENFORCE_EQ
(
neg_dims
[
1
],
1
,
"The 2nd dimension (means the number of priod boxes) "
"The last dimenstion of Out(NegIndices) must be 1."
);
"of Input(EncodedGTBBox) and "
}
"Input(MatchIndices) must be the same."
);
PADDLE_ENFORCE_EQ
(
blabel_dims
[
2
],
4
,
"The 3rd dimension of Input(EncodedGTBBox) must be 4."
);
auto
n
=
mi_dims
[
0
];
auto
n
=
mi_dims
[
0
];
auto
np
=
mi_dims
[
1
];
auto
m
=
mi_dims
[
1
];
ctx
->
SetOutputDim
(
"PredBBoxLabel"
,
{
n
,
np
,
4
});
auto
k
=
in_dims
[
in_dims
.
size
()
-
1
];
ctx
->
SetOutputDim
(
"PredBBoxWeight"
,
{
n
,
np
,
1
});
ctx
->
SetOutputDim
(
"Out"
,
{
n
,
m
,
k
});
ctx
->
SetOutputDim
(
"PredScoreLabel"
,
{
n
,
np
,
1
});
ctx
->
SetOutputDim
(
"OutWeight"
,
{
n
,
m
,
1
});
ctx
->
SetOutputDim
(
"PredScoreWeight"
,
{
n
,
np
,
1
});
}
}
protected:
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
)
->
type
()),
ctx
.
Input
<
framework
::
LoDTensor
>
(
"EncodedGTBBox"
)
->
type
()),
ctx
.
device_context
());
ctx
.
device_context
());
}
}
};
};
...
@@ -93,102 +67,87 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -93,102 +67,87 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
TargetAssignOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
TargetAssignOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"EncodedGTBBox"
,
AddInput
(
"X"
,
"(LoDTensor), The encoded ground-truth bounding boxes with shape "
"(LoDTensor), This input is a 3D LoDTensor with shape [M, P, K]. "
"[Ng, Np, 4], where Ng is the total number of ground-truth boxes "
"Some elements in X will be assigned to Out based on the "
"in this mini-batch, Np the number of predictions, 4 is the "
"MatchIndices and NegIndices."
);
"number of coordinate in [xmin, ymin, xmax, ymax] layout."
);
AddInput
(
"GTScoreLabel"
,
"(LoDTensor, default LoDTensor<int>), The input ground-truth "
"labels with shape [Ng, 1], where the Ng is the same as it in "
"the input of EncodedGTBBox."
);
AddInput
(
"MatchIndices"
,
AddInput
(
"MatchIndices"
,
"(Tensor, default Tensor<int>), The input matched indices "
"(Tensor, default Tensor<int>), The input matched indices "
"with shape [N, Np], where N is the batch size, Np is the same "
"with shape [N, P], If MatchIndices[i][j] is -1, the j-th entity "
"as it in the input of EncodedGTBBox. If MatchIndices[i][j] "
"of column is not matched to any entity of row in i-th instance."
);
"is -1, the j-th prior box is not matched to any ground-truh "
"box in i-th instance."
);
AddInput
(
"NegIndices"
,
AddInput
(
"NegIndices"
,
"(LoDTensor, default LoDTensor<int>), The input negative example "
"(LoDTensor, default LoDTensor<int>), The input negative example "
"indices with shape [Neg, 1], where is the total number of "
"indices are an optional input with shape [Neg, 1], where Neg is "
"negative example indices."
);
"the total number of negative example indices."
)
AddAttr
<
int
>
(
"background_label"
,
.
AsDispensable
();
"(int, default 0), Label index of background class."
)
AddAttr
<
int
>
(
"mismatch_value"
,
"(int, default 0), Fill this value to the "
"mismatched location."
)
.
SetDefault
(
0
);
.
SetDefault
(
0
);
AddOutput
(
"PredBBoxLabel"
,
AddOutput
(
"Out"
,
"(Tensor), The output encoded ground-truth labels "
"(Tensor), The output is a 3D Tensor with shape [N, P, K], "
"with shape [N, Np, 4], N is the batch size and Np, 4 is the "
"N and P is the same as they are in NegIndices, K is the "
"same as they in input of EncodedGTBBox. If MatchIndices[i][j] "
"same as it in input of X. If MatchIndices[i][j] "
"is -1, the PredBBoxLabel[i][j][:] is the encoded ground-truth "
"is -1, the Out[i][j][0 : K] is the mismatch_value."
);
"box for background_label in i-th instance."
);
AddOutput
(
"OutWeight"
,
AddOutput
(
"PredBBoxWeight"
,
"(Tensor), The weight for output with the shape of [N, P, 1]"
);
"(Tensor), The weight for PredBBoxLabel with the shape "
"of [N, Np, 1]"
);
AddOutput
(
"PredScoreLabel"
,
"(Tensor, default Tensor<int>), The output score labels for "
"each predictions with shape [N, Np, 1]. If MatchIndices[i][j] "
"is -1, PredScoreLabel[i][j] = background_label."
);
AddOutput
(
"PredScoreWeight"
,
"(Tensor), The weight for PredScoreLabel with the shape "
"of [N, Np, 1]"
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
This operator is, for given the encoded boxes between prior boxes and
This operator can be, for given the target bounding boxes or labels,
ground-truth boxes and ground-truth class labels, to assign classification
to assign classification and regression targets to each prediction as well as
and regression targets to each prior box as well as weights to each
weights to prediction. The weights is used to specify which prediction would
prior box. The weights is used to specify which prior box would not contribute
not contribute to training loss.
to training loss.
For each instance, the output `Out` and`OutWeight` are assigned based on
For each instance, the output `PredBBoxLabel`, `PredBBoxWeight`,
`MatchIndices` and `NegIndices`.
`PredScoreLabel` and `PredScoreWeight` are assigned based on `MatchIndices`.
Assumed that the row offset for each instance in `X` is called lod,
Assumed that the row offset for each instance in `EncodedGTBBox` is called lod,
this operator assigns classification/regression targets by performing the
this operato assigns classification/regression targets by performing the
following steps:
following steps:
1. Assigning all outpts based on `MatchIndices`:
1. Assigning all outpts based on `MatchIndices`:
If id = MatchIndices[i][j] > 0,
If id = MatchIndices[i][j] > 0,
PredBBoxLabel[i][j] = EncodedGTBBox[lod[i] + id][j]
Out[i][j][0 : K] = X[lod[i] + id][j % P][0 : K]
PredBBoxWeight[i][j] = 1.
OutWeight[i][j] = 1.
PredScoreLabel[i][j] = GTScoreLabel[lod[i] + id]
PredScoreWeight[i][j] = 1.
Otherwise,
Otherwise,
PredBBoxLabel[j][j] = [0., 0., 0., 0.]
Out[j][j][0 : K] = {mismatch_value, mismatch_value, ...}
PredBBoxWeight[i][j] = 0.
OutWeight[i][j] = 0.
PredScoreLabel[i][j] = background_label
PredScoreWeight[i][j] = 0.
2. Assigning
PredScoreWeight based on `NegIndices`
:
2. Assigning
OutWeight based on `NegIndices` if `NegIndices` is provided
:
Assumed that the row offset for each instance in `NegIndices` is cal
e
ed neg_lod,
Assumed that the row offset for each instance in `NegIndices` is cal
l
ed neg_lod,
for i-th instance and
all ids
of NegIndices in this instance:
for i-th instance and
each `id`
of NegIndices in this instance:
PredScoreLabel[i][id] = background_label
Out[i][id][0 : K] = {mismatch_value, mismatch_value, ...}
PredScore
Weight[i][id] = 1.0
Out
Weight[i][id] = 1.0
)DOC"
);
)DOC"
);
}
}
};
};
template
<
typename
T
>
template
<
typename
T
,
typename
WT
>
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
T
>
{
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
T
,
WT
>
{
void
operator
()(
const
platform
::
CPUDeviceContext
&
ctx
,
const
int
*
neg_indices
,
void
operator
()(
const
platform
::
CPUDeviceContext
&
ctx
,
const
int
*
neg_indices
,
const
size_t
*
lod
,
const
int
num
,
const
int
num_prior_box
,
const
size_t
*
lod
,
const
int
N
,
const
int
M
,
const
int
K
,
const
int
background_label
,
int
*
out_label
,
T
*
out_label
_wt
)
{
const
int
mismatch_value
,
T
*
out
,
WT
*
out
_wt
)
{
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
for
(
int
i
=
0
;
i
<
N
;
++
i
)
{
for
(
size_t
j
=
lod
[
i
];
j
<
lod
[
i
+
1
];
++
j
)
{
for
(
size_t
j
=
lod
[
i
];
j
<
lod
[
i
+
1
];
++
j
)
{
int
id
=
neg_indices
[
j
];
int
id
=
neg_indices
[
j
];
out_label
[
i
*
num_prior_box
+
id
]
=
background_label
;
int
off
=
(
i
*
M
+
id
)
*
K
;
out_label_wt
[
i
*
num_prior_box
+
id
]
=
static_cast
<
T
>
(
1.0
);
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
out
[
off
+
k
]
=
mismatch_value
;
out_wt
[
off
+
k
]
=
static_cast
<
WT
>
(
1.0
);
}
}
}
}
}
}
}
};
};
template
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
int
,
float
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
double
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
float
,
float
>;
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
...
@@ -198,5 +157,5 @@ REGISTER_OP_WITHOUT_GRADIENT(target_assign, ops::TargetAssignOp,
...
@@ -198,5 +157,5 @@ REGISTER_OP_WITHOUT_GRADIENT(target_assign, ops::TargetAssignOp,
ops
::
TargetAssignOpMaker
);
ops
::
TargetAssignOpMaker
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
target_assign
,
target_assign
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
,
float
>
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
,
float
>
);
paddle/fluid/operators/target_assign_op.cu
浏览文件 @
5ccab2dc
...
@@ -17,39 +17,41 @@ limitations under the License. */
...
@@ -17,39 +17,41 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
template
<
typename
T
>
template
<
typename
T
,
typename
WT
>
__global__
void
NegTargetAssignKernel
(
const
int
*
neg_indices
,
const
size_t
*
lod
,
__global__
void
NegTargetAssignKernel
(
const
int
*
neg_indices
,
const
size_t
*
lod
,
const
int
num
,
const
int
num_prior_box
,
const
int
N
,
const
int
M
,
const
int
K
,
const
int
background_label
,
const
int
mismatch_value
,
T
*
out
,
int
*
out_label
,
T
*
out_label
_wt
)
{
WT
*
out
_wt
)
{
int
bidx
=
blockIdx
.
x
;
int
bidx
=
blockIdx
.
x
;
int
st
=
lod
[
bidx
];
int
st
=
lod
[
bidx
];
int
ed
=
lod
[
bidx
+
1
];
int
ed
=
lod
[
bidx
+
1
];
int
row_start
=
bidx
*
num_prior_box
;
int
row_start
=
bidx
*
M
;
for
(
int
i
=
st
+
threadIdx
.
x
;
i
<
ed
;
i
+=
blockDim
.
x
)
{
for
(
int
i
=
st
+
threadIdx
.
x
;
i
<
ed
;
i
+=
blockDim
.
x
)
{
int
id
=
row_start
+
neg_indices
[
i
];
int
id
=
row_start
+
neg_indices
[
i
];
out_label
[
id
]
=
background_label
;
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
out_label_wt
[
id
]
=
1.
;
out
[
id
*
K
+
k
]
=
T
(
mismatch_value
);
out_wt
[
id
*
K
+
k
]
=
WT
(
1.
);
}
}
}
}
}
template
<
typename
T
>
template
<
typename
T
,
typename
WT
>
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
T
>
{
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
T
,
WT
>
{
void
operator
()(
const
platform
::
CUDADeviceContext
&
ctx
,
void
operator
()(
const
platform
::
CUDADeviceContext
&
ctx
,
const
int
*
neg_indices
,
const
size_t
*
lod
,
const
int
num
,
const
int
*
neg_indices
,
const
size_t
*
lod
,
const
int
N
,
const
int
num_prior_box
,
const
int
background_label
,
const
int
M
,
const
int
K
,
const
int
mismatch_value
,
T
*
out
,
int
*
out_label
,
T
*
out_label
_wt
)
{
WT
*
out
_wt
)
{
const
int
block_size
=
256
;
const
int
block_size
=
256
;
const
int
grid_size
=
num
;
const
int
grid_size
=
N
;
NegTargetAssignKernel
<
T
><<<
grid_size
,
block_size
,
0
,
ctx
.
stream
()
>>>
(
NegTargetAssignKernel
<
T
,
WT
><<<
grid_size
,
block_size
,
0
,
ctx
.
stream
()
>>>
(
neg_indices
,
lod
,
num
,
num_prior_box
,
background_label
,
out_label
,
neg_indices
,
lod
,
N
,
M
,
K
,
mismatch_value
,
out
,
out_wt
);
out_label_wt
);
}
}
};
};
template
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
int
,
float
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
double
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
float
,
float
>;
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
...
@@ -57,5 +59,5 @@ template struct NegTargetAssignFunctor<platform::CUDADeviceContext, double>;
...
@@ -57,5 +59,5 @@ template struct NegTargetAssignFunctor<platform::CUDADeviceContext, double>;
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
target_assign
,
target_assign
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
float
>
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
float
>
);
paddle/fluid/operators/target_assign_op.h
浏览文件 @
5ccab2dc
...
@@ -19,140 +19,113 @@ limitations under the License. */
...
@@ -19,140 +19,113 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
template
<
typename
T
,
typename
WT
>
template
<
typename
T
>
struct
TargetAssignFunctor
{
struct
TargetAssignFunctor
{
const
T
*
gt_box_
;
const
T
*
in_
;
const
int
*
gt_label_
;
const
int
*
match_indices_
;
const
int
*
match_indices_
;
const
size_t
*
lod_
;
const
size_t
*
lod_
;
const
int
background_label_
;
const
int
mismatch_value_
;
const
int64_t
num_
;
const
int64_t
N_
;
const
int64_t
num_prior_box_
;
const
int64_t
M_
;
const
int64_t
P_
;
T
*
out_box_
;
const
int64_t
K_
;
T
*
out_box_wt_
;
int
*
out_label_
;
T
*
out_
;
T
*
out_label_wt_
;
WT
*
out_wt_
;
TargetAssignFunctor
(
const
T
*
gt_box
,
const
int
*
gt_label
,
TargetAssignFunctor
(
const
T
*
input
,
const
int
*
match_indices
,
const
int
*
match_indices
,
const
size_t
*
lod
,
const
size_t
*
lod
,
const
int
mismatch_value
,
const
int
background_label
,
const
int64_t
num
,
const
int64_t
N
,
const
int64_t
M
,
const
int64_t
P
,
const
int64_t
np
,
T
*
out_box
,
T
*
out_box_wt
,
const
int64_t
K
,
T
*
out
,
WT
*
out_wt
)
int
*
out_label
,
T
*
out_label_wt
)
:
in_
(
input
),
:
gt_box_
(
gt_box
),
gt_label_
(
gt_label
),
match_indices_
(
match_indices
),
match_indices_
(
match_indices
),
lod_
(
lod
),
lod_
(
lod
),
background_label_
(
background_label
),
mismatch_value_
(
mismatch_value
),
num_
(
num
),
N_
(
N
),
num_prior_box_
(
np
),
M_
(
M
),
out_box_
(
out_box
),
P_
(
P
),
out_box_wt_
(
out_box_wt
),
K_
(
K
),
out_
label_
(
out_label
),
out_
(
out
),
out_
label_wt_
(
out_label
_wt
)
{}
out_
wt_
(
out
_wt
)
{}
HOSTDEVICE
void
operator
()(
size_t
i
)
const
{
HOSTDEVICE
void
operator
()(
size_t
i
)
const
{
int
row
=
i
/
num_prior_box
_
;
int
h
=
i
/
M
_
;
int
col
=
i
-
row
*
num_prior_box
_
;
int
w
=
i
-
h
*
M
_
;
size_t
row_off
=
lod_
[
row
];
size_t
off
=
lod_
[
h
];
int
offset
=
row
*
num_prior_box_
+
col
;
int
id
=
match_indices_
[
i
]
;
int
id
=
match_indices_
[
offset
];
T
*
out
=
out_
+
i
*
K_
;
T
*
obox
=
out_box_
+
offset
*
4
;
WT
*
out_wt
=
out_wt_
+
i
;
int
*
olabel
=
out_label_
+
offset
;
T
*
obox_wt
=
out_box_wt_
+
offset
;
T
*
olabel_wt
=
out_label_wt_
+
offset
;
if
(
id
>
-
1
)
{
if
(
id
>
-
1
)
{
const
T
*
gtbox
=
gt_box_
+
((
row_off
+
id
)
*
num_prior_box_
+
col
)
*
4
;
int
w_off
=
w
%
P_
;
const
T
*
in
=
in_
+
((
off
+
id
)
*
P_
+
w_off
)
*
K_
;
obox
[
0
]
=
gtbox
[
0
];
for
(
int64_t
k
=
0
;
k
<
K_
;
++
k
)
{
obox
[
1
]
=
gtbox
[
1
];
out
[
k
]
=
in
[
k
];
obox
[
2
]
=
gtbox
[
2
];
}
obox
[
3
]
=
gtbox
[
3
];
out_wt
[
0
]
=
static_cast
<
WT
>
(
1.
);
olabel
[
0
]
=
gt_label_
[
row_off
+
id
];
obox_wt
[
0
]
=
static_cast
<
T
>
(
1.
);
olabel_wt
[
0
]
=
static_cast
<
T
>
(
1.
);
}
else
{
}
else
{
obox
[
0
]
=
static_cast
<
T
>
(
0.
);
for
(
int64_t
k
=
0
;
k
<
K_
;
++
k
)
{
obox
[
1
]
=
static_cast
<
T
>
(
0.
);
out
[
k
]
=
static_cast
<
T
>
(
mismatch_value_
);
obox
[
2
]
=
static_cast
<
T
>
(
0.
);
}
obox
[
3
]
=
static_cast
<
T
>
(
0.
);
out_wt
[
0
]
=
static_cast
<
WT
>
(
0.
);
olabel
[
0
]
=
background_label_
;
obox_wt
[
0
]
=
static_cast
<
T
>
(
0.
);
olabel_wt
[
0
]
=
static_cast
<
T
>
(
0.
);
}
}
}
}
};
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
,
typename
WT
>
struct
NegTargetAssignFunctor
{
struct
NegTargetAssignFunctor
{
void
operator
()(
const
platform
::
DeviceContext
&
ctx
,
const
int
*
neg_indices
,
void
operator
()(
const
platform
::
DeviceContext
&
ctx
,
const
int
*
neg_indices
,
const
size_t
*
lod
,
const
int
num
,
const
int
num_prior_box
,
const
size_t
*
lod
,
const
int
N
,
const
int
M
,
const
int
K
,
const
int
background_label
,
int
*
out_label
,
const
int
mismatch_value
,
T
*
out
,
WT
*
out_wt
)
const
;
T
*
out_label_wt
)
const
;
};
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
,
typename
WT
>
class
TargetAssignKernel
:
public
framework
::
OpKernel
<
T
>
{
class
TargetAssignKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
enc_gt_box
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"EncodedGTBBox"
);
auto
*
x
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
);
auto
*
gt_label
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"GTScoreLabel"
);
auto
*
match_indices
=
ctx
.
Input
<
framework
::
Tensor
>
(
"MatchIndices"
);
auto
*
match_indices
=
ctx
.
Input
<
framework
::
Tensor
>
(
"MatchIndices"
);
auto
*
neg_indices
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"NegIndices"
);
auto
*
out_box
=
ctx
.
Output
<
framework
::
Tensor
>
(
"PredBBoxLabel"
);
auto
*
out_box_wt
=
ctx
.
Output
<
framework
::
Tensor
>
(
"PredBBoxWeight"
);
auto
*
out_label
=
ctx
.
Output
<
framework
::
Tensor
>
(
"PredScoreLabel"
);
auto
*
out_label_wt
=
ctx
.
Output
<
framework
::
Tensor
>
(
"PredScoreWeight"
);
PADDLE_ENFORCE_EQ
(
enc_gt_box
->
lod
().
size
(),
1UL
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
PADDLE_ENFORCE_EQ
(
gt_label
->
lod
().
size
(),
1UL
);
auto
*
out_wt
=
ctx
.
Output
<
framework
::
Tensor
>
(
"OutWeight"
);
PADDLE_ENFORCE_EQ
(
neg_indices
->
lod
().
size
(),
1UL
);
int
background_label
=
ctx
.
Attr
<
int
>
(
"background_label"
);
PADDLE_ENFORCE_EQ
(
x
->
lod
().
size
(),
1UL
);
int
mismatch_value
=
ctx
.
Attr
<
int
>
(
"mismatch_value"
);
const
T
*
box_data
=
enc_gt_box
->
data
<
T
>
();
const
T
*
x_data
=
x
->
data
<
T
>
();
const
int
*
label_data
=
gt_label
->
data
<
int
>
();
const
int
*
match_idx_data
=
match_indices
->
data
<
int
>
();
const
int
*
match_idx_data
=
match_indices
->
data
<
int
>
();
const
int
*
neg_idx_data
=
neg_indices
->
data
<
int
>
();
T
*
obox_data
=
out_box
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
out_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
obox_wt_data
=
out_box_wt
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
WT
*
out_wt_data
=
out_wt
->
mutable_data
<
WT
>
(
ctx
.
GetPlace
());
int
*
olabel_data
=
out_label
->
mutable_data
<
int
>
(
ctx
.
GetPlace
());
T
*
olabel_wt_data
=
out_label_wt
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int64_t
num
=
match_indices
->
dims
()[
0
];
int64_t
n
=
match_indices
->
dims
()[
0
];
int64_t
num_prior_box
=
match_indices
->
dims
()[
1
];
int64_t
m
=
match_indices
->
dims
()[
1
];
int64_t
p
=
x
->
dims
()[
1
];
int64_t
k
=
x
->
dims
()[
2
];
auto
gt_lod
=
enc_gt_box
->
lod
().
back
();
auto
x_lod
=
x
->
lod
().
back
();
auto
gt_label_lod
=
gt_label
->
lod
().
back
();
size_t
*
x_lod_data
=
x_lod
.
MutableData
(
ctx
.
GetPlace
());
auto
neg_lod
=
neg_indices
->
lod
().
back
();
for
(
size_t
i
=
0
;
i
<
gt_lod
.
size
();
++
i
)
{
PADDLE_ENFORCE_EQ
(
gt_lod
.
data
()[
i
],
gt_label_lod
.
data
()[
i
]);
}
size_t
*
gt_lod_data
=
gt_lod
.
MutableData
(
ctx
.
GetPlace
());
size_t
*
neg_lod_data
=
neg_lod
.
MutableData
(
ctx
.
GetPlace
());
TargetAssignFunctor
<
T
>
functor
(
box_data
,
label_data
,
match_idx_data
,
TargetAssignFunctor
<
T
,
WT
>
functor
(
x_data
,
match_idx_data
,
x_lod_data
,
gt_lod_data
,
background_label
,
num
,
mismatch_value
,
n
,
m
,
p
,
k
,
out_data
,
num_prior_box
,
obox_data
,
obox_wt_data
,
out_wt_data
);
olabel_data
,
olabel_wt_data
);
auto
&
device_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
auto
&
device_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
platform
::
ForRange
<
DeviceContext
>
for_range
(
device_ctx
,
platform
::
ForRange
<
DeviceContext
>
for_range
(
device_ctx
,
n
*
m
);
num
*
num_prior_box
);
for_range
(
functor
);
for_range
(
functor
);
NegTargetAssignFunctor
<
DeviceContext
,
T
>
neg_trg_functor
;
auto
*
neg_indices
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"NegIndices"
);
neg_trg_functor
(
device_ctx
,
neg_idx_data
,
neg_lod_data
,
num
,
num_prior_box
,
if
(
neg_indices
)
{
background_label
,
olabel_data
,
olabel_wt_data
);
PADDLE_ENFORCE_EQ
(
neg_indices
->
lod
().
size
(),
1UL
);
const
int
*
neg_idx_data
=
neg_indices
->
data
<
int
>
();
auto
neg_lod
=
neg_indices
->
lod
().
back
();
size_t
*
neg_lod_data
=
neg_lod
.
MutableData
(
ctx
.
GetPlace
());
NegTargetAssignFunctor
<
DeviceContext
,
T
,
WT
>
neg_trg_functor
;
neg_trg_functor
(
device_ctx
,
neg_idx_data
,
neg_lod_data
,
n
,
m
,
k
,
mismatch_value
,
out_data
,
out_wt_data
);
}
}
}
};
};
...
...
paddle/fluid/platform/cpu_info_test.cc
浏览文件 @
5ccab2dc
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/string/printf.h"
#include "paddle/
fluid/
string/printf.h"
#include <ostream>
#include <ostream>
#include <sstream>
#include <sstream>
...
...
paddle/fluid/platform/enforce.h
浏览文件 @
5ccab2dc
...
@@ -23,8 +23,8 @@ limitations under the License. */
...
@@ -23,8 +23,8 @@ limitations under the License. */
#include <string>
#include <string>
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/string/printf.h"
#include "paddle/
fluid/
string/printf.h"
#include "paddle/string/to_string.h"
#include "paddle/
fluid/
string/to_string.h"
#ifdef __GNUC__
#ifdef __GNUC__
#include <cxxabi.h> // for __cxa_demangle
#include <cxxabi.h> // for __cxa_demangle
...
...
paddle/fluid/platform/enforce_test.cc
浏览文件 @
5ccab2dc
...
@@ -15,7 +15,7 @@ limitations under the License. */
...
@@ -15,7 +15,7 @@ limitations under the License. */
#include "gtest/gtest.h"
#include "gtest/gtest.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/string/piece.h"
#include "paddle/
fluid/
string/piece.h"
using
StringPiece
=
paddle
::
string
::
Piece
;
using
StringPiece
=
paddle
::
string
::
Piece
;
using
paddle
::
string
::
HasPrefix
;
using
paddle
::
string
::
HasPrefix
;
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
5ccab2dc
...
@@ -35,7 +35,7 @@ limitations under the License. */
...
@@ -35,7 +35,7 @@ limitations under the License. */
#include "paddle/fluid/pybind/exception.h"
#include "paddle/fluid/pybind/exception.h"
#include "paddle/fluid/pybind/pybind.h"
#include "paddle/fluid/pybind/pybind.h"
#include "paddle/fluid/pybind/tensor_py.h"
#include "paddle/fluid/pybind/tensor_py.h"
#include "paddle/string/to_string.h"
#include "paddle/
fluid/
string/to_string.h"
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
...
...
paddle/string/.clang-format
→
paddle/
fluid/
string/.clang-format
浏览文件 @
5ccab2dc
文件已移动
paddle/string/CMakeLists.txt
→
paddle/
fluid/
string/CMakeLists.txt
浏览文件 @
5ccab2dc
文件已移动
paddle/string/piece.cc
→
paddle/
fluid/
string/piece.cc
浏览文件 @
5ccab2dc
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "p
addle/string/p
iece.h"
#include "piece.h"
#include <string.h>
#include <string.h>
...
...
paddle/string/piece.h
→
paddle/
fluid/
string/piece.h
浏览文件 @
5ccab2dc
...
@@ -28,7 +28,7 @@ namespace string {
...
@@ -28,7 +28,7 @@ namespace string {
// its syntax is simple as it doesn't own/manage the string, it is
// its syntax is simple as it doesn't own/manage the string, it is
// cheap to construct Pieces and pass them around.
// cheap to construct Pieces and pass them around.
class
Piece
{
class
Piece
{
public:
public:
static
const
size_t
npos
=
static_cast
<
size_t
>
(
-
1
);
static
const
size_t
npos
=
static_cast
<
size_t
>
(
-
1
);
// We provide non-explicit singleton constructors so users can
// We provide non-explicit singleton constructors so users can
...
@@ -55,7 +55,7 @@ public:
...
@@ -55,7 +55,7 @@ public:
// Return a string that contains the copy of the referenced data.
// Return a string that contains the copy of the referenced data.
std
::
string
ToString
()
const
{
return
std
::
string
(
data_
,
size_
);
}
std
::
string
ToString
()
const
{
return
std
::
string
(
data_
,
size_
);
}
private:
private:
const
char
*
data_
;
const
char
*
data_
;
size_t
size_
;
size_t
size_
;
...
...
paddle/string/piece_test.cc
→
paddle/
fluid/
string/piece_test.cc
浏览文件 @
5ccab2dc
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/string/piece.h"
#include "paddle/
fluid/
string/piece.h"
#include <sstream>
#include <sstream>
...
...
paddle/string/printf.h
→
paddle/
fluid/
string/printf.h
浏览文件 @
5ccab2dc
...
@@ -71,7 +71,7 @@
...
@@ -71,7 +71,7 @@
#include <iostream>
#include <iostream>
#include <sstream>
#include <sstream>
#include "
paddle/string/
tinyformat/tinyformat.h" // https://github.com/c42f/tinyformat
#include "tinyformat/tinyformat.h" // https://github.com/c42f/tinyformat
namespace
paddle
{
namespace
paddle
{
namespace
string
{
namespace
string
{
...
...
paddle/string/printf_test.cc
→
paddle/
fluid/
string/printf_test.cc
浏览文件 @
5ccab2dc
...
@@ -11,7 +11,7 @@
...
@@ -11,7 +11,7 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "p
addle/string/p
rintf.h"
#include "printf.h"
#include <string>
#include <string>
...
@@ -24,6 +24,6 @@ TEST(StringPrintf, StringPrintf) {
...
@@ -24,6 +24,6 @@ TEST(StringPrintf, StringPrintf) {
long
hour
=
14
;
long
hour
=
14
;
int
min
=
44
;
int
min
=
44
;
EXPECT_EQ
(
std
::
string
(
"Wednesday, July 27, 14:44"
),
EXPECT_EQ
(
std
::
string
(
"Wednesday, July 27, 14:44"
),
paddle
::
string
::
Sprintf
(
paddle
::
string
::
Sprintf
(
"%s, %s %d, %.2d:%.2d"
,
weekday
,
month
,
day
,
"%s, %s %d, %.2d:%.2d"
,
weekday
,
month
,
day
,
hour
,
min
));
hour
,
min
));
}
}
paddle/string/tinyformat/tinyformat.h
→
paddle/
fluid/
string/tinyformat/tinyformat.h
浏览文件 @
5ccab2dc
...
@@ -147,7 +147,7 @@ namespace detail {
...
@@ -147,7 +147,7 @@ namespace detail {
// Test whether type T1 is convertible to type T2
// Test whether type T1 is convertible to type T2
template
<
typename
T1
,
typename
T2
>
template
<
typename
T1
,
typename
T2
>
struct
is_convertible
{
struct
is_convertible
{
private:
private:
// two types of different size
// two types of different size
struct
fail
{
struct
fail
{
char
dummy
[
2
];
char
dummy
[
2
];
...
@@ -160,7 +160,7 @@ private:
...
@@ -160,7 +160,7 @@ private:
static
succeed
tryConvert
(
const
T2
&
);
static
succeed
tryConvert
(
const
T2
&
);
static
const
T1
&
makeT1
();
static
const
T1
&
makeT1
();
public:
public:
// Standard trick: the (...) version of tryConvert will be chosen from
// Standard trick: the (...) version of tryConvert will be chosen from
// the overload set only if the version taking a T2 doesn't match.
// the overload set only if the version taking a T2 doesn't match.
// Then we compare the sizes of the return types to check which
// Then we compare the sizes of the return types to check which
...
@@ -170,8 +170,7 @@ public:
...
@@ -170,8 +170,7 @@ public:
// Format the value by casting to type fmtT. This default implementation
// Format the value by casting to type fmtT. This default implementation
// should never be called.
// should never be called.
template
<
typename
T
,
template
<
typename
T
,
typename
fmtT
,
typename
fmtT
,
bool
convertible
=
is_convertible
<
T
,
fmtT
>
::
value
>
bool
convertible
=
is_convertible
<
T
,
fmtT
>
::
value
>
struct
formatValueAsType
{
struct
formatValueAsType
{
static
void
invoke
(
std
::
ostream
&
/*out*/
,
const
T
&
/*value*/
)
{
assert
(
0
);
}
static
void
invoke
(
std
::
ostream
&
/*out*/
,
const
T
&
/*value*/
)
{
assert
(
0
);
}
...
@@ -241,11 +240,8 @@ TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR(char)
...
@@ -241,11 +240,8 @@ TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR(char)
/// operator<< to format the type T, with special cases for the %c and %p
/// operator<< to format the type T, with special cases for the %c and %p
/// conversions.
/// conversions.
template
<
typename
T
>
template
<
typename
T
>
inline
void
formatValue
(
std
::
ostream
&
out
,
inline
void
formatValue
(
std
::
ostream
&
out
,
const
char
*
/*fmtBegin*/
,
const
char
*
/*fmtBegin*/
,
const
char
*
fmtEnd
,
int
ntrunc
,
const
T
&
value
)
{
const
char
*
fmtEnd
,
int
ntrunc
,
const
T
&
value
)
{
// The mess here is to support the %c and %p conversions: if these
// The mess here is to support the %c and %p conversions: if these
// conversions are active we try to convert the type to a char or const
// conversions are active we try to convert the type to a char or const
// void* respectively and format that instead of the value itself. For the
// void* respectively and format that instead of the value itself. For the
...
@@ -268,11 +264,8 @@ inline void formatValue(std::ostream &out,
...
@@ -268,11 +264,8 @@ inline void formatValue(std::ostream &out,
// Overloaded version for char types to support printing as an integer
// Overloaded version for char types to support printing as an integer
#define TINYFORMAT_DEFINE_FORMATVALUE_CHAR(charType) \
#define TINYFORMAT_DEFINE_FORMATVALUE_CHAR(charType) \
inline void formatValue(std::ostream &out, \
inline void formatValue(std::ostream &out, const char *
/*fmtBegin*/
, \
const char *
/*fmtBegin*/
, \
const char *fmtEnd, int
/**/
, charType value) { \
const char *fmtEnd, \
int
/**/
, \
charType value) { \
switch (*(fmtEnd - 1)) { \
switch (*(fmtEnd - 1)) { \
case 'u': \
case 'u': \
case 'd': \
case 'd': \
...
@@ -482,7 +475,7 @@ namespace detail {
...
@@ -482,7 +475,7 @@ namespace detail {
// each argument to be allocated as a homogenous array inside FormatList
// each argument to be allocated as a homogenous array inside FormatList
// whereas a naive implementation based on inheritance does not.
// whereas a naive implementation based on inheritance does not.
class
FormatArg
{
class
FormatArg
{
public:
public:
FormatArg
()
{}
FormatArg
()
{}
template
<
typename
T
>
template
<
typename
T
>
...
@@ -491,22 +484,17 @@ public:
...
@@ -491,22 +484,17 @@ public:
m_formatImpl
(
&
formatImpl
<
T
>
),
m_formatImpl
(
&
formatImpl
<
T
>
),
m_toIntImpl
(
&
toIntImpl
<
T
>
)
{}
m_toIntImpl
(
&
toIntImpl
<
T
>
)
{}
void
format
(
std
::
ostream
&
out
,
void
format
(
std
::
ostream
&
out
,
const
char
*
fmtBegin
,
const
char
*
fmtEnd
,
const
char
*
fmtBegin
,
const
char
*
fmtEnd
,
int
ntrunc
)
const
{
int
ntrunc
)
const
{
m_formatImpl
(
out
,
fmtBegin
,
fmtEnd
,
ntrunc
,
m_value
);
m_formatImpl
(
out
,
fmtBegin
,
fmtEnd
,
ntrunc
,
m_value
);
}
}
int
toInt
()
const
{
return
m_toIntImpl
(
m_value
);
}
int
toInt
()
const
{
return
m_toIntImpl
(
m_value
);
}
private:
private:
template
<
typename
T
>
template
<
typename
T
>
static
void
formatImpl
(
std
::
ostream
&
out
,
static
void
formatImpl
(
std
::
ostream
&
out
,
const
char
*
fmtBegin
,
const
char
*
fmtBegin
,
const
char
*
fmtEnd
,
int
ntrunc
,
const
void
*
value
)
{
const
char
*
fmtEnd
,
int
ntrunc
,
const
void
*
value
)
{
formatValue
(
out
,
fmtBegin
,
fmtEnd
,
ntrunc
,
*
static_cast
<
const
T
*>
(
value
));
formatValue
(
out
,
fmtBegin
,
fmtEnd
,
ntrunc
,
*
static_cast
<
const
T
*>
(
value
));
}
}
...
@@ -516,11 +504,8 @@ private:
...
@@ -516,11 +504,8 @@ private:
}
}
const
void
*
m_value
;
const
void
*
m_value
;
void
(
*
m_formatImpl
)(
std
::
ostream
&
out
,
void
(
*
m_formatImpl
)(
std
::
ostream
&
out
,
const
char
*
fmtBegin
,
const
char
*
fmtBegin
,
const
char
*
fmtEnd
,
int
ntrunc
,
const
void
*
value
);
const
char
*
fmtEnd
,
int
ntrunc
,
const
void
*
value
);
int
(
*
m_toIntImpl
)(
const
void
*
value
);
int
(
*
m_toIntImpl
)(
const
void
*
value
);
};
};
...
@@ -569,12 +554,10 @@ inline const char *printFormatStringLiteral(std::ostream &out,
...
@@ -569,12 +554,10 @@ inline const char *printFormatStringLiteral(std::ostream &out,
// necessary to pull out variable width and precision . The function returns a
// necessary to pull out variable width and precision . The function returns a
// pointer to the character after the end of the current format spec.
// pointer to the character after the end of the current format spec.
inline
const
char
*
streamStateFromFormat
(
std
::
ostream
&
out
,
inline
const
char
*
streamStateFromFormat
(
std
::
ostream
&
out
,
bool
&
spacePadPositive
,
bool
&
spacePadPositive
,
int
&
ntrunc
,
int
&
ntrunc
,
const
char
*
fmtStart
,
const
char
*
fmtStart
,
const
detail
::
FormatArg
*
formatters
,
const
detail
::
FormatArg
*
formatters
,
int
&
argIndex
,
int
&
argIndex
,
int
numFormatters
)
{
int
numFormatters
)
{
if
(
*
fmtStart
!=
'%'
)
{
if
(
*
fmtStart
!=
'%'
)
{
TINYFORMAT_ERROR
(
TINYFORMAT_ERROR
(
"tinyformat: Not enough conversion specifiers in format string"
);
"tinyformat: Not enough conversion specifiers in format string"
);
...
@@ -750,10 +733,8 @@ inline const char *streamStateFromFormat(std::ostream &out,
...
@@ -750,10 +733,8 @@ inline const char *streamStateFromFormat(std::ostream &out,
}
}
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
inline
void
formatImpl
(
std
::
ostream
&
out
,
inline
void
formatImpl
(
std
::
ostream
&
out
,
const
char
*
fmt
,
const
char
*
fmt
,
const
detail
::
FormatArg
*
formatters
,
int
numFormatters
)
{
const
detail
::
FormatArg
*
formatters
,
int
numFormatters
)
{
// Saved stream state
// Saved stream state
std
::
streamsize
origWidth
=
out
.
width
();
std
::
streamsize
origWidth
=
out
.
width
();
std
::
streamsize
origPrecision
=
out
.
precision
();
std
::
streamsize
origPrecision
=
out
.
precision
();
...
@@ -765,13 +746,9 @@ inline void formatImpl(std::ostream &out,
...
@@ -765,13 +746,9 @@ inline void formatImpl(std::ostream &out,
fmt
=
printFormatStringLiteral
(
out
,
fmt
);
fmt
=
printFormatStringLiteral
(
out
,
fmt
);
bool
spacePadPositive
=
false
;
bool
spacePadPositive
=
false
;
int
ntrunc
=
-
1
;
int
ntrunc
=
-
1
;
const
char
*
fmtEnd
=
streamStateFromFormat
(
out
,
const
char
*
fmtEnd
=
spacePadPositive
,
streamStateFromFormat
(
out
,
spacePadPositive
,
ntrunc
,
fmt
,
formatters
,
ntrunc
,
argIndex
,
numFormatters
);
fmt
,
formatters
,
argIndex
,
numFormatters
);
if
(
argIndex
>=
numFormatters
)
{
if
(
argIndex
>=
numFormatters
)
{
// Check args remain after reading any variable width/precision
// Check args remain after reading any variable width/precision
TINYFORMAT_ERROR
(
"tinyformat: Not enough format arguments"
);
TINYFORMAT_ERROR
(
"tinyformat: Not enough format arguments"
);
...
@@ -820,15 +797,14 @@ inline void formatImpl(std::ostream &out,
...
@@ -820,15 +797,14 @@ inline void formatImpl(std::ostream &out,
/// information has been stripped from the arguments, leaving just enough of a
/// information has been stripped from the arguments, leaving just enough of a
/// common interface to perform formatting as required.
/// common interface to perform formatting as required.
class
FormatList
{
class
FormatList
{
public:
public:
FormatList
(
detail
::
FormatArg
*
formatters
,
int
N
)
FormatList
(
detail
::
FormatArg
*
formatters
,
int
N
)
:
m_formatters
(
formatters
),
m_N
(
N
)
{}
:
m_formatters
(
formatters
),
m_N
(
N
)
{}
friend
void
vformat
(
std
::
ostream
&
out
,
friend
void
vformat
(
std
::
ostream
&
out
,
const
char
*
fmt
,
const
char
*
fmt
,
const
FormatList
&
list
);
const
FormatList
&
list
);
private:
private:
const
detail
::
FormatArg
*
m_formatters
;
const
detail
::
FormatArg
*
m_formatters
;
int
m_N
;
int
m_N
;
};
};
...
@@ -841,7 +817,7 @@ namespace detail {
...
@@ -841,7 +817,7 @@ namespace detail {
// Format list subclass with fixed storage to avoid dynamic allocation
// Format list subclass with fixed storage to avoid dynamic allocation
template
<
int
N
>
template
<
int
N
>
class
FormatListN
:
public
FormatList
{
class
FormatListN
:
public
FormatList
{
public:
public:
template
<
typename
...
Args
>
template
<
typename
...
Args
>
FormatListN
(
const
Args
&
...
args
)
FormatListN
(
const
Args
&
...
args
)
:
FormatList
(
&
m_formatterStore
[
0
],
N
),
:
FormatList
(
&
m_formatterStore
[
0
],
N
),
...
@@ -849,14 +825,14 @@ public:
...
@@ -849,14 +825,14 @@ public:
static_assert
(
sizeof
...(
args
)
==
N
,
"Number of args must be N"
);
static_assert
(
sizeof
...(
args
)
==
N
,
"Number of args must be N"
);
}
}
private:
private:
FormatArg
m_formatterStore
[
N
];
FormatArg
m_formatterStore
[
N
];
};
};
// Special 0-arg version - MSVC says zero-sized C array in struct is nonstandard
// Special 0-arg version - MSVC says zero-sized C array in struct is nonstandard
template
<
>
template
<
>
class
FormatListN
<
0
>
:
public
FormatList
{
class
FormatListN
<
0
>
:
public
FormatList
{
public:
public:
FormatListN
()
:
FormatList
(
0
,
0
)
{}
FormatListN
()
:
FormatList
(
0
,
0
)
{}
};
};
...
...
paddle/string/to_string.h
→
paddle/
fluid/
string/to_string.h
浏览文件 @
5ccab2dc
文件已移动
paddle/string/to_string_test.cc
→
paddle/
fluid/
string/to_string_test.cc
浏览文件 @
5ccab2dc
...
@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "
paddle/string/
to_string.h"
#include "to_string.h"
#include <gtest/gtest.h>
#include <gtest/gtest.h>
constexpr
char
kOutputString
[]
=
"User Defined Output"
;
constexpr
char
kOutputString
[]
=
"User Defined Output"
;
class
UserDefinedClass
{
class
UserDefinedClass
{
public:
public:
};
};
std
::
ostream
&
operator
<<
(
std
::
ostream
&
s
,
const
UserDefinedClass
&
ins
)
{
std
::
ostream
&
operator
<<
(
std
::
ostream
&
s
,
const
UserDefinedClass
&
ins
)
{
...
...
paddle/scripts/docker/build.sh
浏览文件 @
5ccab2dc
...
@@ -115,8 +115,8 @@ EOF
...
@@ -115,8 +115,8 @@ EOF
-DWITH_AVX
=
${
WITH_AVX
:-
ON
}
\
-DWITH_AVX
=
${
WITH_AVX
:-
ON
}
\
-DWITH_SWIG_PY
=
ON
\
-DWITH_SWIG_PY
=
ON
\
-DWITH_STYLE_CHECK
=
OFF
-DWITH_STYLE_CHECK
=
OFF
make
-j
`
nproc
`
gen_proto_py
make
-j
`
nproc
`
gen_proto_py
framework_py_proto
make
-j
`
nproc
`
paddle_python
make
-j
`
nproc
`
copy_paddle_pybind
make
-j
`
nproc
`
paddle_docs paddle_docs_cn paddle_api_docs
make
-j
`
nproc
`
paddle_docs paddle_docs_cn paddle_api_docs
popd
popd
fi
fi
...
...
paddle/scripts/travis/build_doc.sh
浏览文件 @
5ccab2dc
...
@@ -6,9 +6,9 @@ mkdir -p $TRAVIS_BUILD_DIR/build
...
@@ -6,9 +6,9 @@ mkdir -p $TRAVIS_BUILD_DIR/build
cd
$TRAVIS_BUILD_DIR
/build
cd
$TRAVIS_BUILD_DIR
/build
# Compile Documentation only.
# Compile Documentation only.
cmake ..
-DCMAKE_BUILD_TYPE
=
Debug
-DWITH_GPU
=
OFF
-DWITH_MKL
=
OFF
-DWITH_DOC
=
ON
cmake ..
-DCMAKE_BUILD_TYPE
=
Release
-DWITH_GPU
=
OFF
-DWITH_MKL
=
OFF
-DWITH_DOC
=
ON
-DWITH_STYLE_CHECK
=
OFF
make
-j
`
nproc
`
gen_proto_py
make
-j
`
nproc
`
gen_proto_py
framework_py_proto
make
-j
`
nproc
`
paddle_python
make
-j
`
nproc
`
copy_paddle_pybind
make
-j
`
nproc
`
paddle_docs paddle_docs_cn paddle_api_docs
make
-j
`
nproc
`
paddle_docs paddle_docs_cn paddle_api_docs
# check websites for broken links
# check websites for broken links
...
...
python/paddle/v2/fluid/distribute_transpiler.py
浏览文件 @
5ccab2dc
...
@@ -33,6 +33,57 @@ class VarBlock:
...
@@ -33,6 +33,57 @@ class VarBlock:
return
"%s:%d:%d"
%
(
self
.
varname
,
self
.
offset
,
self
.
size
)
return
"%s:%d:%d"
%
(
self
.
varname
,
self
.
offset
,
self
.
size
)
class UnionFind(object):
    """Union-find (disjoint-set) data structure.

    Keeps track of a set of elements partitioned into a number of
    disjoint (non-overlapping) subsets.

    Reference:
    https://en.wikipedia.org/wiki/Disjoint-set_data_structure

    Args:
        elementes(list): The initial element list. Elements must be
            hashable because they are used as dictionary keys. ``None``
            or an empty list creates an empty structure.
    """

    def __init__(self, elementes=None):
        self._parents = []  # index -> parent index
        self._index = {}  # element -> index
        self._curr_idx = 0
        if not elementes:
            elementes = []
        for ele in elementes:
            # Every element starts as the root of its own singleton set.
            self._parents.append(self._curr_idx)
            self._index[ele] = self._curr_idx
            self._curr_idx += 1

    def find(self, x):
        """Return the root index of element x, or -1 if x is unknown.

        The path is shortened while walking up to the root: every visited
        node is re-pointed to its grandparent.
        """
        if x not in self._index:
            return -1
        idx = self._index[x]
        while idx != self._parents[idx]:
            t = self._parents[idx]
            self._parents[idx] = self._parents[t]
            idx = t
        return idx

    def union(self, x, y):
        """Merge the sets containing x and y.

        Does nothing when either element is unknown. Without this guard
        the -1 sentinel returned by find() would be used as a list index
        and silently re-parent the last element.
        """
        x_root = self.find(x)
        y_root = self.find(y)
        if x_root == -1 or y_root == -1:
            return
        if x_root == y_root:
            return
        self._parents[x_root] = y_root

    def is_connected(self, x, y):
        """Return True if x and y are both known and share a root index."""
        x_root = self.find(x)
        # Two unknown elements both map to -1; that must not count as
        # being connected.
        return x_root != -1 and x_root == self.find(y)
def
same_or_split_var
(
p_name
,
var_name
):
def
same_or_split_var
(
p_name
,
var_name
):
return
p_name
==
var_name
or
p_name
.
startswith
(
var_name
+
".block"
)
return
p_name
==
var_name
or
p_name
.
startswith
(
var_name
+
".block"
)
...
@@ -140,6 +191,7 @@ class DistributeTranspiler:
...
@@ -140,6 +191,7 @@ class DistributeTranspiler:
for
b
in
param_blocks
:
for
b
in
param_blocks
:
varname
,
block_id
,
_
=
b
.
split
(
":"
)
varname
,
block_id
,
_
=
b
.
split
(
":"
)
send_outputs
.
append
(
param_var_mapping
[
varname
][
int
(
block_id
)])
send_outputs
.
append
(
param_var_mapping
[
varname
][
int
(
block_id
)])
# let send_op know which endpoint to send which var to, eplist has the same
# let send_op know which endpoint to send which var to, eplist has the same
# order as send_inputs.
# order as send_inputs.
eplist
=
split_method
(
send_inputs
,
pserver_endpoints
)
eplist
=
split_method
(
send_inputs
,
pserver_endpoints
)
...
@@ -178,6 +230,21 @@ class DistributeTranspiler:
...
@@ -178,6 +230,21 @@ class DistributeTranspiler:
outputs
=
{
"Out"
:
[
orig_param
]},
outputs
=
{
"Out"
:
[
orig_param
]},
attrs
=
{
"axis"
:
0
})
attrs
=
{
"axis"
:
0
})
self
.
lr_param_mapping
=
self
.
_create_lr_param_mapping
()
def
_create_lr_param_mapping
(
self
):
lr_mapping
=
dict
()
for
_
,
opt_op
in
enumerate
(
self
.
optimize_ops
):
if
not
opt_op
.
inputs
or
not
opt_op
.
inputs
.
has_key
(
"LearningRate"
)
\
or
not
opt_op
.
inputs
.
has_key
(
"Param"
):
continue
lr
=
opt_op
.
inputs
[
"LearningRate"
].
name
param
=
opt_op
.
inputs
[
"Param"
].
name
if
not
lr_mapping
.
has_key
(
lr
):
lr_mapping
.
update
({
lr
:
list
()})
lr_mapping
[
lr
].
append
(
param
)
return
lr_mapping
def
_create_vars_from_blocklist
(
self
,
program
,
block_list
):
def
_create_vars_from_blocklist
(
self
,
program
,
block_list
):
# Create respective variables using the block_list
# Create respective variables using the block_list
block_map
=
dict
()
block_map
=
dict
()
...
@@ -208,6 +275,7 @@ class DistributeTranspiler:
...
@@ -208,6 +275,7 @@ class DistributeTranspiler:
name
=
"%s.block%d"
%
(
varname
,
i
),
name
=
"%s.block%d"
%
(
varname
,
i
),
psersistable
=
False
,
psersistable
=
False
,
dtype
=
orig_var
.
dtype
,
dtype
=
orig_var
.
dtype
,
type
=
orig_var
.
type
,
shape
=
splited_shape
)
# flattend splited var
shape
=
splited_shape
)
# flattend splited var
var_mapping
[
varname
].
append
(
var
)
var_mapping
[
varname
].
append
(
var
)
return
var_mapping
return
var_mapping
...
@@ -269,6 +337,7 @@ class DistributeTranspiler:
...
@@ -269,6 +337,7 @@ class DistributeTranspiler:
name
=
"%s.trainer_%d"
%
(
var
.
name
,
i
),
name
=
"%s.trainer_%d"
%
(
var
.
name
,
i
),
psersistable
=
var
.
persistable
,
psersistable
=
var
.
persistable
,
dtype
=
var
.
dtype
,
dtype
=
var
.
dtype
,
type
=
var
.
type
,
shape
=
var
.
shape
)
shape
=
var
.
shape
)
var_list
.
append
(
var_each
)
var_list
.
append
(
var_each
)
return
var_list
return
var_list
...
@@ -300,52 +369,15 @@ class DistributeTranspiler:
...
@@ -300,52 +369,15 @@ class DistributeTranspiler:
pass
pass
return
orig_shape
return
orig_shape
def
_op_input_var
(
self
,
op
,
varname
):
def
_fetch_var_names
(
self
,
param_dict
):
pass
res
=
[]
if
not
param_dict
:
def
_is_op_on_pserver
(
self
,
endpoint
,
all_ops
,
idx
):
return
res
"""
for
_
,
values
in
param_dict
.
iteritems
():
Recursively check if the op need to run on current server.
if
not
isinstance
(
values
,
list
):
Assume that ops are in the execution order.
values
=
[
values
]
"""
res
+=
[
v
.
name
for
v
in
values
]
param_names
=
[
return
res
p
.
name
for
p
in
self
.
param_grad_ep_mapping
[
endpoint
][
"params"
]
]
op
=
all_ops
[
idx
]
input_names
=
set
(
op
.
input_names
)
# TODO(typhoonzero): using Param and Grad input name to identify
# that the operator is an optimization operator, need a better way.
if
"Param"
in
input_names
:
if
op
.
input
(
"Param"
)[
0
]
in
param_names
:
return
True
else
:
for
n
in
param_names
:
if
same_or_split_var
(
n
,
op
.
input
(
"Param"
)[
0
])
\
and
n
!=
op
.
input
(
"Param"
)[
0
]:
return
True
return
False
else
:
j
=
idx
-
1
while
j
>=
0
:
prev_op
=
all_ops
[
j
]
# prev_output_names = [o.name for o in prev_op.outputs.values()]
# prev_input_names = [o.name for o in prev_op.inputs.values()]
# NOTE(typhoonzero): consider list input/output
prev_output_names
=
prev_op
.
desc
.
output_arg_names
()
prev_input_names
=
prev_op
.
desc
.
input_arg_names
()
found1
=
False
found2
=
False
for
varname
in
op
.
desc
.
input_arg_names
():
if
varname
in
prev_output_names
:
found1
=
self
.
_is_op_on_pserver
(
endpoint
,
all_ops
,
j
)
# later ops may produce output for prev op's next batch use.
for
varname
in
op
.
desc
.
output_arg_names
():
if
varname
in
prev_input_names
:
found2
=
self
.
_is_op_on_pserver
(
endpoint
,
all_ops
,
j
)
if
found1
or
found2
:
return
True
j
-=
1
return
False
def
_append_pserver_ops
(
self
,
optimize_block
,
opt_op
,
endpoint
):
def
_append_pserver_ops
(
self
,
optimize_block
,
opt_op
,
endpoint
):
program
=
optimize_block
.
program
program
=
optimize_block
.
program
...
@@ -363,11 +395,7 @@ class DistributeTranspiler:
...
@@ -363,11 +395,7 @@ class DistributeTranspiler:
# do not append this op if current endpoint
# do not append this op if current endpoint
# is not dealing with this grad block
# is not dealing with this grad block
return
return
merged_var
=
program
.
global_block
().
create_var
(
merged_var
=
program
.
global_block
().
vars
[
grad_block
.
name
]
name
=
grad_block
.
name
,
persistable
=
grad_block
.
persistable
,
dtype
=
grad_block
.
dtype
,
shape
=
grad_block
.
shape
)
# append merging ops if trainers > 1
# append merging ops if trainers > 1
if
self
.
trainers
>
1
:
if
self
.
trainers
>
1
:
vars2merge
=
self
.
_create_var_for_trainers
(
vars2merge
=
self
.
_create_var_for_trainers
(
...
@@ -398,13 +426,19 @@ class DistributeTranspiler:
...
@@ -398,13 +426,19 @@ class DistributeTranspiler:
shape
=
param_block
.
shape
)
shape
=
param_block
.
shape
)
new_inputs
[
key
]
=
tmpvar
new_inputs
[
key
]
=
tmpvar
elif
key
==
"LearningRate"
:
# learning rate variable has already been created by non-optimize op,
# don't create it once again.
new_inputs
[
key
]
=
program
.
global_block
().
vars
[
opt_op
.
input
(
key
)[
0
]]
for
key
in
opt_op
.
input_names
:
for
key
in
opt_op
.
input_names
:
if
key
in
[
"Param"
,
"Grad"
]:
new_shape
=
None
if
key
in
[
"Param"
,
"Grad"
,
"LearningRate"
]:
continue
continue
var
=
program
.
global_block
().
vars
[
opt_op
.
input
(
key
)[
0
]]
# update accumulator variable shape
# update accumulator variable shape
param_shape
=
new_inputs
[
"Param"
].
shape
param_shape
=
new_inputs
[
"Param"
].
shape
var
=
program
.
global_block
().
vars
[
opt_op
.
input
(
key
)[
0
]]
new_shape
=
self
.
_get_optimizer_input_shape
(
opt_op
.
type
,
key
,
new_shape
=
self
.
_get_optimizer_input_shape
(
opt_op
.
type
,
key
,
var
.
shape
,
param_shape
)
var
.
shape
,
param_shape
)
tmpvar
=
program
.
global_block
().
create_var
(
tmpvar
=
program
.
global_block
().
create_var
(
...
@@ -415,12 +449,11 @@ class DistributeTranspiler:
...
@@ -415,12 +449,11 @@ class DistributeTranspiler:
new_inputs
[
key
]
=
tmpvar
new_inputs
[
key
]
=
tmpvar
# change output's ParamOut variable
# change output's ParamOut variable
outputs
=
self
.
_get_output_map_from_op
(
program
.
global_block
(),
opt_op
)
opt_op
.
outputs
[
"ParamOut"
]
=
new_inputs
[
"Param"
]
outputs
[
"ParamOut"
]
=
new_inputs
[
"Param"
]
optimize_block
.
append_op
(
optimize_block
.
append_op
(
type
=
opt_op
.
type
,
type
=
opt_op
.
type
,
inputs
=
new_inputs
,
inputs
=
new_inputs
,
outputs
=
outputs
,
outputs
=
o
pt_op
.
o
utputs
,
attrs
=
opt_op
.
attrs
)
attrs
=
opt_op
.
attrs
)
def
_append_pserver_non_opt_ops
(
self
,
optimize_block
,
opt_op
):
def
_append_pserver_non_opt_ops
(
self
,
optimize_block
,
opt_op
):
...
@@ -428,11 +461,10 @@ class DistributeTranspiler:
...
@@ -428,11 +461,10 @@ class DistributeTranspiler:
# Append the ops for parameters that do not need to be optimized/updated
# Append the ops for parameters that do not need to be optimized/updated
inputs
=
self
.
_get_input_map_from_op
(
self
.
program
.
global_block
().
vars
,
inputs
=
self
.
_get_input_map_from_op
(
self
.
program
.
global_block
().
vars
,
opt_op
)
opt_op
)
for
var
in
inputs
.
itervalues
():
for
varlist
in
inputs
.
itervalues
():
if
type
(
var
)
==
list
:
if
not
isinstance
(
varlist
,
list
):
varlist
=
var
varlist
=
[
varlist
]
else
:
varlist
=
[
var
]
for
var
in
varlist
:
for
var
in
varlist
:
if
not
program
.
global_block
().
vars
.
has_key
(
var
.
name
):
if
not
program
.
global_block
().
vars
.
has_key
(
var
.
name
):
program
.
global_block
().
create_var
(
program
.
global_block
().
create_var
(
...
@@ -444,12 +476,70 @@ class DistributeTranspiler:
...
@@ -444,12 +476,70 @@ class DistributeTranspiler:
outputs
=
self
.
_get_output_map_from_op
(
self
.
program
.
global_block
().
vars
,
outputs
=
self
.
_get_output_map_from_op
(
self
.
program
.
global_block
().
vars
,
opt_op
)
opt_op
)
for
varlist
in
outputs
.
itervalues
():
if
not
isinstance
(
varlist
,
list
):
varlist
=
[
varlist
]
for
var
in
varlist
:
program
.
global_block
().
create_var
(
name
=
var
.
name
,
persistable
=
var
.
persistable
,
dtype
=
var
.
dtype
,
shape
=
var
.
shape
)
optimize_block
.
append_op
(
optimize_block
.
append_op
(
type
=
opt_op
.
type
,
type
=
opt_op
.
type
,
inputs
=
inputs
,
inputs
=
inputs
,
outputs
=
outputs
,
outputs
=
outputs
,
attrs
=
opt_op
.
attrs
)
attrs
=
opt_op
.
attrs
)
def
_is_op_connected
(
self
,
op1
,
op2
):
# If one op's input is another op's output or
# one op's output is another op's input, we say
# the two operator is connected.
op1_input_names
=
self
.
_fetch_var_names
(
op1
.
inputs
)
op1_output_names
=
self
.
_fetch_var_names
(
op1
.
outputs
)
op2_input_names
=
self
.
_fetch_var_names
(
op2
.
inputs
)
op2_output_names
=
self
.
_fetch_var_names
(
op2
.
outputs
)
if
set
(
op1_output_names
)
&
set
(
op2_input_names
)
or
\
set
(
op1_input_names
)
&
set
(
op2_output_names
):
return
True
return
False
def _create_ufind(self, optimize_ops):
    """Build a union-find structure over the given optimize ops.

    Every op starts in its own set; each pair of ops for which
    ``self._is_op_connected`` is true is merged into the same set.

    Args:
        optimize_ops(list): The operators to partition.

    Returns:
        UnionFind: The structure with all connected ops merged.
    """
    ufind = UnionFind(optimize_ops)
    for i, op1 in enumerate(optimize_ops):
        # Start after i: pairing an op with itself can never change the
        # partition, so the self-comparison is skipped.
        for op2 in optimize_ops[i + 1:]:
            if self._is_op_connected(op1, op2):
                ufind.union(op1, op2)
    return ufind
def
_is_opt_op
(
self
,
op
):
# NOTE: It's a HACK implement.
# optimize op: SGDOptimize, MomentumOptimizer, AdamOptimizer and etc...
if
op
.
inputs
and
op
.
inputs
.
has_key
(
"Param"
)
\
and
op
.
inputs
.
has_key
(
"LearningRate"
):
return
True
return
False
def
_is_opt_op_on_pserver
(
self
,
endpoint
,
op
):
param_names
=
[
p
.
name
for
p
in
self
.
param_grad_ep_mapping
[
endpoint
][
"params"
]
]
if
op
.
inputs
[
"Param"
].
name
in
param_names
:
return
True
else
:
for
n
in
param_names
:
param
=
op
.
inputs
[
"Param"
].
name
if
same_or_split_var
(
n
,
param
)
and
n
!=
op
.
inputs
[
"Param"
].
name
:
return
True
return
False
return
False
def
get_pserver_program
(
self
,
endpoint
):
def
get_pserver_program
(
self
,
endpoint
):
"""
"""
Get pserver side program using the endpoint
Get pserver side program using the endpoint
...
@@ -469,26 +559,38 @@ class DistributeTranspiler:
...
@@ -469,26 +559,38 @@ class DistributeTranspiler:
pserver_program
.
global_block
().
create_var
(
pserver_program
.
global_block
().
create_var
(
name
=
v
.
name
,
persistable
=
True
,
dtype
=
v
.
dtype
,
shape
=
v
.
shape
)
name
=
v
.
name
,
persistable
=
True
,
dtype
=
v
.
dtype
,
shape
=
v
.
shape
)
for
trainer_id
in
xrange
(
self
.
trainers
):
for
trainer_id
in
xrange
(
self
.
trainers
):
print
(
"create variable for program: %s.trainer_%d"
%
(
v
.
name
,
trainer_id
))
pserver_program
.
global_block
().
create_var
(
pserver_program
.
global_block
().
create_var
(
name
=
"%s.trainer_%d"
%
(
v
.
name
,
trainer_id
),
name
=
"%s.trainer_%d"
%
(
v
.
name
,
trainer_id
),
persistable
=
True
,
persistable
=
True
,
dtype
=
v
.
dtype
,
dtype
=
v
.
dtype
,
shape
=
v
.
shape
)
shape
=
v
.
shape
)
# step6
# step6
optimize_block
=
pserver_program
.
create_block
(
0
)
optimize_block
=
pserver_program
.
create_block
(
0
)
# Iterate through the ops and append ops as needed
# step 6.1
for
idx
,
opt_op
in
enumerate
(
self
.
optimize_ops
):
# Create a union-find data struct by optimize ops,
is_op_on_pserver
=
self
.
_is_op_on_pserver
(
endpoint
,
# If two ops are connected, we could add these two ops
self
.
optimize_ops
,
idx
)
# into one set.
if
not
is_op_on_pserver
:
ufind
=
self
.
_create_ufind
(
self
.
optimize_ops
)
continue
# step 6.2
if
"Grad"
in
opt_op
.
desc
.
input_arg_names
():
# Iterate through the ops and append optimize op which
self
.
_append_pserver_ops
(
optimize_block
,
opt_op
,
endpoint
)
# located on current pserver
opt_op_on_pserver
=
[]
for
_
,
op
in
enumerate
(
self
.
optimize_ops
):
if
self
.
_is_opt_op
(
op
)
and
self
.
_is_opt_op_on_pserver
(
endpoint
,
op
):
opt_op_on_pserver
.
append
(
op
)
# step 6.3
# Iterate through the ops, and if an op and the optimize ops
# which are located on the current pserver are in one set, then
# append it to the sub program.
for
_
,
op
in
enumerate
(
self
.
optimize_ops
):
for
_
,
opt_op
in
enumerate
(
opt_op_on_pserver
):
if
ufind
.
is_connected
(
op
,
opt_op
):
if
self
.
_is_opt_op
(
op
):
self
.
_append_pserver_ops
(
optimize_block
,
op
,
endpoint
)
else
:
else
:
self
.
_append_pserver_non_opt_ops
(
optimize_block
,
opt_
op
)
self
.
_append_pserver_non_opt_ops
(
optimize_block
,
op
)
break
# Append the listen_and_serv op
# Append the listen_and_serv op
pserver_program
.
global_block
().
append_op
(
pserver_program
.
global_block
().
append_op
(
type
=
"listen_and_serv"
,
type
=
"listen_and_serv"
,
...
...
python/paddle/v2/fluid/layers/__init__.py
浏览文件 @
5ccab2dc
...
@@ -16,6 +16,8 @@ import ops
...
@@ -16,6 +16,8 @@ import ops
from
ops
import
*
from
ops
import
*
import
nn
import
nn
from
nn
import
*
from
nn
import
*
import
detection
from
detection
import
*
import
io
import
io
from
io
import
*
from
io
import
*
import
tensor
import
tensor
...
@@ -31,6 +33,7 @@ from detection import *
...
@@ -31,6 +33,7 @@ from detection import *
__all__
=
[]
__all__
=
[]
__all__
+=
math_op_patch
.
__all__
__all__
+=
math_op_patch
.
__all__
__all__
+=
detection
.
__all__
__all__
+=
nn
.
__all__
__all__
+=
nn
.
__all__
__all__
+=
io
.
__all__
__all__
+=
io
.
__all__
__all__
+=
tensor
.
__all__
__all__
+=
tensor
.
__all__
...
...
python/paddle/v2/fluid/layers/detection.py
浏览文件 @
5ccab2dc
...
@@ -22,7 +22,106 @@ from ops import reshape
...
@@ -22,7 +22,106 @@ from ops import reshape
from
operator
import
mul
from
operator
import
mul
import
math
import
math
__all__
=
[
'prior_box'
,
]
__all__
=
[
'detection_output'
,
'prior_box'
,
]
def detection_output(scores,
                     loc,
                     prior_box,
                     prior_box_var,
                     background_label=0,
                     nms_threshold=0.3,
                     nms_top_k=400,
                     keep_top_k=200,
                     score_threshold=0.01,
                     nms_eta=1.0):
    """
    **Detection Output Layer**

    This layer decodes the predicted locations against the prior boxes
    (``box_coder`` op, ``decode_center_size``) and then applies NMS
    (``multiclass_nms`` op) to the decoded boxes. The output's shape of
    this layer could be zero if there is no valid bounding box.

    Args:
        scores(Variable): A 3-D Tensor with shape [N, C, M] represents the
            predicted confidence predictions. N is the batch size, C is the
            class number, M is number of bounding boxes. For each category
            there are total M scores which correspond to M bounding boxes.
        loc(Variable): A 3-D Tensor with shape [N, M, 4] represents the
            predicted locations of M bounding boxes. N is the batch size,
            and each bounding box has four coordinate values and the layout
            is [xmin, ymin, xmax, ymax].
        prior_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes,
            each box is represented as [xmin, ymin, xmax, ymax],
            [xmin, ymin] is the left top coordinate of the anchor box,
            if the input is image feature map, they are close to the origin
            of the coordinate system. [xmax, ymax] is the right bottom
            coordinate of the anchor box.
        prior_box_var(Variable): A 2-D Tensor with shape [M, 4] holds M group
            of variance.
        background_label(int): The index of background label,
            the background label will be ignored. If set to -1, then all
            categories will be considered.
        nms_threshold(float): The threshold to be used in NMS.
        nms_top_k(int): Maximum number of detections to be kept according
            to the confidences after filtering detections based on
            score_threshold.
        keep_top_k(int): Number of total bboxes to be kept per image after
            NMS step. -1 means keeping all bboxes after NMS step.
        score_threshold(float): Threshold to filter out bounding boxes with
            low confidence score. If not provided, consider all boxes.
        nms_eta(float): The parameter for adaptive NMS.

    Returns:
        The detected bounding boxes which are a Tensor.

    Examples:
        .. code-block:: python

            pb = layers.data(name='prior_box', shape=[10, 4],
                         append_batch_size=False, dtype='float32')
            pbv = layers.data(name='prior_box_var', shape=[10, 4],
                          append_batch_size=False, dtype='float32')
            loc = layers.data(name='target_box', shape=[21, 4],
                          append_batch_size=False, dtype='float32')
            scores = layers.data(name='scores', shape=[2, 21, 10],
                          append_batch_size=False, dtype='float32')
            nmsed_outs = fluid.layers.detection_output(scores=scores,
                                       loc=loc,
                                       prior_box=pb,
                                       prior_box_var=pbv)
    """
    # Must stay the first statement: locals() is expected to contain only
    # the call arguments at this point.
    helper = LayerHelper("detection_output", **locals())

    # Step 1: decode the predicted offsets into absolute boxes.
    decoded_box = helper.create_tmp_variable(dtype=loc.dtype)
    helper.append_op(
        type="box_coder",
        inputs={
            'PriorBox': prior_box,
            'PriorBoxVar': prior_box_var,
            'TargetBox': loc
        },
        outputs={'OutputBox': decoded_box},
        attrs={'code_type': 'decode_center_size'})

    # Step 2: run multi-class NMS on the decoded boxes. The caller's
    # background_label and nms_eta are forwarded instead of being replaced
    # by hard-coded 0 / 1.0 (their defaults keep the old behaviour).
    nmsed_outs = helper.create_tmp_variable(dtype=decoded_box.dtype)
    helper.append_op(
        type="multiclass_nms",
        inputs={'Scores': scores,
                'BBoxes': decoded_box},
        outputs={'Out': nmsed_outs},
        attrs={
            'background_label': background_label,
            'nms_threshold': nms_threshold,
            'nms_top_k': nms_top_k,
            'keep_top_k': keep_top_k,
            'score_threshold': score_threshold,
            'nms_eta': nms_eta
        })
    return nmsed_outs
def
prior_box
(
inputs
,
def
prior_box
(
inputs
,
...
@@ -85,16 +184,15 @@ def prior_box(inputs,
...
@@ -85,16 +184,15 @@ def prior_box(inputs,
Examples:
Examples:
.. code-block:: python
.. code-block:: python
prior_box
es
(
prior_box(
inputs = [conv1, conv2, conv3, conv4, conv5, conv6],
inputs = [conv1, conv2, conv3, conv4, conv5, conv6],
image = data,
image = data,
min_ratio = 20, # 0.20
min_ratio = 20, # 0.20
max_ratio = 90, # 0.90
max_ratio = 90, # 0.90
steps = [8., 16., 32., 64., 100., 300.],
aspect_ratios = [[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]],
base_size = 300,
offset = 0.5,
offset = 0.5,
base_size = 300,
variance = [0.1,0.1,0.1,0.1],
variance = [0.1,0.1,0.1,0.1],
aspect_ratios = [[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]],
flip=True,
flip=True,
clip=True)
clip=True)
"""
"""
...
...
python/paddle/v2/fluid/layers/math_op_patch.py
浏览文件 @
5ccab2dc
...
@@ -117,6 +117,7 @@ def monkey_patch_variable():
...
@@ -117,6 +117,7 @@ def monkey_patch_variable():
tmp_name
=
unique_tmp_name
()
tmp_name
=
unique_tmp_name
()
out
=
self
.
block
.
create_var
(
name
=
tmp_name
,
dtype
=
lhs_dtype
)
out
=
self
.
block
.
create_var
(
name
=
tmp_name
,
dtype
=
lhs_dtype
)
self
.
block
.
append_op
(
self
.
block
.
append_op
(
type
=
op_type
,
type
=
op_type
,
inputs
=
{
'X'
:
[
self
],
inputs
=
{
'X'
:
[
self
],
...
...
python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
浏览文件 @
5ccab2dc
...
@@ -99,7 +99,7 @@ elif training_role == "TRAINER":
...
@@ -99,7 +99,7 @@ elif training_role == "TRAINER":
exe
.
run
(
fluid
.
default_startup_program
())
exe
.
run
(
fluid
.
default_startup_program
())
for
pass_id
in
range
(
PASS_NUM
):
for
pass_id
in
range
(
PASS_NUM
):
for
data
in
train_reader
():
for
data
in
train_reader
():
avg_cost_np
=
exe
.
run
(
fluid
.
default_main
_program
(),
avg_cost_np
=
exe
.
run
(
t
.
get_trainer
_program
(),
feed
=
feeder
.
feed
(
data
),
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
avg_cost
])
fetch_list
=
[
avg_cost
])
print
(
"avg_cost_np"
,
avg_cost_np
)
print
(
"avg_cost_np"
,
avg_cost_np
)
...
...
python/paddle/v2/fluid/tests/test_cpp_reader.py
浏览文件 @
5ccab2dc
...
@@ -64,9 +64,7 @@ exe = fluid.Executor(place)
...
@@ -64,9 +64,7 @@ exe = fluid.Executor(place)
[
res1
,
res2
]
=
exe
.
run
(
prog
,
fetch_list
=
[
out1
,
out2
])
[
res1
,
res2
]
=
exe
.
run
(
prog
,
fetch_list
=
[
out1
,
out2
])
test_pass
=
res1
.
shape
==
(
10
,
2
)
and
res2
.
shape
==
(
10
,
1
)
if
not
(
res1
.
shape
==
(
10
,
2
)
and
res2
.
shape
==
(
10
,
1
)):
if
not
test_pass
:
exit
(
1
)
exit
(
1
)
exit
(
0
)
exit
(
0
)
python/paddle/v2/fluid/tests/test_detection.py
0 → 100644
浏览文件 @
5ccab2dc
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
paddle.v2.fluid
as
fluid
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.layers.detection
as
detection
from
paddle.v2.fluid.framework
import
Program
,
program_guard
import
unittest
import
numpy
as
np
class TestBook(unittest.TestCase):
    """Smoke test: a ``detection_output`` layer can be built in a program."""

    def test_detection_output(self):
        """Declare the four inputs, add the layer and check it yields a value."""
        program = Program()
        # Keyword arguments shared by every input declared below.
        common = dict(append_batch_size=False, dtype='float32')
        with program_guard(program):
            prior_boxes = layers.data(
                name='prior_box', shape=[10, 4], **common)
            prior_box_vars = layers.data(
                name='prior_box_var', shape=[10, 4], **common)
            locations = layers.data(
                name='target_box', shape=[20, 4], **common)
            confidences = layers.data(
                name='scores', shape=[2, 20, 10], **common)
            detections = layers.detection_output(
                scores=confidences,
                loc=locations,
                prior_box=prior_boxes,
                prior_box_var=prior_box_vars)
            self.assertIsNotNone(detections)
        # Dump the assembled program for manual inspection.
        print(str(program))
class TestPriorBox(unittest.TestCase):
    """Checks the declared and fetched output shapes of ``prior_box``."""

    def test_prior_box(self):
        """Run the shape check for the CPU and the CUDA configuration."""
        self.check_prior_box(use_cuda=False)
        self.check_prior_box(use_cuda=True)

    def prior_box_output(self, data_shape):
        """Stack five conv layers on an image input and attach ``prior_box``.

        Args:
            data_shape: Shape passed to the 'pixel' data layer.

        Returns:
            The ``(box, var)`` pair returned by ``detection.prior_box``.
        """
        images = fluid.layers.data(
            name='pixel', shape=data_shape, dtype='float32')

        # Five identical stride-2 convolutions, each fed by the previous one.
        feature_maps = []
        layer_input = images
        for _ in range(5):
            layer_input = fluid.layers.conv2d(
                input=layer_input,
                num_filters=3,
                filter_size=3,
                stride=2,
                use_cudnn=False)
            feature_maps.append(layer_input)

        box, var = detection.prior_box(
            # The last feature map is deliberately listed twice so that six
            # inputs match the six aspect-ratio groups below.
            inputs=feature_maps + [feature_maps[-1]],
            image=images,
            min_ratio=20,
            max_ratio=90,
            # steps=[8, 16, 32, 64, 100, 300],  (left at its default)
            aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]],
            base_size=300,
            offset=0.5,
            flip=True,
            clip=True)
        return box, var

    def check_prior_box(self, use_cuda):
        """Build and run the network, comparing static and runtime shapes.

        Args:
            use_cuda: When true the check returns immediately, because
                prior_box only supports CPU.
        """
        if use_cuda:
            # prior_box only supports CPU.
            return

        data_shape = [3, 224, 224]
        box, var = self.prior_box_output(data_shape)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        batch = [4]  # batch is not used in the prior_box.

        # unittest assertions are used instead of bare `assert` so the
        # checks still run under `python -O`.
        self.assertEqual(box.shape[1], 4)
        self.assertEqual(var.shape[1], 4)
        self.assertEqual(box.shape, var.shape)
        self.assertEqual(len(box.shape), 2)

        x = np.random.random(batch + data_shape).astype("float32")
        tensor_x = core.LoDTensor()
        tensor_x.set(x, place)
        # `variances` rather than `vars`, to avoid shadowing the builtin.
        boxes, variances = exe.run(fluid.default_main_program(),
                                   feed={'pixel': tensor_x},
                                   fetch_list=[box, var])
        self.assertEqual(variances.shape, var.shape)
        self.assertEqual(boxes.shape, box.shape)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/v2/fluid/tests/test_multiclass_nms_op.py
浏览文件 @
5ccab2dc
...
@@ -137,7 +137,7 @@ def batched_multiclass_nms(boxes, scores, background, score_threshold,
...
@@ -137,7 +137,7 @@ def batched_multiclass_nms(boxes, scores, background, score_threshold,
det_outs
=
[]
det_outs
=
[]
lod
=
[
0
]
lod
=
[
0
]
for
n
in
range
(
batch_size
):
for
n
in
range
(
batch_size
):
nmsed_outs
,
nmsed_num
=
multiclass_nms
(
boxes
,
scores
[
n
],
background
,
nmsed_outs
,
nmsed_num
=
multiclass_nms
(
boxes
[
n
]
,
scores
[
n
],
background
,
score_threshold
,
nms_threshold
,
score_threshold
,
nms_threshold
,
nms_top_k
,
keep_top_k
)
nms_top_k
,
keep_top_k
)
lod
.
append
(
lod
[
-
1
]
+
nmsed_num
)
lod
.
append
(
lod
[
-
1
]
+
nmsed_num
)
...
@@ -145,7 +145,7 @@ def batched_multiclass_nms(boxes, scores, background, score_threshold,
...
@@ -145,7 +145,7 @@ def batched_multiclass_nms(boxes, scores, background, score_threshold,
for
c
,
indices
in
nmsed_outs
.
iteritems
():
for
c
,
indices
in
nmsed_outs
.
iteritems
():
for
idx
in
indices
:
for
idx
in
indices
:
xmin
,
ymin
,
xmax
,
ymax
=
boxes
[
idx
][:]
xmin
,
ymin
,
xmax
,
ymax
=
boxes
[
n
][
idx
][:]
det_outs
.
append
([
c
,
scores
[
n
][
c
][
idx
],
xmin
,
ymin
,
xmax
,
ymax
])
det_outs
.
append
([
c
,
scores
[
n
][
c
][
idx
],
xmin
,
ymin
,
xmax
,
ymax
])
return
det_outs
,
lod
return
det_outs
,
lod
...
@@ -179,9 +179,9 @@ class TestMulticlassNMSOp(OpTest):
...
@@ -179,9 +179,9 @@ class TestMulticlassNMSOp(OpTest):
scores
=
np
.
reshape
(
scores
,
(
N
,
M
,
C
))
scores
=
np
.
reshape
(
scores
,
(
N
,
M
,
C
))
scores
=
np
.
transpose
(
scores
,
(
0
,
2
,
1
))
scores
=
np
.
transpose
(
scores
,
(
0
,
2
,
1
))
boxes
=
np
.
random
.
random
((
M
,
BOX_SIZE
)).
astype
(
'float32'
)
boxes
=
np
.
random
.
random
((
N
,
M
,
BOX_SIZE
)).
astype
(
'float32'
)
boxes
[:,
0
:
2
]
=
boxes
[
:,
0
:
2
]
*
0.5
boxes
[:,
:,
0
:
2
]
=
boxes
[:,
:,
0
:
2
]
*
0.5
boxes
[:,
2
:
4
]
=
boxes
[
:,
2
:
4
]
*
0.5
+
0.5
boxes
[:,
:,
2
:
4
]
=
boxes
[:,
:,
2
:
4
]
*
0.5
+
0.5
nmsed_outs
,
lod
=
batched_multiclass_nms
(
boxes
,
scores
,
background
,
nmsed_outs
,
lod
=
batched_multiclass_nms
(
boxes
,
scores
,
background
,
score_threshold
,
nms_threshold
,
score_threshold
,
nms_threshold
,
...
...
python/paddle/v2/fluid/tests/test_prior_boxes.py
已删除
100644 → 0
浏览文件 @
bbff442e
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
numpy
as
np
import
paddle.v2.fluid
as
fluid
import
paddle.v2.fluid.layers.detection
as
detection
import
paddle.v2.fluid.core
as
core
import
unittest
def
prior_box_output
(
data_shape
):
images
=
fluid
.
layers
.
data
(
name
=
'pixel'
,
shape
=
data_shape
,
dtype
=
'float32'
)
conv1
=
fluid
.
layers
.
conv2d
(
input
=
images
,
num_filters
=
3
,
filter_size
=
3
,
stride
=
2
,
use_cudnn
=
False
)
conv2
=
fluid
.
layers
.
conv2d
(
input
=
conv1
,
num_filters
=
3
,
filter_size
=
3
,
stride
=
2
,
use_cudnn
=
False
)
conv3
=
fluid
.
layers
.
conv2d
(
input
=
conv2
,
num_filters
=
3
,
filter_size
=
3
,
stride
=
2
,
use_cudnn
=
False
)
conv4
=
fluid
.
layers
.
conv2d
(
input
=
conv3
,
num_filters
=
3
,
filter_size
=
3
,
stride
=
2
,
use_cudnn
=
False
)
conv5
=
fluid
.
layers
.
conv2d
(
input
=
conv4
,
num_filters
=
3
,
filter_size
=
3
,
stride
=
2
,
use_cudnn
=
False
)
box
,
var
=
detection
.
prior_box
(
inputs
=
[
conv1
,
conv2
,
conv3
,
conv4
,
conv5
,
conv5
],
image
=
images
,
min_ratio
=
20
,
max_ratio
=
90
,
# steps=[8, 16, 32, 64, 100, 300],
aspect_ratios
=
[[
2.
],
[
2.
,
3.
],
[
2.
,
3.
],
[
2.
,
3.
],
[
2.
],
[
2.
]],
base_size
=
300
,
offset
=
0.5
,
flip
=
True
,
clip
=
True
)
return
box
,
var
def
main
(
use_cuda
):
if
use_cuda
:
# prior_box only support CPU.
return
data_shape
=
[
3
,
224
,
224
]
box
,
var
=
prior_box_output
(
data_shape
)
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
fluid
.
default_startup_program
())
batch
=
[
4
]
# batch is not used in the prior_box.
assert
box
.
shape
[
1
]
==
4
assert
var
.
shape
[
1
]
==
4
assert
box
.
shape
==
var
.
shape
assert
len
(
box
.
shape
)
==
2
for
_
in
range
(
1
):
x
=
np
.
random
.
random
(
batch
+
data_shape
).
astype
(
"float32"
)
tensor_x
=
core
.
LoDTensor
()
tensor_x
.
set
(
x
,
place
)
boxes
,
vars
=
exe
.
run
(
fluid
.
default_main_program
(),
feed
=
{
'pixel'
:
tensor_x
},
fetch_list
=
[
box
,
var
])
assert
vars
.
shape
==
var
.
shape
assert
boxes
.
shape
==
box
.
shape
class
TestFitALine
(
unittest
.
TestCase
):
def
test_cpu
(
self
):
main
(
use_cuda
=
False
)
def
test_cuda
(
self
):
main
(
use_cuda
=
True
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/v2/fluid/tests/test_sequence_expand.py
浏览文件 @
5ccab2dc
...
@@ -73,5 +73,20 @@ class TestSequenceExpandCase3(TestSequenceExpand):
...
@@ -73,5 +73,20 @@ class TestSequenceExpandCase3(TestSequenceExpand):
self
.
inputs
=
{
'X'
:
(
x_data
,
x_lod
),
'Y'
:
(
y_data
,
y_lod
)}
self
.
inputs
=
{
'X'
:
(
x_data
,
x_lod
),
'Y'
:
(
y_data
,
y_lod
)}
class TestSequenceExpandCase4(TestSequenceExpand):
    """sequence_expand case: 2x5 X with a one-level LoD, Y with a two-level LoD."""

    def set_data(self):
        """Populate ``self.inputs`` with the (data, lod) pairs for X and Y."""
        # Fixed 2x5 input, written row by row.
        x_rows = [[0.1, 0.3, 0.2, 0.15, 0.25],
                  [0.2, 0.15, 0.25, 0.1, 0.3]]
        x_pair = (np.array(x_rows, dtype='float32'), [[0, 1, 2]])

        # Random 2x1 Y carrying two identical LoD levels.
        y_values = np.random.uniform(0.1, 1, [2, 1]).astype('float32')
        y_pair = (y_values, [[0, 1, 2], [0, 1, 2]])

        self.inputs = {'X': x_pair, 'Y': y_pair}
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/v2/fluid/tests/test_split_op.py
浏览文件 @
5ccab2dc
...
@@ -20,11 +20,11 @@ from op_test import OpTest
...
@@ -20,11 +20,11 @@ from op_test import OpTest
class
TestSplitOp
(
OpTest
):
class
TestSplitOp
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"split"
self
.
op_type
=
"split"
axis
=
0
axis
=
1
x
=
np
.
random
.
random
((
4
,
2
,
5
)).
astype
(
'float32'
)
x
=
np
.
random
.
random
((
4
,
5
,
6
)).
astype
(
'float32'
)
out
=
np
.
split
(
x
,
[
1
,
3
],
axis
)
out
=
np
.
split
(
x
,
[
2
,
3
],
axis
)
self
.
inputs
=
{
'X'
:
x
}
self
.
inputs
=
{
'X'
:
x
}
self
.
attrs
=
{
'axis'
:
axis
,
'sections'
:
[
1
,
2
,
1
]}
self
.
attrs
=
{
'axis'
:
axis
,
'sections'
:
[
2
,
1
,
2
]}
self
.
outputs
=
{
'Out'
:
[(
'out%d'
%
i
,
out
[
i
])
\
self
.
outputs
=
{
'Out'
:
[(
'out%d'
%
i
,
out
[
i
])
\
for
i
in
xrange
(
len
(
out
))]}
for
i
in
xrange
(
len
(
out
))]}
...
...
python/paddle/v2/fluid/tests/test_target_assign_op.py
浏览文件 @
5ccab2dc
...
@@ -43,7 +43,7 @@ def gen_match_and_neg_indices(num_prior, gt_lod, neg_lod):
...
@@ -43,7 +43,7 @@ def gen_match_and_neg_indices(num_prior, gt_lod, neg_lod):
def
target_assign
(
encoded_box
,
gt_label
,
match_indices
,
neg_indices
,
gt_lod
,
def
target_assign
(
encoded_box
,
gt_label
,
match_indices
,
neg_indices
,
gt_lod
,
neg_lod
,
background_label
):
neg_lod
,
mismatch_value
):
batch_size
,
num_prior
=
match_indices
.
shape
batch_size
,
num_prior
=
match_indices
.
shape
# init target bbox
# init target bbox
...
@@ -52,7 +52,7 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
...
@@ -52,7 +52,7 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
trg_box_wt
=
np
.
zeros
((
batch_size
,
num_prior
,
1
)).
astype
(
'float32'
)
trg_box_wt
=
np
.
zeros
((
batch_size
,
num_prior
,
1
)).
astype
(
'float32'
)
# init target label
# init target label
trg_label
=
np
.
ones
((
batch_size
,
num_prior
,
1
)).
astype
(
'int32'
)
trg_label
=
np
.
ones
((
batch_size
,
num_prior
,
1
)).
astype
(
'int32'
)
trg_label
=
trg_label
*
background_label
trg_label
=
trg_label
*
mismatch_value
# init weight for target label
# init weight for target label
trg_label_wt
=
np
.
zeros
((
batch_size
,
num_prior
,
1
)).
astype
(
'float32'
)
trg_label_wt
=
np
.
zeros
((
batch_size
,
num_prior
,
1
)).
astype
(
'float32'
)
...
@@ -65,53 +65,90 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
...
@@ -65,53 +65,90 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
# target bbox
# target bbox
for
v
,
c
in
zip
(
col_val
+
gt_start
,
col_ids
[
0
].
tolist
()):
for
v
,
c
in
zip
(
col_val
+
gt_start
,
col_ids
[
0
].
tolist
()):
trg_box
[
i
][
c
][:]
=
encoded_box
[
v
][
c
][:]
trg_box
[
i
][
c
][:]
=
encoded_box
[
v
][
c
][:]
# weight for target bbox
# weight for target bbox
trg_box_wt
[
i
][
col_ids
]
=
1.0
trg_box_wt
[
i
][
col_ids
]
=
1.0
trg_label
[
i
][
col_ids
]
=
gt_label
[
col_val
+
gt_start
]
trg_label
[
i
][
col_ids
]
=
gt_label
[
col_val
+
gt_start
]
trg_label_wt
[
i
][
col_ids
]
=
1.0
trg_label_wt
[
i
][
col_ids
]
=
1.0
# set target label weight to 1.0 for the negative samples
# set target label weight to 1.0 for the negative samples
if
neg_indices
is
not
None
:
neg_ids
=
neg_indices
[
neg_lod
[
i
]:
neg_lod
[
i
+
1
]]
neg_ids
=
neg_indices
[
neg_lod
[
i
]:
neg_lod
[
i
+
1
]]
trg_label_wt
[
i
][
neg_ids
]
=
1.0
trg_label_wt
[
i
][
neg_ids
]
=
1.0
return
trg_box
,
trg_box_wt
,
trg_label
,
trg_label_wt
return
trg_box
,
trg_box_wt
,
trg_label
,
trg_label_wt
class
TestTargetAssgin
Op
(
OpTest
):
class
TestTargetAssgin
FloatType
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"target_assign"
self
.
op_type
=
"target_assign"
num_prior
=
120
num_class
=
21
gt_lod
=
[
0
,
5
,
11
,
23
]
neg_lod
=
[
0
,
4
,
7
,
13
]
mismatch_value
=
0
batch_size
=
len
(
gt_lod
)
-
1
num_gt
=
gt_lod
[
-
1
]
encoded_box
=
np
.
random
.
random
((
num_gt
,
num_prior
,
4
)).
astype
(
'float32'
)
gt_label
=
np
.
random
.
randint
(
num_class
,
size
=
(
num_gt
,
1
)).
astype
(
'int32'
)
match_indices
,
neg_indices
=
gen_match_and_neg_indices
(
num_prior
,
gt_lod
,
neg_lod
)
out
,
out_wt
,
_
,
_
=
target_assign
(
encoded_box
,
gt_label
,
match_indices
,
neg_indices
,
gt_lod
,
neg_lod
,
mismatch_value
)
# assign regression targets
x
=
encoded_box
self
.
inputs
=
{
'X'
:
(
x
,
[
gt_lod
]),
'MatchIndices'
:
match_indices
,
}
self
.
attrs
=
{
'mismatch_value'
:
mismatch_value
}
self
.
outputs
=
{
'Out'
:
out
,
'OutWeight'
:
out_wt
,
}
def
test_check_output
(
self
):
self
.
check_output
()
class
TestTargetAssginIntType
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"target_assign"
num_prior
=
120
num_prior
=
120
num_class
=
21
num_class
=
21
gt_lod
=
[
0
,
5
,
11
,
23
]
gt_lod
=
[
0
,
5
,
11
,
23
]
neg_lod
=
[
0
,
4
,
7
,
13
]
neg_lod
=
[
0
,
4
,
7
,
13
]
mismatch_value
=
0
batch_size
=
len
(
gt_lod
)
-
1
batch_size
=
len
(
gt_lod
)
-
1
num_gt
=
gt_lod
[
-
1
]
num_gt
=
gt_lod
[
-
1
]
background_label
=
0
encoded_box
=
np
.
random
.
random
((
num_gt
,
num_prior
,
4
)).
astype
(
'float32'
)
encoded_box
=
np
.
random
.
random
((
num_gt
,
num_prior
,
4
)).
astype
(
'float32'
)
gt_label
=
np
.
random
.
randint
(
gt_label
=
np
.
random
.
randint
(
num_class
,
size
=
(
num_gt
,
1
)).
astype
(
'int32'
)
num_class
,
size
=
(
num_gt
,
1
)).
astype
(
'int32'
)
match_indices
,
neg_indices
=
gen_match_and_neg_indices
(
num_prior
,
match_indices
,
neg_indices
=
gen_match_and_neg_indices
(
num_prior
,
gt_lod
,
neg_lod
)
gt_lod
,
neg_lod
)
trg_box
,
trg_box_wt
,
trg_label
,
trg_label_wt
=
target_assign
(
encoded_box
,
gt_label
,
match_indices
,
neg_indices
,
gt_lod
,
neg_lod
,
background_label
)
_
,
_
,
out
,
out_wt
,
=
target_assign
(
encoded_box
,
gt_label
,
match_indices
,
neg_indices
,
gt_lod
,
neg_lod
,
mismatch_value
)
# assign classification targets
x
=
np
.
reshape
(
gt_label
,
(
num_gt
,
1
,
1
))
self
.
inputs
=
{
self
.
inputs
=
{
'EncodedGTBBox'
:
(
encoded_box
,
[
gt_lod
]),
'X'
:
(
x
,
[
gt_lod
]),
'GTScoreLabel'
:
(
gt_label
,
[
gt_lod
]),
'MatchIndices'
:
match_indices
,
'MatchIndices'
:
(
match_indices
),
'NegIndices'
:
(
neg_indices
,
[
neg_lod
]),
'NegIndices'
:
(
neg_indices
,
[
neg_lod
]),
}
}
self
.
attrs
=
{
'
background_label'
:
background_label
}
self
.
attrs
=
{
'
mismatch_value'
:
mismatch_value
}
self
.
outputs
=
{
self
.
outputs
=
{
'PredBBoxLabel'
:
(
trg_box
),
'Out'
:
out
,
'PredBBoxWeight'
:
(
trg_box_wt
),
'OutWeight'
:
out_wt
,
'PredScoreLabel'
:
(
trg_label
),
'PredScoreWeight'
:
(
trg_label_wt
),
}
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录