Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
11bcb43a
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
11bcb43a
编写于
2月 11, 2018
作者:
T
typhoonzero
浏览文件
操作
浏览文件
下载
差异文件
fix merge issue
上级
a8b630c8
4f4abfa3
变更
43
隐藏空白更改
内联
并排
Showing
43 changed file
with
612 addition
and
465 deletion
+612
-465
AUTHORS.md
AUTHORS.md
+1
-1
doc/templates/conf.py.cn.in
doc/templates/conf.py.cn.in
+1
-1
doc/templates/conf.py.en.in
doc/templates/conf.py.en.in
+1
-1
paddle/CMakeLists.txt
paddle/CMakeLists.txt
+0
-1
paddle/fluid/CMakeLists.txt
paddle/fluid/CMakeLists.txt
+1
-0
paddle/fluid/framework/ddim.cc
paddle/fluid/framework/ddim.cc
+10
-0
paddle/fluid/framework/ddim.h
paddle/fluid/framework/ddim.h
+2
-0
paddle/fluid/framework/init.cc
paddle/fluid/framework/init.cc
+1
-1
paddle/fluid/framework/mixed_vector.h
paddle/fluid/framework/mixed_vector.h
+9
-4
paddle/fluid/framework/mixed_vector_test.cu
paddle/fluid/framework/mixed_vector_test.cu
+11
-4
paddle/fluid/framework/scope.cc
paddle/fluid/framework/scope.cc
+1
-1
paddle/fluid/operators/concat_op.h
paddle/fluid/operators/concat_op.h
+19
-19
paddle/fluid/operators/listen_and_serv_op.cc
paddle/fluid/operators/listen_and_serv_op.cc
+4
-4
paddle/fluid/operators/send_recv_op_test.cc
paddle/fluid/operators/send_recv_op_test.cc
+1
-1
paddle/fluid/operators/sequence_expand_op.cc
paddle/fluid/operators/sequence_expand_op.cc
+3
-1
paddle/fluid/operators/split_op.h
paddle/fluid/operators/split_op.h
+10
-9
paddle/fluid/operators/strided_memcpy.h
paddle/fluid/operators/strided_memcpy.h
+57
-0
paddle/fluid/operators/target_assign_op.cc
paddle/fluid/operators/target_assign_op.cc
+76
-117
paddle/fluid/operators/target_assign_op.cu
paddle/fluid/operators/target_assign_op.cu
+22
-20
paddle/fluid/operators/target_assign_op.h
paddle/fluid/operators/target_assign_op.h
+71
-98
paddle/fluid/platform/cpu_info_test.cc
paddle/fluid/platform/cpu_info_test.cc
+1
-1
paddle/fluid/platform/enforce.h
paddle/fluid/platform/enforce.h
+2
-2
paddle/fluid/platform/enforce_test.cc
paddle/fluid/platform/enforce_test.cc
+1
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+1
-1
paddle/fluid/string/.clang-format
paddle/fluid/string/.clang-format
+0
-0
paddle/fluid/string/CMakeLists.txt
paddle/fluid/string/CMakeLists.txt
+0
-0
paddle/fluid/string/piece.cc
paddle/fluid/string/piece.cc
+1
-1
paddle/fluid/string/piece.h
paddle/fluid/string/piece.h
+2
-2
paddle/fluid/string/piece_test.cc
paddle/fluid/string/piece_test.cc
+1
-1
paddle/fluid/string/printf.h
paddle/fluid/string/printf.h
+1
-1
paddle/fluid/string/printf_test.cc
paddle/fluid/string/printf_test.cc
+3
-3
paddle/fluid/string/tinyformat/tinyformat.h
paddle/fluid/string/tinyformat/tinyformat.h
+41
-65
paddle/fluid/string/to_string.h
paddle/fluid/string/to_string.h
+0
-0
paddle/fluid/string/to_string_test.cc
paddle/fluid/string/to_string_test.cc
+2
-2
paddle/scripts/docker/build.sh
paddle/scripts/docker/build.sh
+2
-2
paddle/scripts/travis/build_doc.sh
paddle/scripts/travis/build_doc.sh
+3
-3
python/paddle/v2/fluid/distribute_transpiler.py
python/paddle/v2/fluid/distribute_transpiler.py
+172
-70
python/paddle/v2/fluid/layers/math_op_patch.py
python/paddle/v2/fluid/layers/math_op_patch.py
+1
-0
python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
...le/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
+1
-1
python/paddle/v2/fluid/tests/test_cpp_reader.py
python/paddle/v2/fluid/tests/test_cpp_reader.py
+1
-3
python/paddle/v2/fluid/tests/test_sequence_expand.py
python/paddle/v2/fluid/tests/test_sequence_expand.py
+15
-0
python/paddle/v2/fluid/tests/test_split_op.py
python/paddle/v2/fluid/tests/test_split_op.py
+4
-4
python/paddle/v2/fluid/tests/test_target_assign_op.py
python/paddle/v2/fluid/tests/test_target_assign_op.py
+56
-19
未找到文件。
AUTHORS.md
浏览文件 @
11bcb43a
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
|---|---|
|---|---|
| backyes | Yan-Fei Wang |
| backyes | Yan-Fei Wang |
| beckett1124 | Bin Qi |
| beckett1124 | Bin Qi |
|
Canpio
| Jia-Yi Feng |
|
JiayiFeng
| Jia-Yi Feng |
| chengxiaohua1105 | Xiao-Hua Cheng |
| chengxiaohua1105 | Xiao-Hua Cheng |
| cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang |
| cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang |
| cxysteven | Xing-Yi Cheng |
| cxysteven | Xing-Yi Cheng |
...
...
doc/templates/conf.py.cn.in
浏览文件 @
11bcb43a
...
@@ -82,7 +82,7 @@ language = 'zh_CN'
...
@@ -82,7 +82,7 @@ language = 'zh_CN'
# List of patterns, relative to source directory, that match files and
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# directories to ignore when looking for source files.
exclude_patterns = ['_build', '**/*_en*', '*_en*']
exclude_patterns = ['_build', '**/*_en*', '*_en*'
, 'api/*'
]
# The reST default role (used for this markup: `text`) to use for all
# The reST default role (used for this markup: `text`) to use for all
# documents.
# documents.
...
...
doc/templates/conf.py.en.in
浏览文件 @
11bcb43a
...
@@ -82,7 +82,7 @@ language = None
...
@@ -82,7 +82,7 @@ language = None
# List of patterns, relative to source directory, that match files and
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# directories to ignore when looking for source files.
exclude_patterns = ['_build', '**/*_cn*', '*_cn*']
exclude_patterns = ['_build', '**/*_cn*', '*_cn*'
, 'api/*'
]
# The reST default role (used for this markup: `text`) to use for all
# The reST default role (used for this markup: `text`) to use for all
# documents.
# documents.
...
...
paddle/CMakeLists.txt
浏览文件 @
11bcb43a
...
@@ -11,7 +11,6 @@ if(MOBILE_INFERENCE)
...
@@ -11,7 +11,6 @@ if(MOBILE_INFERENCE)
else
()
else
()
add_subdirectory
(
pserver
)
add_subdirectory
(
pserver
)
add_subdirectory
(
trainer
)
add_subdirectory
(
trainer
)
add_subdirectory
(
string
)
add_subdirectory
(
scripts
)
add_subdirectory
(
scripts
)
if
(
WITH_C_API
)
if
(
WITH_C_API
)
...
...
paddle/fluid/CMakeLists.txt
浏览文件 @
11bcb43a
...
@@ -4,3 +4,4 @@ add_subdirectory(framework)
...
@@ -4,3 +4,4 @@ add_subdirectory(framework)
add_subdirectory
(
operators
)
add_subdirectory
(
operators
)
add_subdirectory
(
pybind
)
add_subdirectory
(
pybind
)
add_subdirectory
(
inference
)
add_subdirectory
(
inference
)
add_subdirectory
(
string
)
paddle/fluid/framework/ddim.cc
浏览文件 @
11bcb43a
...
@@ -314,5 +314,15 @@ DDim stride(const DDim& ddim) {
...
@@ -314,5 +314,15 @@ DDim stride(const DDim& ddim) {
}
}
return
framework
::
make_ddim
(
strides
);
return
framework
::
make_ddim
(
strides
);
}
}
DDim
stride_numel
(
const
framework
::
DDim
&
ddim
)
{
std
::
vector
<
int64_t
>
strides
(
ddim
.
size
());
strides
[
ddim
.
size
()
-
1
]
=
ddim
[
ddim
.
size
()
-
1
];
for
(
int
i
=
ddim
.
size
()
-
2
;
i
>=
0
;
--
i
)
{
strides
[
i
]
=
strides
[
i
+
1
]
*
ddim
[
i
];
}
return
framework
::
make_ddim
(
strides
);
}
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/ddim.h
浏览文件 @
11bcb43a
...
@@ -125,6 +125,8 @@ DDim flatten_to_2d(const DDim& src, int num_col_dims);
...
@@ -125,6 +125,8 @@ DDim flatten_to_2d(const DDim& src, int num_col_dims);
DDim
flatten_to_1d
(
const
DDim
&
src
);
DDim
flatten_to_1d
(
const
DDim
&
src
);
DDim
stride
(
const
DDim
&
ddim
);
DDim
stride
(
const
DDim
&
ddim
);
DDim
stride_numel
(
const
DDim
&
ddim
);
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
...
...
paddle/fluid/framework/init.cc
浏览文件 @
11bcb43a
...
@@ -20,7 +20,7 @@ limitations under the License. */
...
@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/string/piece.h"
#include "paddle/
fluid/
string/piece.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
...
...
paddle/fluid/framework/mixed_vector.h
浏览文件 @
11bcb43a
...
@@ -37,9 +37,8 @@ class Vector {
...
@@ -37,9 +37,8 @@ class Vector {
// Fill vector with value. The vector size is `count`.
// Fill vector with value. The vector size is `count`.
explicit
Vector
(
size_t
count
,
const
T
&
value
=
T
())
{
explicit
Vector
(
size_t
count
,
const
T
&
value
=
T
())
{
if
(
count
==
0
)
{
InitEmpty
();
InitEmpty
();
if
(
count
!=
0
)
{
}
else
{
resize
(
count
);
resize
(
count
);
T
*
ptr
=
begin
();
T
*
ptr
=
begin
();
for
(
size_t
i
=
0
;
i
<
count
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
count
;
++
i
)
{
...
@@ -122,6 +121,10 @@ class Vector {
...
@@ -122,6 +121,10 @@ class Vector {
const
T
*
begin
()
const
{
return
&
this
->
operator
[](
0
);
}
const
T
*
begin
()
const
{
return
&
this
->
operator
[](
0
);
}
const
T
*
end
()
const
{
return
&
this
->
operator
[](
size
());
}
const
T
*
end
()
const
{
return
&
this
->
operator
[](
size
());
}
const
T
*
cbegin
()
const
{
return
begin
();
}
const
T
*
cend
()
const
{
return
end
();
}
const
T
&
back
()
const
{
const
T
&
back
()
const
{
auto
it
=
end
();
auto
it
=
end
();
--
it
;
--
it
;
...
@@ -244,7 +247,9 @@ class Vector {
...
@@ -244,7 +247,9 @@ class Vector {
bool
operator
==
(
const
Vector
<
T
>&
other
)
const
{
bool
operator
==
(
const
Vector
<
T
>&
other
)
const
{
if
(
size
()
!=
other
.
size
())
return
false
;
if
(
size
()
!=
other
.
size
())
return
false
;
for
(
auto
it1
=
begin
(),
it2
=
other
.
begin
();
it1
<
end
();
++
it1
,
++
it2
)
{
auto
it1
=
cbegin
();
auto
it2
=
other
.
cbegin
();
for
(;
it1
<
cend
();
++
it1
,
++
it2
)
{
if
(
*
it1
!=
*
it2
)
{
if
(
*
it1
!=
*
it2
)
{
return
false
;
return
false
;
}
}
...
...
paddle/fluid/framework/mixed_vector_test.cu
浏览文件 @
11bcb43a
...
@@ -26,10 +26,10 @@ TEST(mixed_vector, CPU_VECTOR) {
...
@@ -26,10 +26,10 @@ TEST(mixed_vector, CPU_VECTOR) {
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
tmp
.
push_back
(
i
);
tmp
.
push_back
(
i
);
}
}
ASSERT_EQ
(
tmp
.
size
(),
10
);
ASSERT_EQ
(
tmp
.
size
(),
10
UL
);
vec
<
int
>
tmp2
;
vec
<
int
>
tmp2
;
tmp2
=
tmp
;
tmp2
=
tmp
;
ASSERT_EQ
(
tmp2
.
size
(),
10
);
ASSERT_EQ
(
tmp2
.
size
(),
10
UL
);
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
ASSERT_EQ
(
tmp2
[
i
],
i
);
ASSERT_EQ
(
tmp2
[
i
],
i
);
ASSERT_EQ
(
tmp2
[
i
],
tmp
[
i
]);
ASSERT_EQ
(
tmp2
[
i
],
tmp
[
i
]);
...
@@ -58,7 +58,7 @@ TEST(mixed_vector, GPU_VECTOR) {
...
@@ -58,7 +58,7 @@ TEST(mixed_vector, GPU_VECTOR) {
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
tmp
.
push_back
(
i
);
tmp
.
push_back
(
i
);
}
}
ASSERT_EQ
(
tmp
.
size
(),
10
);
ASSERT_EQ
(
tmp
.
size
(),
10
UL
);
paddle
::
platform
::
CUDAPlace
gpu
(
0
);
paddle
::
platform
::
CUDAPlace
gpu
(
0
);
multiply_10
<<<
1
,
1
,
0
,
GetCUDAStream
(
gpu
)
>>>
(
tmp
.
MutableData
(
gpu
));
multiply_10
<<<
1
,
1
,
0
,
GetCUDAStream
(
gpu
)
>>>
(
tmp
.
MutableData
(
gpu
));
...
@@ -79,7 +79,7 @@ TEST(mixed_vector, MultiGPU) {
...
@@ -79,7 +79,7 @@ TEST(mixed_vector, MultiGPU) {
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
tmp
.
push_back
(
i
);
tmp
.
push_back
(
i
);
}
}
ASSERT_EQ
(
tmp
.
size
(),
10
);
ASSERT_EQ
(
tmp
.
size
(),
10
UL
);
paddle
::
platform
::
CUDAPlace
gpu0
(
0
);
paddle
::
platform
::
CUDAPlace
gpu0
(
0
);
paddle
::
platform
::
SetDeviceId
(
0
);
paddle
::
platform
::
SetDeviceId
(
0
);
multiply_10
<<<
1
,
1
,
0
,
GetCUDAStream
(
gpu0
)
>>>
(
tmp
.
MutableData
(
gpu0
));
multiply_10
<<<
1
,
1
,
0
,
GetCUDAStream
(
gpu0
)
>>>
(
tmp
.
MutableData
(
gpu0
));
...
@@ -91,3 +91,10 @@ TEST(mixed_vector, MultiGPU) {
...
@@ -91,3 +91,10 @@ TEST(mixed_vector, MultiGPU) {
ASSERT_EQ
(
tmp
[
i
],
i
*
100
);
ASSERT_EQ
(
tmp
[
i
],
i
*
100
);
}
}
}
}
TEST
(
mixed_vector
,
InitWithCount
)
{
paddle
::
framework
::
Vector
<
int
>
vec
(
10
,
10
);
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
ASSERT_EQ
(
vec
[
i
],
10
);
}
}
paddle/fluid/framework/scope.cc
浏览文件 @
11bcb43a
...
@@ -18,7 +18,7 @@ limitations under the License. */
...
@@ -18,7 +18,7 @@ limitations under the License. */
#include <mutex> // for call_once
#include <mutex> // for call_once
#include "glog/logging.h"
#include "glog/logging.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/string/printf.h"
#include "paddle/
fluid/
string/printf.h"
DEFINE_bool
(
benchmark
,
false
,
DEFINE_bool
(
benchmark
,
false
,
"Doing memory benchmark. It will make deleting scope synchronized, "
"Doing memory benchmark. It will make deleting scope synchronized, "
...
...
paddle/fluid/operators/concat_op.h
浏览文件 @
11bcb43a
...
@@ -28,17 +28,18 @@ class ConcatKernel : public framework::OpKernel<T> {
...
@@ -28,17 +28,18 @@ class ConcatKernel : public framework::OpKernel<T> {
auto
ins
=
ctx
.
MultiInput
<
framework
::
Tensor
>
(
"X"
);
auto
ins
=
ctx
.
MultiInput
<
framework
::
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
int64_t
axis
=
static_cast
<
int64_t
>
(
ctx
.
Attr
<
int
>
(
"axis"
));
int64_t
axis
=
static_cast
<
int64_t
>
(
ctx
.
Attr
<
int
>
(
"axis"
));
const
size_t
n
=
ins
.
size
();
auto
place
=
ctx
.
GetPlace
();
out
->
mutable_data
<
T
>
(
place
);
auto
out_stride
=
framework
::
stride_numel
(
out
->
dims
());
size_t
output_offset
=
0
;
size_t
output_offset
=
0
;
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
for
(
auto
*
in
:
ins
)
{
auto
out_stride
=
framework
::
stride
(
out
->
dims
());
auto
in_stride
=
framework
::
stride_numel
(
in
->
dims
());
for
(
size_t
i
=
0
;
i
<
n
;
i
++
)
{
StridedNumelCopyWithAxis
<
T
>
(
ctx
.
device_context
(),
axis
,
auto
&
in
=
ins
[
i
];
out
->
data
<
T
>
()
+
output_offset
,
out_stride
,
auto
axis_dim
=
in
->
dims
()[
axis
];
in
->
data
<
T
>
(),
in_stride
);
auto
in_stride
=
framework
::
stride
(
in
->
dims
());
output_offset
+=
in_stride
[
axis
];
StridedMemcpy
<
T
>
(
ctx
.
device_context
(),
in
->
data
<
T
>
(),
in_stride
,
in
->
dims
(),
out_stride
,
out
->
data
<
T
>
()
+
output_offset
);
output_offset
+=
axis_dim
*
in_stride
[
axis
];
}
}
}
}
};
};
...
@@ -50,17 +51,16 @@ class ConcatGradKernel : public framework::OpKernel<T> {
...
@@ -50,17 +51,16 @@ class ConcatGradKernel : public framework::OpKernel<T> {
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
outs
=
ctx
.
MultiOutput
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
outs
=
ctx
.
MultiOutput
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
int64_t
axis
=
static_cast
<
int64_t
>
(
ctx
.
Attr
<
int
>
(
"axis"
));
int64_t
axis
=
static_cast
<
int64_t
>
(
ctx
.
Attr
<
int
>
(
"axis"
));
const
size_t
n
=
outs
.
size
();
size_t
input_offset
=
0
;
size_t
input_offset
=
0
;
auto
in_stride
=
framework
::
stride
(
in
->
dims
());
auto
in_stride
=
framework
::
stride
_numel
(
in
->
dims
());
for
(
size_t
i
=
0
;
i
<
n
;
i
++
)
{
auto
&
out
=
outs
[
i
];
for
(
auto
&
out
:
outs
)
{
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
size_t
axis_dim
=
out
->
dims
()[
axis
]
;
auto
out_stride
=
framework
::
stride_numel
(
out
->
dims
())
;
auto
out_stride
=
framework
::
stride
(
out
->
dims
());
StridedNumelCopyWithAxis
<
T
>
(
ctx
.
device_context
(),
axis
,
out
->
data
<
T
>
(),
StridedMemcpy
<
T
>
(
ctx
.
device_context
()
,
in
->
data
<
T
>
()
+
input_offset
,
out_stride
,
in
->
data
<
T
>
()
+
input_offset
,
in_stride
,
out
->
dims
(),
out_stride
,
out
->
data
<
T
>
()
);
in_stride
);
input_offset
+=
axis_dim
*
in
_stride
[
axis
];
input_offset
+=
out
_stride
[
axis
];
}
}
}
}
};
};
...
...
paddle/fluid/operators/listen_and_serv_op.cc
浏览文件 @
11bcb43a
...
@@ -27,7 +27,7 @@ limitations under the License. */
...
@@ -27,7 +27,7 @@ limitations under the License. */
#include "paddle/fluid/operators/detail/grpc_server.h"
#include "paddle/fluid/operators/detail/grpc_server.h"
#include "paddle/fluid/operators/detail/sendrecvop_utils.h"
#include "paddle/fluid/operators/detail/sendrecvop_utils.h"
#include "paddle/fluid/operators/detail/simple_block_queue.h"
#include "paddle/fluid/operators/detail/simple_block_queue.h"
#include "paddle/string/printf.h"
#include "paddle/
fluid/
string/printf.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -98,6 +98,7 @@ class ListenAndServOp : public framework::OperatorBase {
...
@@ -98,6 +98,7 @@ class ListenAndServOp : public framework::OperatorBase {
// the gradients arrives, just add suffix 0~n and merge the gradient.
// the gradients arrives, just add suffix 0~n and merge the gradient.
rpc_service_
->
SetCond
(
0
);
rpc_service_
->
SetCond
(
0
);
size_t
recv_var_cnt
=
0
;
size_t
recv_var_cnt
=
0
;
size_t
update_param_cnt
=
0
;
int
batch_barrier
=
0
;
int
batch_barrier
=
0
;
while
(
batch_barrier
!=
fan_in
)
{
while
(
batch_barrier
!=
fan_in
)
{
const
detail
::
MessageWithName
&
v
=
rpc_service_
->
Get
();
const
detail
::
MessageWithName
&
v
=
rpc_service_
->
Get
();
...
@@ -122,11 +123,10 @@ class ListenAndServOp : public framework::OperatorBase {
...
@@ -122,11 +123,10 @@ class ListenAndServOp : public framework::OperatorBase {
}
}
}
}
VLOG
(
3
)
<<
"recv "
<<
recv_var_cnt
<<
" parmeters for one barrier."
;
VLOG
(
3
)
<<
"recv "
<<
recv_var_cnt
<<
" parmeters for one barrier."
;
// TODO(Yancey1989): merge SelectedRows variables here
if
(
exit_flag
)
{
if
(
exit_flag
)
{
rpc_service_
->
ShutDown
();
rpc_service_
->
ShutDown
();
}
}
VLOG
(
3
)
<<
"run optimize graph..."
;
try
{
try
{
executor
.
Run
(
*
program
,
&
recv_scope
,
block
->
ID
(),
/*global_block*/
executor
.
Run
(
*
program
,
&
recv_scope
,
block
->
ID
(),
/*global_block*/
false
/*create_local_scope*/
,
false
/*create_vars*/
);
false
/*create_local_scope*/
,
false
/*create_vars*/
);
...
@@ -134,7 +134,7 @@ class ListenAndServOp : public framework::OperatorBase {
...
@@ -134,7 +134,7 @@ class ListenAndServOp : public framework::OperatorBase {
LOG
(
ERROR
)
<<
"run sub program error "
<<
e
.
what
();
LOG
(
ERROR
)
<<
"run sub program error "
<<
e
.
what
();
}
}
rpc_service_
->
SetCond
(
1
);
rpc_service_
->
SetCond
(
1
);
rpc_service_
->
WaitClientGet
(
recv_var
_cnt
);
rpc_service_
->
WaitClientGet
(
update_param
_cnt
);
grads_counter_
.
clear
();
grads_counter_
.
clear
();
}
// while(true)
}
// while(true)
}
}
...
...
paddle/fluid/operators/send_recv_op_test.cc
浏览文件 @
11bcb43a
...
@@ -22,7 +22,7 @@ limitations under the License. */
...
@@ -22,7 +22,7 @@ limitations under the License. */
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/string/printf.h"
#include "paddle/
fluid/
string/printf.h"
USE_NO_KERNEL_OP
(
send
);
USE_NO_KERNEL_OP
(
send
);
USE_NO_KERNEL_OP
(
listen_and_serv
);
USE_NO_KERNEL_OP
(
listen_and_serv
);
...
...
paddle/fluid/operators/sequence_expand_op.cc
浏览文件 @
11bcb43a
...
@@ -29,7 +29,9 @@ class SequenceExpandOp : public framework::OperatorWithKernel {
...
@@ -29,7 +29,9 @@ class SequenceExpandOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
));
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
));
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Y"
));
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Y"
));
framework
::
DDim
out_dim
;
framework
::
DDim
out_dim
;
out_dim
=
ctx
->
GetInputDim
(
"Y"
);
auto
y_dim
=
ctx
->
GetInputDim
(
"Y"
);
out_dim
=
ctx
->
GetInputDim
(
"X"
);
out_dim
[
0
]
=
y_dim
[
0
];
ctx
->
ShareLoD
(
"Y"
,
"Out"
);
ctx
->
ShareLoD
(
"Y"
,
"Out"
);
ctx
->
SetOutputDim
(
"Out"
,
out_dim
);
ctx
->
SetOutputDim
(
"Out"
,
out_dim
);
}
}
...
...
paddle/fluid/operators/split_op.h
浏览文件 @
11bcb43a
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <chrono>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/fluid/operators/strided_memcpy.h"
...
@@ -27,18 +28,18 @@ class SplitOpKernel : public framework::OpKernel<T> {
...
@@ -27,18 +28,18 @@ class SplitOpKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
outs
=
ctx
.
MultiOutput
<
framework
::
Tensor
>
(
"Out"
);
auto
outs
=
ctx
.
MultiOutput
<
framework
::
Tensor
>
(
"Out"
);
auto
in_stride
=
framework
::
stride
(
in
->
dims
());
auto
in_stride
=
framework
::
stride
_numel
(
in
->
dims
());
int64_t
axis
=
static_cast
<
int64_t
>
(
ctx
.
Attr
<
int
>
(
"axis"
));
int64_t
axis
=
static_cast
<
int64_t
>
(
ctx
.
Attr
<
int
>
(
"axis"
));
const
size_t
n
=
outs
.
size
();
auto
place
=
ctx
.
GetPlace
();
size_t
input_offset
=
0
;
size_t
input_offset
=
0
;
for
(
size_t
i
=
0
;
i
<
n
;
i
++
)
{
for
(
auto
&
out
:
outs
)
{
auto
&
out
=
outs
[
i
];
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
size_t
axis_dim
=
out
->
dims
()[
axis
]
;
auto
out_stride
=
framework
::
stride_numel
(
out
->
dims
())
;
auto
out_stride
=
framework
::
stride
(
out
->
dims
());
StridedNumelCopyWithAxis
<
T
>
(
ctx
.
device_context
(),
axis
,
out
->
data
<
T
>
(),
StridedMemcpy
<
T
>
(
ctx
.
device_context
()
,
in
->
data
<
T
>
()
+
input_offset
,
out_stride
,
in
->
data
<
T
>
()
+
input_offset
,
in_stride
,
out
->
dims
(),
out_stride
,
out
->
data
<
T
>
()
);
in_stride
);
input_offset
+=
axis_dim
*
in
_stride
[
axis
];
input_offset
+=
out
_stride
[
axis
];
}
}
}
}
};
};
...
...
paddle/fluid/operators/strided_memcpy.h
浏览文件 @
11bcb43a
...
@@ -41,5 +41,62 @@ inline void StridedMemcpy(const platform::DeviceContext& dev_ctx, const T* src,
...
@@ -41,5 +41,62 @@ inline void StridedMemcpy(const platform::DeviceContext& dev_ctx, const T* src,
StridedCopyDimVisitor
<
T
>
func
(
dev_ctx
,
src
,
src_stride
,
dst_stride
,
dst
);
StridedCopyDimVisitor
<
T
>
func
(
dev_ctx
,
src
,
src_stride
,
dst_stride
,
dst
);
boost
::
apply_visitor
(
func
,
dst_dim
);
boost
::
apply_visitor
(
func
,
dst_dim
);
}
}
// Strided numel memory copy from src to dst by the specified axis
//
// For example, for a tensor dims [4, 20, 100], the strieded numel is
// [8000, 2000, 100]
//
// NOTE: The src and dst tensor should have the same elements
// except the specified axis.
template
<
typename
T
>
inline
void
StridedNumelCopyWithAxis
(
const
platform
::
DeviceContext
&
ctx
,
int64_t
axis
,
T
*
dst
,
const
framework
::
DDim
&
dst_stride_numel
,
const
T
*
src
,
const
framework
::
DDim
&
src_stride_numel
)
{
int64_t
before
=
dst_stride_numel
[
0
]
/
dst_stride_numel
[
axis
];
int64_t
src_after
=
src_stride_numel
[
axis
];
int64_t
dst_after
=
dst_stride_numel
[
axis
];
auto
place
=
ctx
.
GetPlace
();
PADDLE_ENFORCE_EQ
(
src_stride_numel
.
size
(),
dst_stride_numel
.
size
(),
"src and dst tensor should have the same dims size."
);
for
(
int64_t
i
=
0
;
i
<
axis
;
++
i
)
{
if
(
i
<
axis
)
{
PADDLE_ENFORCE_EQ
(
src_stride_numel
[
i
]
/
src_stride_numel
[
axis
],
dst_stride_numel
[
i
]
/
dst_stride_numel
[
axis
],
"src and dst should have the same elements "
"except the specified axis."
);
}
else
if
(
i
==
axis
)
{
continue
;
}
else
{
PADDLE_ENFORCE_EQ
(
src_stride_numel
[
i
],
dst_stride_numel
[
i
],
"src and dst should have the same elements "
"except the specified axis."
);
}
}
for
(
int64_t
i
=
0
;
i
<
before
;
++
i
)
{
if
(
platform
::
is_cpu_place
(
place
))
{
auto
&
cpu_place
=
boost
::
get
<
platform
::
CPUPlace
>
(
place
);
memory
::
Copy
(
cpu_place
,
dst
+
i
*
dst_after
,
cpu_place
,
src
+
i
*
src_after
,
sizeof
(
T
)
*
src_after
);
}
else
{
#ifdef PADDLE_WITH_CUDA
auto
&
gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
);
auto
&
cuda_ctx
=
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
ctx
);
memory
::
Copy
(
gpu_place
,
dst
+
i
*
dst_after
,
gpu_place
,
src
+
i
*
src_after
,
sizeof
(
T
)
*
src_after
,
cuda_ctx
.
stream
());
#else
PADDLE_THROW
(
"Paddle is not compiled with GPU"
);
#endif
}
}
}
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
paddle/fluid/operators/target_assign_op.cc
浏览文件 @
11bcb43a
...
@@ -22,69 +22,43 @@ class TargetAssignOp : public framework::OperatorWithKernel {
...
@@ -22,69 +22,43 @@ class TargetAssignOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
// checkout inputs
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"EncodedGTBBox"
),
"Input(X) of TargetAssignOp should not be null"
);
"Input(EncodedGTBBox) of TargetAssignOp should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"GTScoreLabel"
),
"Input(GTScoreLabel) of TargetAssignOp should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"MatchIndices"
),
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"MatchIndices"
),
"Input(MatchIndices) of TargetAssignOp should not be null"
);
"Input(MatchIndices) of TargetAssignOp should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"NegIndices"
),
"Input(NegIndices) of TargetAssignOp should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of TargetAssignOp should not be null."
);
// checkout outputs
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"OutWeight"
),
PADDLE_ENFORCE
(
"Output(OutWeight) of TargetAssignOp should not be null."
);
ctx
->
HasOutput
(
"PredBBoxLabel"
),
"Output(PredBBoxLabel) of TargetAssignOp should not be null."
);
auto
in_dims
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"PredBBoxWeight"
),
"Output(PredBBoxWeight) of TargetAssignOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"PredScoreLabel"
),
"Output(PredScoreLabel) of TargetAssignOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"PredScoreWeight"
),
"Output(PredScoreWeight) of TargetAssignOp should not be null."
);
auto
blabel_dims
=
ctx
->
GetInputDim
(
"EncodedGTBBox"
);
auto
slabel_dims
=
ctx
->
GetInputDim
(
"GTScoreLabel"
);
auto
mi_dims
=
ctx
->
GetInputDim
(
"MatchIndices"
);
auto
mi_dims
=
ctx
->
GetInputDim
(
"MatchIndices"
);
auto
neg_dims
=
ctx
->
GetInputDim
(
"NegIndices"
);
PADDLE_ENFORCE_EQ
(
blabel_dims
.
size
(),
3UL
,
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
3
,
"The rank of Input(X) must be 3."
);
"The rank of Input(EncodedGTBBox) must be 3."
);
PADDLE_ENFORCE_EQ
(
mi_dims
.
size
(),
2
,
PADDLE_ENFORCE_EQ
(
slabel_dims
.
size
(),
2UL
,
"The rank of Input(GTScoreLabel) must be 2."
);
PADDLE_ENFORCE_EQ
(
mi_dims
.
size
(),
2UL
,
"The rank of Input(MatchIndices) must be 2."
);
"The rank of Input(MatchIndices) must be 2."
);
PADDLE_ENFORCE_EQ
(
neg_dims
.
size
(),
2UL
,
"The rank of Input(NegIndices) must be 2."
);
if
(
ctx
->
HasInput
(
"NegIndices"
))
{
auto
neg_dims
=
ctx
->
GetInputDim
(
"NegIndices"
);
PADDLE_ENFORCE_EQ
(
blabel_dims
[
0
],
slabel_dims
[
0
],
PADDLE_ENFORCE_EQ
(
neg_dims
.
size
(),
2
,
"The 1st dimension (means the total number of "
"The rank of Input(NegIndices) must be 2."
);
"ground-truth bounding boxes) of Input(EncodedGTBBox) "
PADDLE_ENFORCE_EQ
(
neg_dims
[
1
],
1
,
"and Input(GTScoreLabel) must be the same."
);
"The last dimenstion of Out(NegIndices) must be 1."
);
PADDLE_ENFORCE_EQ
(
blabel_dims
[
1
],
mi_dims
[
1
],
}
"The 2nd dimension (means the number of priod boxes) "
"of Input(EncodedGTBBox) and "
"Input(MatchIndices) must be the same."
);
PADDLE_ENFORCE_EQ
(
blabel_dims
[
2
],
4
,
"The 3rd dimension of Input(EncodedGTBBox) must be 4."
);
auto
n
=
mi_dims
[
0
];
auto
n
=
mi_dims
[
0
];
auto
np
=
mi_dims
[
1
];
auto
m
=
mi_dims
[
1
];
ctx
->
SetOutputDim
(
"PredBBoxLabel"
,
{
n
,
np
,
4
});
auto
k
=
in_dims
[
in_dims
.
size
()
-
1
];
ctx
->
SetOutputDim
(
"PredBBoxWeight"
,
{
n
,
np
,
1
});
ctx
->
SetOutputDim
(
"Out"
,
{
n
,
m
,
k
});
ctx
->
SetOutputDim
(
"PredScoreLabel"
,
{
n
,
np
,
1
});
ctx
->
SetOutputDim
(
"OutWeight"
,
{
n
,
m
,
1
});
ctx
->
SetOutputDim
(
"PredScoreWeight"
,
{
n
,
np
,
1
});
}
}
protected:
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
)
->
type
()),
ctx
.
Input
<
framework
::
LoDTensor
>
(
"EncodedGTBBox"
)
->
type
()),
ctx
.
device_context
());
ctx
.
device_context
());
}
}
};
};
...
@@ -93,102 +67,87 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -93,102 +67,87 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
TargetAssignOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
TargetAssignOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"EncodedGTBBox"
,
AddInput
(
"X"
,
"(LoDTensor), The encoded ground-truth bounding boxes with shape "
"(LoDTensor), This input is a 3D LoDTensor with shape [M, P, K]. "
"[Ng, Np, 4], where Ng is the total number of ground-truth boxes "
"Some elements in X will be assigned to Out based on the "
"in this mini-batch, Np the number of predictions, 4 is the "
"MatchIndices and NegIndices."
);
"number of coordinate in [xmin, ymin, xmax, ymax] layout."
);
AddInput
(
"GTScoreLabel"
,
"(LoDTensor, default LoDTensor<int>), The input ground-truth "
"labels with shape [Ng, 1], where the Ng is the same as it in "
"the input of EncodedGTBBox."
);
AddInput
(
"MatchIndices"
,
AddInput
(
"MatchIndices"
,
"(Tensor, default Tensor<int>), The input matched indices "
"(Tensor, default Tensor<int>), The input matched indices "
"with shape [N, Np], where N is the batch size, Np is the same "
"with shape [N, P], If MatchIndices[i][j] is -1, the j-th entity "
"as it in the input of EncodedGTBBox. If MatchIndices[i][j] "
"of column is not matched to any entity of row in i-th instance."
);
"is -1, the j-th prior box is not matched to any ground-truh "
"box in i-th instance."
);
AddInput
(
"NegIndices"
,
AddInput
(
"NegIndices"
,
"(LoDTensor, default LoDTensor<int>), The input negative example "
"(LoDTensor, default LoDTensor<int>), The input negative example "
"indices with shape [Neg, 1], where is the total number of "
"indices are an optional input with shape [Neg, 1], where Neg is "
"negative example indices."
);
"the total number of negative example indices."
)
AddAttr
<
int
>
(
"background_label"
,
.
AsDispensable
();
"(int, default 0), Label index of background class."
)
AddAttr
<
int
>
(
"mismatch_value"
,
"(int, default 0), Fill this value to the "
"mismatched location."
)
.
SetDefault
(
0
);
.
SetDefault
(
0
);
AddOutput
(
"PredBBoxLabel"
,
AddOutput
(
"Out"
,
"(Tensor), The output encoded ground-truth labels "
"(Tensor), The output is a 3D Tensor with shape [N, P, K], "
"with shape [N, Np, 4], N is the batch size and Np, 4 is the "
"N and P is the same as they are in NegIndices, K is the "
"same as they in input of EncodedGTBBox. If MatchIndices[i][j] "
"same as it in input of X. If MatchIndices[i][j] "
"is -1, the PredBBoxLabel[i][j][:] is the encoded ground-truth "
"is -1, the Out[i][j][0 : K] is the mismatch_value."
);
"box for background_label in i-th instance."
);
AddOutput
(
"OutWeight"
,
AddOutput
(
"PredBBoxWeight"
,
"(Tensor), The weight for output with the shape of [N, P, 1]"
);
"(Tensor), The weight for PredBBoxLabel with the shape "
"of [N, Np, 1]"
);
AddOutput
(
"PredScoreLabel"
,
"(Tensor, default Tensor<int>), The output score labels for "
"each predictions with shape [N, Np, 1]. If MatchIndices[i][j] "
"is -1, PredScoreLabel[i][j] = background_label."
);
AddOutput
(
"PredScoreWeight"
,
"(Tensor), The weight for PredScoreLabel with the shape "
"of [N, Np, 1]"
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
This operator is, for given the encoded boxes between prior boxes and
This operator can be, for given the target bounding boxes or labels,
ground-truth boxes and ground-truth class labels, to assign classification
to assign classification and regression targets to each prediction as well as
and regression targets to each prior box as well as weights to each
weights to prediction. The weights is used to specify which prediction would
prior box. The weights is used to specify which prior box would not contribute
not contribute to training loss.
to training loss.
For each instance, the output `Out` and`OutWeight` are assigned based on
For each instance, the output `PredBBoxLabel`, `PredBBoxWeight`,
`MatchIndices` and `NegIndices`.
`PredScoreLabel` and `PredScoreWeight` are assigned based on `MatchIndices`.
Assumed that the row offset for each instance in `X` is called lod,
Assumed that the row offset for each instance in `EncodedGTBBox` is called lod,
this operator assigns classification/regression targets by performing the
this operato assigns classification/regression targets by performing the
following steps:
following steps:
1. Assigning all outpts based on `MatchIndices`:
1. Assigning all outpts based on `MatchIndices`:
If id = MatchIndices[i][j] > 0,
If id = MatchIndices[i][j] > 0,
PredBBoxLabel[i][j] = EncodedGTBBox[lod[i] + id][j]
Out[i][j][0 : K] = X[lod[i] + id][j % P][0 : K]
PredBBoxWeight[i][j] = 1.
OutWeight[i][j] = 1.
PredScoreLabel[i][j] = GTScoreLabel[lod[i] + id]
PredScoreWeight[i][j] = 1.
Otherwise,
Otherwise,
PredBBoxLabel[j][j] = [0., 0., 0., 0.]
Out[j][j][0 : K] = {mismatch_value, mismatch_value, ...}
PredBBoxWeight[i][j] = 0.
OutWeight[i][j] = 0.
PredScoreLabel[i][j] = background_label
PredScoreWeight[i][j] = 0.
2. Assigning
PredScoreWeight based on `NegIndices`
:
2. Assigning
OutWeight based on `NegIndices` if `NegIndices` is provided
:
Assumed that the row offset for each instance in `NegIndices` is cal
e
ed neg_lod,
Assumed that the row offset for each instance in `NegIndices` is cal
l
ed neg_lod,
for i-th instance and
all ids
of NegIndices in this instance:
for i-th instance and
each `id`
of NegIndices in this instance:
PredScoreLabel[i][id] = background_label
Out[i][id][0 : K] = {mismatch_value, mismatch_value, ...}
PredScore
Weight[i][id] = 1.0
Out
Weight[i][id] = 1.0
)DOC"
);
)DOC"
);
}
}
};
};
template
<
typename
T
>
template
<
typename
T
,
typename
WT
>
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
T
>
{
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
T
,
WT
>
{
void
operator
()(
const
platform
::
CPUDeviceContext
&
ctx
,
const
int
*
neg_indices
,
void
operator
()(
const
platform
::
CPUDeviceContext
&
ctx
,
const
int
*
neg_indices
,
const
size_t
*
lod
,
const
int
num
,
const
int
num_prior_box
,
const
size_t
*
lod
,
const
int
N
,
const
int
M
,
const
int
K
,
const
int
background_label
,
int
*
out_label
,
T
*
out_label
_wt
)
{
const
int
mismatch_value
,
T
*
out
,
WT
*
out
_wt
)
{
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
for
(
int
i
=
0
;
i
<
N
;
++
i
)
{
for
(
size_t
j
=
lod
[
i
];
j
<
lod
[
i
+
1
];
++
j
)
{
for
(
size_t
j
=
lod
[
i
];
j
<
lod
[
i
+
1
];
++
j
)
{
int
id
=
neg_indices
[
j
];
int
id
=
neg_indices
[
j
];
out_label
[
i
*
num_prior_box
+
id
]
=
background_label
;
int
off
=
(
i
*
M
+
id
)
*
K
;
out_label_wt
[
i
*
num_prior_box
+
id
]
=
static_cast
<
T
>
(
1.0
);
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
out
[
off
+
k
]
=
mismatch_value
;
out_wt
[
off
+
k
]
=
static_cast
<
WT
>
(
1.0
);
}
}
}
}
}
}
}
};
};
template
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
int
,
float
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
double
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
float
,
float
>;
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
...
@@ -198,5 +157,5 @@ REGISTER_OP_WITHOUT_GRADIENT(target_assign, ops::TargetAssignOp,
...
@@ -198,5 +157,5 @@ REGISTER_OP_WITHOUT_GRADIENT(target_assign, ops::TargetAssignOp,
ops
::
TargetAssignOpMaker
);
ops
::
TargetAssignOpMaker
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
target_assign
,
target_assign
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
,
float
>
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
,
float
>
);
paddle/fluid/operators/target_assign_op.cu
浏览文件 @
11bcb43a
...
@@ -17,39 +17,41 @@ limitations under the License. */
...
@@ -17,39 +17,41 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
template
<
typename
T
>
template
<
typename
T
,
typename
WT
>
__global__
void
NegTargetAssignKernel
(
const
int
*
neg_indices
,
const
size_t
*
lod
,
__global__
void
NegTargetAssignKernel
(
const
int
*
neg_indices
,
const
size_t
*
lod
,
const
int
num
,
const
int
num_prior_box
,
const
int
N
,
const
int
M
,
const
int
K
,
const
int
background_label
,
const
int
mismatch_value
,
T
*
out
,
int
*
out_label
,
T
*
out_label
_wt
)
{
WT
*
out
_wt
)
{
int
bidx
=
blockIdx
.
x
;
int
bidx
=
blockIdx
.
x
;
int
st
=
lod
[
bidx
];
int
st
=
lod
[
bidx
];
int
ed
=
lod
[
bidx
+
1
];
int
ed
=
lod
[
bidx
+
1
];
int
row_start
=
bidx
*
num_prior_box
;
int
row_start
=
bidx
*
M
;
for
(
int
i
=
st
+
threadIdx
.
x
;
i
<
ed
;
i
+=
blockDim
.
x
)
{
for
(
int
i
=
st
+
threadIdx
.
x
;
i
<
ed
;
i
+=
blockDim
.
x
)
{
int
id
=
row_start
+
neg_indices
[
i
];
int
id
=
row_start
+
neg_indices
[
i
];
out_label
[
id
]
=
background_label
;
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
out_label_wt
[
id
]
=
1.
;
out
[
id
*
K
+
k
]
=
T
(
mismatch_value
);
out_wt
[
id
*
K
+
k
]
=
WT
(
1.
);
}
}
}
}
}
template
<
typename
T
>
template
<
typename
T
,
typename
WT
>
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
T
>
{
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
T
,
WT
>
{
void
operator
()(
const
platform
::
CUDADeviceContext
&
ctx
,
void
operator
()(
const
platform
::
CUDADeviceContext
&
ctx
,
const
int
*
neg_indices
,
const
size_t
*
lod
,
const
int
num
,
const
int
*
neg_indices
,
const
size_t
*
lod
,
const
int
N
,
const
int
num_prior_box
,
const
int
background_label
,
const
int
M
,
const
int
K
,
const
int
mismatch_value
,
T
*
out
,
int
*
out_label
,
T
*
out_label
_wt
)
{
WT
*
out
_wt
)
{
const
int
block_size
=
256
;
const
int
block_size
=
256
;
const
int
grid_size
=
num
;
const
int
grid_size
=
N
;
NegTargetAssignKernel
<
T
><<<
grid_size
,
block_size
,
0
,
ctx
.
stream
()
>>>
(
NegTargetAssignKernel
<
T
,
WT
><<<
grid_size
,
block_size
,
0
,
ctx
.
stream
()
>>>
(
neg_indices
,
lod
,
num
,
num_prior_box
,
background_label
,
out_label
,
neg_indices
,
lod
,
N
,
M
,
K
,
mismatch_value
,
out
,
out_wt
);
out_label_wt
);
}
}
};
};
template
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
int
,
float
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
double
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
float
,
float
>;
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
...
@@ -57,5 +59,5 @@ template struct NegTargetAssignFunctor<platform::CUDADeviceContext, double>;
...
@@ -57,5 +59,5 @@ template struct NegTargetAssignFunctor<platform::CUDADeviceContext, double>;
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
target_assign
,
target_assign
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
float
>
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
float
>
);
paddle/fluid/operators/target_assign_op.h
浏览文件 @
11bcb43a
...
@@ -19,140 +19,113 @@ limitations under the License. */
...
@@ -19,140 +19,113 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
template
<
typename
T
,
typename
WT
>
template
<
typename
T
>
struct
TargetAssignFunctor
{
struct
TargetAssignFunctor
{
const
T
*
gt_box_
;
const
T
*
in_
;
const
int
*
gt_label_
;
const
int
*
match_indices_
;
const
int
*
match_indices_
;
const
size_t
*
lod_
;
const
size_t
*
lod_
;
const
int
background_label_
;
const
int
mismatch_value_
;
const
int64_t
num_
;
const
int64_t
N_
;
const
int64_t
num_prior_box_
;
const
int64_t
M_
;
const
int64_t
P_
;
T
*
out_box_
;
const
int64_t
K_
;
T
*
out_box_wt_
;
int
*
out_label_
;
T
*
out_
;
T
*
out_label_wt_
;
WT
*
out_wt_
;
TargetAssignFunctor
(
const
T
*
gt_box
,
const
int
*
gt_label
,
TargetAssignFunctor
(
const
T
*
input
,
const
int
*
match_indices
,
const
int
*
match_indices
,
const
size_t
*
lod
,
const
size_t
*
lod
,
const
int
mismatch_value
,
const
int
background_label
,
const
int64_t
num
,
const
int64_t
N
,
const
int64_t
M
,
const
int64_t
P
,
const
int64_t
np
,
T
*
out_box
,
T
*
out_box_wt
,
const
int64_t
K
,
T
*
out
,
WT
*
out_wt
)
int
*
out_label
,
T
*
out_label_wt
)
:
in_
(
input
),
:
gt_box_
(
gt_box
),
gt_label_
(
gt_label
),
match_indices_
(
match_indices
),
match_indices_
(
match_indices
),
lod_
(
lod
),
lod_
(
lod
),
background_label_
(
background_label
),
mismatch_value_
(
mismatch_value
),
num_
(
num
),
N_
(
N
),
num_prior_box_
(
np
),
M_
(
M
),
out_box_
(
out_box
),
P_
(
P
),
out_box_wt_
(
out_box_wt
),
K_
(
K
),
out_
label_
(
out_label
),
out_
(
out
),
out_
label_wt_
(
out_label
_wt
)
{}
out_
wt_
(
out
_wt
)
{}
HOSTDEVICE
void
operator
()(
size_t
i
)
const
{
HOSTDEVICE
void
operator
()(
size_t
i
)
const
{
int
row
=
i
/
num_prior_box
_
;
int
h
=
i
/
M
_
;
int
col
=
i
-
row
*
num_prior_box
_
;
int
w
=
i
-
h
*
M
_
;
size_t
row_off
=
lod_
[
row
];
size_t
off
=
lod_
[
h
];
int
offset
=
row
*
num_prior_box_
+
col
;
int
id
=
match_indices_
[
i
]
;
int
id
=
match_indices_
[
offset
];
T
*
out
=
out_
+
i
*
K_
;
T
*
obox
=
out_box_
+
offset
*
4
;
WT
*
out_wt
=
out_wt_
+
i
;
int
*
olabel
=
out_label_
+
offset
;
T
*
obox_wt
=
out_box_wt_
+
offset
;
T
*
olabel_wt
=
out_label_wt_
+
offset
;
if
(
id
>
-
1
)
{
if
(
id
>
-
1
)
{
const
T
*
gtbox
=
gt_box_
+
((
row_off
+
id
)
*
num_prior_box_
+
col
)
*
4
;
int
w_off
=
w
%
P_
;
const
T
*
in
=
in_
+
((
off
+
id
)
*
P_
+
w_off
)
*
K_
;
obox
[
0
]
=
gtbox
[
0
];
for
(
int64_t
k
=
0
;
k
<
K_
;
++
k
)
{
obox
[
1
]
=
gtbox
[
1
];
out
[
k
]
=
in
[
k
];
obox
[
2
]
=
gtbox
[
2
];
}
obox
[
3
]
=
gtbox
[
3
];
out_wt
[
0
]
=
static_cast
<
WT
>
(
1.
);
olabel
[
0
]
=
gt_label_
[
row_off
+
id
];
obox_wt
[
0
]
=
static_cast
<
T
>
(
1.
);
olabel_wt
[
0
]
=
static_cast
<
T
>
(
1.
);
}
else
{
}
else
{
obox
[
0
]
=
static_cast
<
T
>
(
0.
);
for
(
int64_t
k
=
0
;
k
<
K_
;
++
k
)
{
obox
[
1
]
=
static_cast
<
T
>
(
0.
);
out
[
k
]
=
static_cast
<
T
>
(
mismatch_value_
);
obox
[
2
]
=
static_cast
<
T
>
(
0.
);
}
obox
[
3
]
=
static_cast
<
T
>
(
0.
);
out_wt
[
0
]
=
static_cast
<
WT
>
(
0.
);
olabel
[
0
]
=
background_label_
;
obox_wt
[
0
]
=
static_cast
<
T
>
(
0.
);
olabel_wt
[
0
]
=
static_cast
<
T
>
(
0.
);
}
}
}
}
};
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
,
typename
WT
>
struct
NegTargetAssignFunctor
{
struct
NegTargetAssignFunctor
{
void
operator
()(
const
platform
::
DeviceContext
&
ctx
,
const
int
*
neg_indices
,
void
operator
()(
const
platform
::
DeviceContext
&
ctx
,
const
int
*
neg_indices
,
const
size_t
*
lod
,
const
int
num
,
const
int
num_prior_box
,
const
size_t
*
lod
,
const
int
N
,
const
int
M
,
const
int
K
,
const
int
background_label
,
int
*
out_label
,
const
int
mismatch_value
,
T
*
out
,
WT
*
out_wt
)
const
;
T
*
out_label_wt
)
const
;
};
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
,
typename
WT
>
class
TargetAssignKernel
:
public
framework
::
OpKernel
<
T
>
{
class
TargetAssignKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
enc_gt_box
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"EncodedGTBBox"
);
auto
*
x
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
);
auto
*
gt_label
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"GTScoreLabel"
);
auto
*
match_indices
=
ctx
.
Input
<
framework
::
Tensor
>
(
"MatchIndices"
);
auto
*
match_indices
=
ctx
.
Input
<
framework
::
Tensor
>
(
"MatchIndices"
);
auto
*
neg_indices
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"NegIndices"
);
auto
*
out_box
=
ctx
.
Output
<
framework
::
Tensor
>
(
"PredBBoxLabel"
);
auto
*
out_box_wt
=
ctx
.
Output
<
framework
::
Tensor
>
(
"PredBBoxWeight"
);
auto
*
out_label
=
ctx
.
Output
<
framework
::
Tensor
>
(
"PredScoreLabel"
);
auto
*
out_label_wt
=
ctx
.
Output
<
framework
::
Tensor
>
(
"PredScoreWeight"
);
PADDLE_ENFORCE_EQ
(
enc_gt_box
->
lod
().
size
(),
1UL
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
PADDLE_ENFORCE_EQ
(
gt_label
->
lod
().
size
(),
1UL
);
auto
*
out_wt
=
ctx
.
Output
<
framework
::
Tensor
>
(
"OutWeight"
);
PADDLE_ENFORCE_EQ
(
neg_indices
->
lod
().
size
(),
1UL
);
int
background_label
=
ctx
.
Attr
<
int
>
(
"background_label"
);
PADDLE_ENFORCE_EQ
(
x
->
lod
().
size
(),
1UL
);
int
mismatch_value
=
ctx
.
Attr
<
int
>
(
"mismatch_value"
);
const
T
*
box_data
=
enc_gt_box
->
data
<
T
>
();
const
T
*
x_data
=
x
->
data
<
T
>
();
const
int
*
label_data
=
gt_label
->
data
<
int
>
();
const
int
*
match_idx_data
=
match_indices
->
data
<
int
>
();
const
int
*
match_idx_data
=
match_indices
->
data
<
int
>
();
const
int
*
neg_idx_data
=
neg_indices
->
data
<
int
>
();
T
*
obox_data
=
out_box
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
out_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
obox_wt_data
=
out_box_wt
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
WT
*
out_wt_data
=
out_wt
->
mutable_data
<
WT
>
(
ctx
.
GetPlace
());
int
*
olabel_data
=
out_label
->
mutable_data
<
int
>
(
ctx
.
GetPlace
());
T
*
olabel_wt_data
=
out_label_wt
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int64_t
num
=
match_indices
->
dims
()[
0
];
int64_t
n
=
match_indices
->
dims
()[
0
];
int64_t
num_prior_box
=
match_indices
->
dims
()[
1
];
int64_t
m
=
match_indices
->
dims
()[
1
];
int64_t
p
=
x
->
dims
()[
1
];
int64_t
k
=
x
->
dims
()[
2
];
auto
gt_lod
=
enc_gt_box
->
lod
().
back
();
auto
x_lod
=
x
->
lod
().
back
();
auto
gt_label_lod
=
gt_label
->
lod
().
back
();
size_t
*
x_lod_data
=
x_lod
.
MutableData
(
ctx
.
GetPlace
());
auto
neg_lod
=
neg_indices
->
lod
().
back
();
for
(
size_t
i
=
0
;
i
<
gt_lod
.
size
();
++
i
)
{
PADDLE_ENFORCE_EQ
(
gt_lod
.
data
()[
i
],
gt_label_lod
.
data
()[
i
]);
}
size_t
*
gt_lod_data
=
gt_lod
.
MutableData
(
ctx
.
GetPlace
());
size_t
*
neg_lod_data
=
neg_lod
.
MutableData
(
ctx
.
GetPlace
());
TargetAssignFunctor
<
T
>
functor
(
box_data
,
label_data
,
match_idx_data
,
TargetAssignFunctor
<
T
,
WT
>
functor
(
x_data
,
match_idx_data
,
x_lod_data
,
gt_lod_data
,
background_label
,
num
,
mismatch_value
,
n
,
m
,
p
,
k
,
out_data
,
num_prior_box
,
obox_data
,
obox_wt_data
,
out_wt_data
);
olabel_data
,
olabel_wt_data
);
auto
&
device_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
auto
&
device_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
platform
::
ForRange
<
DeviceContext
>
for_range
(
device_ctx
,
platform
::
ForRange
<
DeviceContext
>
for_range
(
device_ctx
,
n
*
m
);
num
*
num_prior_box
);
for_range
(
functor
);
for_range
(
functor
);
NegTargetAssignFunctor
<
DeviceContext
,
T
>
neg_trg_functor
;
auto
*
neg_indices
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"NegIndices"
);
neg_trg_functor
(
device_ctx
,
neg_idx_data
,
neg_lod_data
,
num
,
num_prior_box
,
if
(
neg_indices
)
{
background_label
,
olabel_data
,
olabel_wt_data
);
PADDLE_ENFORCE_EQ
(
neg_indices
->
lod
().
size
(),
1UL
);
const
int
*
neg_idx_data
=
neg_indices
->
data
<
int
>
();
auto
neg_lod
=
neg_indices
->
lod
().
back
();
size_t
*
neg_lod_data
=
neg_lod
.
MutableData
(
ctx
.
GetPlace
());
NegTargetAssignFunctor
<
DeviceContext
,
T
,
WT
>
neg_trg_functor
;
neg_trg_functor
(
device_ctx
,
neg_idx_data
,
neg_lod_data
,
n
,
m
,
k
,
mismatch_value
,
out_data
,
out_wt_data
);
}
}
}
};
};
...
...
paddle/fluid/platform/cpu_info_test.cc
浏览文件 @
11bcb43a
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/string/printf.h"
#include "paddle/
fluid/
string/printf.h"
#include <ostream>
#include <ostream>
#include <sstream>
#include <sstream>
...
...
paddle/fluid/platform/enforce.h
浏览文件 @
11bcb43a
...
@@ -23,8 +23,8 @@ limitations under the License. */
...
@@ -23,8 +23,8 @@ limitations under the License. */
#include <string>
#include <string>
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/string/printf.h"
#include "paddle/
fluid/
string/printf.h"
#include "paddle/string/to_string.h"
#include "paddle/
fluid/
string/to_string.h"
#ifdef __GNUC__
#ifdef __GNUC__
#include <cxxabi.h> // for __cxa_demangle
#include <cxxabi.h> // for __cxa_demangle
...
...
paddle/fluid/platform/enforce_test.cc
浏览文件 @
11bcb43a
...
@@ -15,7 +15,7 @@ limitations under the License. */
...
@@ -15,7 +15,7 @@ limitations under the License. */
#include "gtest/gtest.h"
#include "gtest/gtest.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/string/piece.h"
#include "paddle/
fluid/
string/piece.h"
using
StringPiece
=
paddle
::
string
::
Piece
;
using
StringPiece
=
paddle
::
string
::
Piece
;
using
paddle
::
string
::
HasPrefix
;
using
paddle
::
string
::
HasPrefix
;
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
11bcb43a
...
@@ -35,7 +35,7 @@ limitations under the License. */
...
@@ -35,7 +35,7 @@ limitations under the License. */
#include "paddle/fluid/pybind/exception.h"
#include "paddle/fluid/pybind/exception.h"
#include "paddle/fluid/pybind/pybind.h"
#include "paddle/fluid/pybind/pybind.h"
#include "paddle/fluid/pybind/tensor_py.h"
#include "paddle/fluid/pybind/tensor_py.h"
#include "paddle/string/to_string.h"
#include "paddle/
fluid/
string/to_string.h"
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
...
...
paddle/string/.clang-format
→
paddle/
fluid/
string/.clang-format
浏览文件 @
11bcb43a
文件已移动
paddle/string/CMakeLists.txt
→
paddle/
fluid/
string/CMakeLists.txt
浏览文件 @
11bcb43a
文件已移动
paddle/string/piece.cc
→
paddle/
fluid/
string/piece.cc
浏览文件 @
11bcb43a
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "p
addle/string/p
iece.h"
#include "piece.h"
#include <string.h>
#include <string.h>
...
...
paddle/string/piece.h
→
paddle/
fluid/
string/piece.h
浏览文件 @
11bcb43a
...
@@ -28,7 +28,7 @@ namespace string {
...
@@ -28,7 +28,7 @@ namespace string {
// its syntax is simple as it doesn't own/manage the string, it is
// its syntax is simple as it doesn't own/manage the string, it is
// cheap to construct Pieces and pass them around.
// cheap to construct Pieces and pass them around.
class
Piece
{
class
Piece
{
public:
public:
static
const
size_t
npos
=
static_cast
<
size_t
>
(
-
1
);
static
const
size_t
npos
=
static_cast
<
size_t
>
(
-
1
);
// We provide non-explicit singleton constructors so users can
// We provide non-explicit singleton constructors so users can
...
@@ -55,7 +55,7 @@ public:
...
@@ -55,7 +55,7 @@ public:
// Return a string that contains the copy of the referenced data.
// Return a string that contains the copy of the referenced data.
std
::
string
ToString
()
const
{
return
std
::
string
(
data_
,
size_
);
}
std
::
string
ToString
()
const
{
return
std
::
string
(
data_
,
size_
);
}
private:
private:
const
char
*
data_
;
const
char
*
data_
;
size_t
size_
;
size_t
size_
;
...
...
paddle/string/piece_test.cc
→
paddle/
fluid/
string/piece_test.cc
浏览文件 @
11bcb43a
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/string/piece.h"
#include "paddle/
fluid/
string/piece.h"
#include <sstream>
#include <sstream>
...
...
paddle/string/printf.h
→
paddle/
fluid/
string/printf.h
浏览文件 @
11bcb43a
...
@@ -71,7 +71,7 @@
...
@@ -71,7 +71,7 @@
#include <iostream>
#include <iostream>
#include <sstream>
#include <sstream>
#include "
paddle/string/
tinyformat/tinyformat.h" // https://github.com/c42f/tinyformat
#include "tinyformat/tinyformat.h" // https://github.com/c42f/tinyformat
namespace
paddle
{
namespace
paddle
{
namespace
string
{
namespace
string
{
...
...
paddle/string/printf_test.cc
→
paddle/
fluid/
string/printf_test.cc
浏览文件 @
11bcb43a
...
@@ -11,7 +11,7 @@
...
@@ -11,7 +11,7 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "p
addle/string/p
rintf.h"
#include "printf.h"
#include <string>
#include <string>
...
@@ -24,6 +24,6 @@ TEST(StringPrintf, StringPrintf) {
...
@@ -24,6 +24,6 @@ TEST(StringPrintf, StringPrintf) {
long
hour
=
14
;
long
hour
=
14
;
int
min
=
44
;
int
min
=
44
;
EXPECT_EQ
(
std
::
string
(
"Wednesday, July 27, 14:44"
),
EXPECT_EQ
(
std
::
string
(
"Wednesday, July 27, 14:44"
),
paddle
::
string
::
Sprintf
(
paddle
::
string
::
Sprintf
(
"%s, %s %d, %.2d:%.2d"
,
weekday
,
month
,
day
,
"%s, %s %d, %.2d:%.2d"
,
weekday
,
month
,
day
,
hour
,
min
));
hour
,
min
));
}
}
paddle/string/tinyformat/tinyformat.h
→
paddle/
fluid/
string/tinyformat/tinyformat.h
浏览文件 @
11bcb43a
...
@@ -147,7 +147,7 @@ namespace detail {
...
@@ -147,7 +147,7 @@ namespace detail {
// Test whether type T1 is convertible to type T2
// Test whether type T1 is convertible to type T2
template
<
typename
T1
,
typename
T2
>
template
<
typename
T1
,
typename
T2
>
struct
is_convertible
{
struct
is_convertible
{
private:
private:
// two types of different size
// two types of different size
struct
fail
{
struct
fail
{
char
dummy
[
2
];
char
dummy
[
2
];
...
@@ -160,7 +160,7 @@ private:
...
@@ -160,7 +160,7 @@ private:
static
succeed
tryConvert
(
const
T2
&
);
static
succeed
tryConvert
(
const
T2
&
);
static
const
T1
&
makeT1
();
static
const
T1
&
makeT1
();
public:
public:
// Standard trick: the (...) version of tryConvert will be chosen from
// Standard trick: the (...) version of tryConvert will be chosen from
// the overload set only if the version taking a T2 doesn't match.
// the overload set only if the version taking a T2 doesn't match.
// Then we compare the sizes of the return types to check which
// Then we compare the sizes of the return types to check which
...
@@ -170,8 +170,7 @@ public:
...
@@ -170,8 +170,7 @@ public:
// Format the value by casting to type fmtT. This default implementation
// Format the value by casting to type fmtT. This default implementation
// should never be called.
// should never be called.
template
<
typename
T
,
template
<
typename
T
,
typename
fmtT
,
typename
fmtT
,
bool
convertible
=
is_convertible
<
T
,
fmtT
>
::
value
>
bool
convertible
=
is_convertible
<
T
,
fmtT
>
::
value
>
struct
formatValueAsType
{
struct
formatValueAsType
{
static
void
invoke
(
std
::
ostream
&
/*out*/
,
const
T
&
/*value*/
)
{
assert
(
0
);
}
static
void
invoke
(
std
::
ostream
&
/*out*/
,
const
T
&
/*value*/
)
{
assert
(
0
);
}
...
@@ -241,11 +240,8 @@ TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR(char)
...
@@ -241,11 +240,8 @@ TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR(char)
/// operator<< to format the type T, with special cases for the %c and %p
/// operator<< to format the type T, with special cases for the %c and %p
/// conversions.
/// conversions.
template
<
typename
T
>
template
<
typename
T
>
inline
void
formatValue
(
std
::
ostream
&
out
,
inline
void
formatValue
(
std
::
ostream
&
out
,
const
char
*
/*fmtBegin*/
,
const
char
*
/*fmtBegin*/
,
const
char
*
fmtEnd
,
int
ntrunc
,
const
T
&
value
)
{
const
char
*
fmtEnd
,
int
ntrunc
,
const
T
&
value
)
{
// The mess here is to support the %c and %p conversions: if these
// The mess here is to support the %c and %p conversions: if these
// conversions are active we try to convert the type to a char or const
// conversions are active we try to convert the type to a char or const
// void* respectively and format that instead of the value itself. For the
// void* respectively and format that instead of the value itself. For the
...
@@ -267,25 +263,22 @@ inline void formatValue(std::ostream &out,
...
@@ -267,25 +263,22 @@ inline void formatValue(std::ostream &out,
}
}
// Overloaded version for char types to support printing as an integer
// Overloaded version for char types to support printing as an integer
#define TINYFORMAT_DEFINE_FORMATVALUE_CHAR(charType) \
#define TINYFORMAT_DEFINE_FORMATVALUE_CHAR(charType) \
inline void formatValue(std::ostream &out, \
inline void formatValue(std::ostream &out, const char *
/*fmtBegin*/
, \
const char *
/*fmtBegin*/
, \
const char *fmtEnd, int
/**/
, charType value) { \
const char *fmtEnd, \
switch (*(fmtEnd - 1)) { \
int
/**/
, \
case 'u': \
charType value) { \
case 'd': \
switch (*(fmtEnd - 1)) { \
case 'i': \
case 'u': \
case 'o': \
case 'd': \
case 'X': \
case 'i': \
case 'x': \
case 'o': \
out << static_cast<int>(value); \
case 'X': \
break; \
case 'x': \
default: \
out << static_cast<int>(value); \
out << value; \
break; \
break; \
default: \
} \
out << value; \
break; \
} \
}
}
// per 3.9.1: char, signed char and unsigned char are all distinct types
// per 3.9.1: char, signed char and unsigned char are all distinct types
TINYFORMAT_DEFINE_FORMATVALUE_CHAR
(
char
)
TINYFORMAT_DEFINE_FORMATVALUE_CHAR
(
char
)
...
@@ -482,7 +475,7 @@ namespace detail {
...
@@ -482,7 +475,7 @@ namespace detail {
// each argument to be allocated as a homogenous array inside FormatList
// each argument to be allocated as a homogenous array inside FormatList
// whereas a naive implementation based on inheritance does not.
// whereas a naive implementation based on inheritance does not.
class
FormatArg
{
class
FormatArg
{
public:
public:
FormatArg
()
{}
FormatArg
()
{}
template
<
typename
T
>
template
<
typename
T
>
...
@@ -491,22 +484,17 @@ public:
...
@@ -491,22 +484,17 @@ public:
m_formatImpl
(
&
formatImpl
<
T
>
),
m_formatImpl
(
&
formatImpl
<
T
>
),
m_toIntImpl
(
&
toIntImpl
<
T
>
)
{}
m_toIntImpl
(
&
toIntImpl
<
T
>
)
{}
void
format
(
std
::
ostream
&
out
,
void
format
(
std
::
ostream
&
out
,
const
char
*
fmtBegin
,
const
char
*
fmtEnd
,
const
char
*
fmtBegin
,
const
char
*
fmtEnd
,
int
ntrunc
)
const
{
int
ntrunc
)
const
{
m_formatImpl
(
out
,
fmtBegin
,
fmtEnd
,
ntrunc
,
m_value
);
m_formatImpl
(
out
,
fmtBegin
,
fmtEnd
,
ntrunc
,
m_value
);
}
}
int
toInt
()
const
{
return
m_toIntImpl
(
m_value
);
}
int
toInt
()
const
{
return
m_toIntImpl
(
m_value
);
}
private:
private:
template
<
typename
T
>
template
<
typename
T
>
static
void
formatImpl
(
std
::
ostream
&
out
,
static
void
formatImpl
(
std
::
ostream
&
out
,
const
char
*
fmtBegin
,
const
char
*
fmtBegin
,
const
char
*
fmtEnd
,
int
ntrunc
,
const
void
*
value
)
{
const
char
*
fmtEnd
,
int
ntrunc
,
const
void
*
value
)
{
formatValue
(
out
,
fmtBegin
,
fmtEnd
,
ntrunc
,
*
static_cast
<
const
T
*>
(
value
));
formatValue
(
out
,
fmtBegin
,
fmtEnd
,
ntrunc
,
*
static_cast
<
const
T
*>
(
value
));
}
}
...
@@ -516,11 +504,8 @@ private:
...
@@ -516,11 +504,8 @@ private:
}
}
const
void
*
m_value
;
const
void
*
m_value
;
void
(
*
m_formatImpl
)(
std
::
ostream
&
out
,
void
(
*
m_formatImpl
)(
std
::
ostream
&
out
,
const
char
*
fmtBegin
,
const
char
*
fmtBegin
,
const
char
*
fmtEnd
,
int
ntrunc
,
const
void
*
value
);
const
char
*
fmtEnd
,
int
ntrunc
,
const
void
*
value
);
int
(
*
m_toIntImpl
)(
const
void
*
value
);
int
(
*
m_toIntImpl
)(
const
void
*
value
);
};
};
...
@@ -569,12 +554,10 @@ inline const char *printFormatStringLiteral(std::ostream &out,
...
@@ -569,12 +554,10 @@ inline const char *printFormatStringLiteral(std::ostream &out,
// necessary to pull out variable width and precision . The function returns a
// necessary to pull out variable width and precision . The function returns a
// pointer to the character after the end of the current format spec.
// pointer to the character after the end of the current format spec.
inline
const
char
*
streamStateFromFormat
(
std
::
ostream
&
out
,
inline
const
char
*
streamStateFromFormat
(
std
::
ostream
&
out
,
bool
&
spacePadPositive
,
bool
&
spacePadPositive
,
int
&
ntrunc
,
int
&
ntrunc
,
const
char
*
fmtStart
,
const
char
*
fmtStart
,
const
detail
::
FormatArg
*
formatters
,
const
detail
::
FormatArg
*
formatters
,
int
&
argIndex
,
int
&
argIndex
,
int
numFormatters
)
{
int
numFormatters
)
{
if
(
*
fmtStart
!=
'%'
)
{
if
(
*
fmtStart
!=
'%'
)
{
TINYFORMAT_ERROR
(
TINYFORMAT_ERROR
(
"tinyformat: Not enough conversion specifiers in format string"
);
"tinyformat: Not enough conversion specifiers in format string"
);
...
@@ -750,10 +733,8 @@ inline const char *streamStateFromFormat(std::ostream &out,
...
@@ -750,10 +733,8 @@ inline const char *streamStateFromFormat(std::ostream &out,
}
}
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
inline
void
formatImpl
(
std
::
ostream
&
out
,
inline
void
formatImpl
(
std
::
ostream
&
out
,
const
char
*
fmt
,
const
char
*
fmt
,
const
detail
::
FormatArg
*
formatters
,
int
numFormatters
)
{
const
detail
::
FormatArg
*
formatters
,
int
numFormatters
)
{
// Saved stream state
// Saved stream state
std
::
streamsize
origWidth
=
out
.
width
();
std
::
streamsize
origWidth
=
out
.
width
();
std
::
streamsize
origPrecision
=
out
.
precision
();
std
::
streamsize
origPrecision
=
out
.
precision
();
...
@@ -765,13 +746,9 @@ inline void formatImpl(std::ostream &out,
...
@@ -765,13 +746,9 @@ inline void formatImpl(std::ostream &out,
fmt
=
printFormatStringLiteral
(
out
,
fmt
);
fmt
=
printFormatStringLiteral
(
out
,
fmt
);
bool
spacePadPositive
=
false
;
bool
spacePadPositive
=
false
;
int
ntrunc
=
-
1
;
int
ntrunc
=
-
1
;
const
char
*
fmtEnd
=
streamStateFromFormat
(
out
,
const
char
*
fmtEnd
=
spacePadPositive
,
streamStateFromFormat
(
out
,
spacePadPositive
,
ntrunc
,
fmt
,
formatters
,
ntrunc
,
argIndex
,
numFormatters
);
fmt
,
formatters
,
argIndex
,
numFormatters
);
if
(
argIndex
>=
numFormatters
)
{
if
(
argIndex
>=
numFormatters
)
{
// Check args remain after reading any variable width/precision
// Check args remain after reading any variable width/precision
TINYFORMAT_ERROR
(
"tinyformat: Not enough format arguments"
);
TINYFORMAT_ERROR
(
"tinyformat: Not enough format arguments"
);
...
@@ -820,15 +797,14 @@ inline void formatImpl(std::ostream &out,
...
@@ -820,15 +797,14 @@ inline void formatImpl(std::ostream &out,
/// information has been stripped from the arguments, leaving just enough of a
/// information has been stripped from the arguments, leaving just enough of a
/// common interface to perform formatting as required.
/// common interface to perform formatting as required.
class
FormatList
{
class
FormatList
{
public:
public:
FormatList
(
detail
::
FormatArg
*
formatters
,
int
N
)
FormatList
(
detail
::
FormatArg
*
formatters
,
int
N
)
:
m_formatters
(
formatters
),
m_N
(
N
)
{}
:
m_formatters
(
formatters
),
m_N
(
N
)
{}
friend
void
vformat
(
std
::
ostream
&
out
,
friend
void
vformat
(
std
::
ostream
&
out
,
const
char
*
fmt
,
const
char
*
fmt
,
const
FormatList
&
list
);
const
FormatList
&
list
);
private:
private:
const
detail
::
FormatArg
*
m_formatters
;
const
detail
::
FormatArg
*
m_formatters
;
int
m_N
;
int
m_N
;
};
};
...
@@ -841,7 +817,7 @@ namespace detail {
...
@@ -841,7 +817,7 @@ namespace detail {
// Format list subclass with fixed storage to avoid dynamic allocation
// Format list subclass with fixed storage to avoid dynamic allocation
template
<
int
N
>
template
<
int
N
>
class
FormatListN
:
public
FormatList
{
class
FormatListN
:
public
FormatList
{
public:
public:
template
<
typename
...
Args
>
template
<
typename
...
Args
>
FormatListN
(
const
Args
&
...
args
)
FormatListN
(
const
Args
&
...
args
)
:
FormatList
(
&
m_formatterStore
[
0
],
N
),
:
FormatList
(
&
m_formatterStore
[
0
],
N
),
...
@@ -849,14 +825,14 @@ public:
...
@@ -849,14 +825,14 @@ public:
static_assert
(
sizeof
...(
args
)
==
N
,
"Number of args must be N"
);
static_assert
(
sizeof
...(
args
)
==
N
,
"Number of args must be N"
);
}
}
private:
private:
FormatArg
m_formatterStore
[
N
];
FormatArg
m_formatterStore
[
N
];
};
};
// Special 0-arg version - MSVC says zero-sized C array in struct is nonstandard
// Special 0-arg version - MSVC says zero-sized C array in struct is nonstandard
template
<
>
template
<
>
class
FormatListN
<
0
>
:
public
FormatList
{
class
FormatListN
<
0
>
:
public
FormatList
{
public:
public:
FormatListN
()
:
FormatList
(
0
,
0
)
{}
FormatListN
()
:
FormatList
(
0
,
0
)
{}
};
};
...
...
paddle/string/to_string.h
→
paddle/
fluid/
string/to_string.h
浏览文件 @
11bcb43a
文件已移动
paddle/string/to_string_test.cc
→
paddle/
fluid/
string/to_string_test.cc
浏览文件 @
11bcb43a
...
@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "
paddle/string/
to_string.h"
#include "to_string.h"
#include <gtest/gtest.h>
#include <gtest/gtest.h>
constexpr
char
kOutputString
[]
=
"User Defined Output"
;
constexpr
char
kOutputString
[]
=
"User Defined Output"
;
class
UserDefinedClass
{
class
UserDefinedClass
{
public:
public:
};
};
std
::
ostream
&
operator
<<
(
std
::
ostream
&
s
,
const
UserDefinedClass
&
ins
)
{
std
::
ostream
&
operator
<<
(
std
::
ostream
&
s
,
const
UserDefinedClass
&
ins
)
{
...
...
paddle/scripts/docker/build.sh
浏览文件 @
11bcb43a
...
@@ -115,8 +115,8 @@ EOF
...
@@ -115,8 +115,8 @@ EOF
-DWITH_AVX
=
${
WITH_AVX
:-
ON
}
\
-DWITH_AVX
=
${
WITH_AVX
:-
ON
}
\
-DWITH_SWIG_PY
=
ON
\
-DWITH_SWIG_PY
=
ON
\
-DWITH_STYLE_CHECK
=
OFF
-DWITH_STYLE_CHECK
=
OFF
make
-j
`
nproc
`
gen_proto_py
make
-j
`
nproc
`
gen_proto_py
framework_py_proto
make
-j
`
nproc
`
paddle_python
make
-j
`
nproc
`
copy_paddle_pybind
make
-j
`
nproc
`
paddle_docs paddle_docs_cn paddle_api_docs
make
-j
`
nproc
`
paddle_docs paddle_docs_cn paddle_api_docs
popd
popd
fi
fi
...
...
paddle/scripts/travis/build_doc.sh
浏览文件 @
11bcb43a
...
@@ -6,9 +6,9 @@ mkdir -p $TRAVIS_BUILD_DIR/build
...
@@ -6,9 +6,9 @@ mkdir -p $TRAVIS_BUILD_DIR/build
cd
$TRAVIS_BUILD_DIR
/build
cd
$TRAVIS_BUILD_DIR
/build
# Compile Documentation only.
# Compile Documentation only.
cmake ..
-DCMAKE_BUILD_TYPE
=
Debug
-DWITH_GPU
=
OFF
-DWITH_MKL
=
OFF
-DWITH_DOC
=
ON
cmake ..
-DCMAKE_BUILD_TYPE
=
Release
-DWITH_GPU
=
OFF
-DWITH_MKL
=
OFF
-DWITH_DOC
=
ON
-DWITH_STYLE_CHECK
=
OFF
make
-j
`
nproc
`
gen_proto_py
make
-j
`
nproc
`
gen_proto_py
framework_py_proto
make
-j
`
nproc
`
paddle_python
make
-j
`
nproc
`
copy_paddle_pybind
make
-j
`
nproc
`
paddle_docs paddle_docs_cn paddle_api_docs
make
-j
`
nproc
`
paddle_docs paddle_docs_cn paddle_api_docs
# check websites for broken links
# check websites for broken links
...
...
python/paddle/v2/fluid/distribute_transpiler.py
浏览文件 @
11bcb43a
...
@@ -33,6 +33,57 @@ class VarBlock:
...
@@ -33,6 +33,57 @@ class VarBlock:
return
"%s:%d:%d"
%
(
self
.
varname
,
self
.
offset
,
self
.
size
)
return
"%s:%d:%d"
%
(
self
.
varname
,
self
.
offset
,
self
.
size
)
class UnionFind(object):
    """ Union-find data struct.

    Union-find is a data struct that keeps track of a set of elements
    partitioned into a number of disjoint (non-overlapping) subsets.

    Reference:
    https://en.wikipedia.org/wiki/Disjoint-set_data_structure

    Args:
      elementes(list): The initial element list. Every element is used as
          a dict key, so it must be hashable. ``None`` or an empty list
          creates an empty structure.
    """

    def __init__(self, elementes=None):
        self._parents = []  # index -> parent index
        self._index = {}  # element -> index
        self._curr_idx = 0
        for ele in elementes or []:
            # Every element starts out as the root of its own set.
            self._parents.append(self._curr_idx)
            self._index[ele] = self._curr_idx
            self._curr_idx += 1

    def find(self, x):
        """Return the root index of element ``x``, or -1 if ``x`` is unknown.

        The path from ``x`` to its root is shortened while walking it
        (each visited node is re-pointed to its grandparent).
        """
        if x not in self._index:
            return -1

        idx = self._index[x]
        while idx != self._parents[idx]:
            t = self._parents[idx]
            self._parents[idx] = self._parents[t]
            idx = t
        return idx

    def union(self, x, y):
        """Merge the sets containing ``x`` and ``y``.

        If either element was never registered the call is a no-op.
        ``find`` reports unknown elements as -1, and using -1 as a list
        index would silently re-parent the last registered element.
        """
        x_root = self.find(x)
        y_root = self.find(y)

        if x_root == -1 or y_root == -1:
            return
        if x_root == y_root:
            return
        self._parents[x_root] = y_root

    def is_connected(self, x, y):
        """Return True if ``x`` and ``y`` are known and share a root index.

        Unknown elements are never connected to anything; without this
        check two unknown elements would compare equal through the -1
        sentinel returned by ``find``.
        """
        x_root = self.find(x)
        if x_root == -1:
            return False
        return x_root == self.find(y)
def
same_or_split_var
(
p_name
,
var_name
):
def
same_or_split_var
(
p_name
,
var_name
):
return
p_name
==
var_name
or
p_name
.
startswith
(
var_name
+
".block"
)
return
p_name
==
var_name
or
p_name
.
startswith
(
var_name
+
".block"
)
...
@@ -203,6 +254,21 @@ class DistributeTranspiler:
...
@@ -203,6 +254,21 @@ class DistributeTranspiler:
(
varname
,
self
.
trainer_id
)
(
varname
,
self
.
trainer_id
)
startup_prog
.
global_block
().
rename_var
(
varname
,
new_var_name
)
startup_prog
.
global_block
().
rename_var
(
varname
,
new_var_name
)
# self.lr_param_mapping = self._create_lr_param_mapping()
# def _create_lr_param_mapping(self):
# lr_mapping = dict()
# for _, opt_op in enumerate(self.optimize_ops):
# if not opt_op.inputs or not opt_op.inputs.has_key("LearningRate") \
# or not opt_op.inputs.has_key("Param"):
# continue
# lr = opt_op.inputs["LearningRate"].name
# param = opt_op.inputs["Param"].name
# if not lr_mapping.has_key(lr):
# lr_mapping.update({lr: list()})
# lr_mapping[lr].append(param)
# return lr_mapping
def
_create_vars_from_blocklist
(
self
,
program
,
block_list
):
def
_create_vars_from_blocklist
(
self
,
program
,
block_list
):
# Create respective variables using the block_list
# Create respective variables using the block_list
block_map
=
dict
()
block_map
=
dict
()
...
@@ -333,50 +399,15 @@ class DistributeTranspiler:
...
@@ -333,50 +399,15 @@ class DistributeTranspiler:
pass
pass
return
orig_shape
return
orig_shape
def
_op_input_var
(
self
,
op
,
varname
):
def
_fetch_var_names
(
self
,
param_dict
):
pass
res
=
[]
if
not
param_dict
:
def
_is_op_on_pserver
(
self
,
endpoint
,
all_ops
,
idx
):
return
res
"""
for
_
,
values
in
param_dict
.
iteritems
():
Recursively check if the op need to run on current server.
if
not
isinstance
(
values
,
list
):
Assume that ops are in the execution order.
values
=
[
values
]
"""
res
+=
[
v
.
name
for
v
in
values
]
param_names
=
[
return
res
p
.
name
for
p
in
self
.
param_grad_ep_mapping
[
endpoint
][
"params"
]
]
op
=
all_ops
[
idx
]
input_names
=
set
(
op
.
input_names
)
# TODO(typhoonzero): using Param and Grad input name to identify
# that the operator is an optimization operator, need a better way.
if
"Param"
in
input_names
:
if
op
.
input
(
"Param"
)[
0
]
in
param_names
:
return
True
else
:
for
n
in
param_names
:
if
same_or_split_var
(
n
,
op
.
input
(
"Param"
)[
0
])
\
and
n
!=
op
.
input
(
"Param"
)[
0
]:
return
True
return
False
else
:
j
=
idx
-
1
while
j
>=
0
:
prev_op
=
all_ops
[
j
]
# NOTE(typhoonzero): consider list input/output
prev_output_names
=
prev_op
.
desc
.
output_arg_names
()
prev_input_names
=
prev_op
.
desc
.
input_arg_names
()
found1
=
False
found2
=
False
for
varname
in
op
.
desc
.
input_arg_names
():
if
varname
in
prev_output_names
:
found1
=
self
.
_is_op_on_pserver
(
endpoint
,
all_ops
,
j
)
# later ops may produce output for prev op's next batch use.
for
varname
in
op
.
desc
.
output_arg_names
():
if
varname
in
prev_input_names
:
found2
=
self
.
_is_op_on_pserver
(
endpoint
,
all_ops
,
j
)
if
found1
or
found2
:
return
True
j
-=
1
return
False
def
_append_pserver_ops
(
self
,
optimize_block
,
opt_op
,
endpoint
):
def
_append_pserver_ops
(
self
,
optimize_block
,
opt_op
,
endpoint
):
program
=
optimize_block
.
program
program
=
optimize_block
.
program
...
@@ -394,11 +425,7 @@ class DistributeTranspiler:
...
@@ -394,11 +425,7 @@ class DistributeTranspiler:
# do not append this op if current endpoint
# do not append this op if current endpoint
# is not dealing with this grad block
# is not dealing with this grad block
return
return
merged_var
=
program
.
global_block
().
create_var
(
merged_var
=
program
.
global_block
().
vars
[
grad_block
.
name
]
name
=
grad_block
.
name
,
persistable
=
grad_block
.
persistable
,
dtype
=
grad_block
.
dtype
,
shape
=
grad_block
.
shape
)
# append merging ops if trainers > 1
# append merging ops if trainers > 1
if
self
.
trainers
>
1
:
if
self
.
trainers
>
1
:
vars2merge
=
self
.
_create_var_for_trainers
(
vars2merge
=
self
.
_create_var_for_trainers
(
...
@@ -429,13 +456,19 @@ class DistributeTranspiler:
...
@@ -429,13 +456,19 @@ class DistributeTranspiler:
shape
=
param_block
.
shape
)
shape
=
param_block
.
shape
)
new_inputs
[
key
]
=
tmpvar
new_inputs
[
key
]
=
tmpvar
elif
key
==
"LearningRate"
:
# The learning rate variable has already been created by a non-optimize op,
# so don't create it again.
new_inputs
[
key
]
=
program
.
global_block
().
vars
[
opt_op
.
input
(
key
)[
0
]]
for
key
in
opt_op
.
input_names
:
for
key
in
opt_op
.
input_names
:
if
key
in
[
"Param"
,
"Grad"
]:
new_shape
=
None
if
key
in
[
"Param"
,
"Grad"
,
"LearningRate"
]:
continue
continue
var
=
program
.
global_block
().
vars
[
opt_op
.
input
(
key
)[
0
]]
# update accumulator variable shape
# update accumulator variable shape
param_shape
=
new_inputs
[
"Param"
].
shape
param_shape
=
new_inputs
[
"Param"
].
shape
var
=
program
.
global_block
().
vars
[
opt_op
.
input
(
key
)[
0
]]
new_shape
=
self
.
_get_optimizer_input_shape
(
opt_op
.
type
,
key
,
new_shape
=
self
.
_get_optimizer_input_shape
(
opt_op
.
type
,
key
,
var
.
shape
,
param_shape
)
var
.
shape
,
param_shape
)
tmpvar
=
program
.
global_block
().
create_var
(
tmpvar
=
program
.
global_block
().
create_var
(
...
@@ -446,12 +479,11 @@ class DistributeTranspiler:
...
@@ -446,12 +479,11 @@ class DistributeTranspiler:
new_inputs
[
key
]
=
tmpvar
new_inputs
[
key
]
=
tmpvar
# change output's ParamOut variable
# change output's ParamOut variable
outputs
=
self
.
_get_output_map_from_op
(
program
.
global_block
(),
opt_op
)
opt_op
.
outputs
[
"ParamOut"
]
=
new_inputs
[
"Param"
]
outputs
[
"ParamOut"
]
=
new_inputs
[
"Param"
]
optimize_block
.
append_op
(
optimize_block
.
append_op
(
type
=
opt_op
.
type
,
type
=
opt_op
.
type
,
inputs
=
new_inputs
,
inputs
=
new_inputs
,
outputs
=
outputs
,
outputs
=
o
pt_op
.
o
utputs
,
attrs
=
opt_op
.
attrs
)
attrs
=
opt_op
.
attrs
)
def
_append_pserver_non_opt_ops
(
self
,
optimize_block
,
opt_op
):
def
_append_pserver_non_opt_ops
(
self
,
optimize_block
,
opt_op
):
...
@@ -459,11 +491,10 @@ class DistributeTranspiler:
...
@@ -459,11 +491,10 @@ class DistributeTranspiler:
# Append the ops for parameters that do not need to be optimized/updated
# Append the ops for parameters that do not need to be optimized/updated
inputs
=
self
.
_get_input_map_from_op
(
self
.
program
.
global_block
().
vars
,
inputs
=
self
.
_get_input_map_from_op
(
self
.
program
.
global_block
().
vars
,
opt_op
)
opt_op
)
for
var
in
inputs
.
itervalues
():
for
varlist
in
inputs
.
itervalues
():
if
type
(
var
)
==
list
:
if
not
isinstance
(
varlist
,
list
):
varlist
=
var
varlist
=
[
varlist
]
else
:
varlist
=
[
var
]
for
var
in
varlist
:
for
var
in
varlist
:
if
not
program
.
global_block
().
vars
.
has_key
(
var
.
name
):
if
not
program
.
global_block
().
vars
.
has_key
(
var
.
name
):
program
.
global_block
().
create_var
(
program
.
global_block
().
create_var
(
...
@@ -475,12 +506,70 @@ class DistributeTranspiler:
...
@@ -475,12 +506,70 @@ class DistributeTranspiler:
outputs
=
self
.
_get_output_map_from_op
(
self
.
program
.
global_block
().
vars
,
outputs
=
self
.
_get_output_map_from_op
(
self
.
program
.
global_block
().
vars
,
opt_op
)
opt_op
)
for
varlist
in
outputs
.
itervalues
():
if
not
isinstance
(
varlist
,
list
):
varlist
=
[
varlist
]
for
var
in
varlist
:
program
.
global_block
().
create_var
(
name
=
var
.
name
,
persistable
=
var
.
persistable
,
dtype
=
var
.
dtype
,
shape
=
var
.
shape
)
optimize_block
.
append_op
(
optimize_block
.
append_op
(
type
=
opt_op
.
type
,
type
=
opt_op
.
type
,
inputs
=
inputs
,
inputs
=
inputs
,
outputs
=
outputs
,
outputs
=
outputs
,
attrs
=
opt_op
.
attrs
)
attrs
=
opt_op
.
attrs
)
def _is_op_connected(self, op1, op2):
    """Tell whether two operators are linked through a shared variable.

    The operators are considered connected when a variable name appears
    both among the outputs of one and the inputs of the other, in either
    direction. Names are collected with ``self._fetch_var_names`` from each
    operator's ``inputs`` and ``outputs``.
    """
    op1_ins = set(self._fetch_var_names(op1.inputs))
    op1_outs = set(self._fetch_var_names(op1.outputs))
    op2_ins = set(self._fetch_var_names(op2.inputs))
    op2_outs = set(self._fetch_var_names(op2.outputs))

    feeds_forward = op1_outs & op2_ins
    feeds_backward = op1_ins & op2_outs
    return bool(feeds_forward or feeds_backward)
def _create_ufind(self, optimize_ops):
    """Build a union-find structure that groups connected optimize ops.

    Every op in ``optimize_ops`` starts in its own set; each pair for which
    ``self._is_op_connected`` holds is merged into one set.

    Returns:
        UnionFind: the populated structure.
    """
    op_sets = UnionFind(optimize_ops)
    op_count = len(optimize_ops)
    for first in xrange(op_count):
        lhs = optimize_ops[first]
        for second in xrange(first, op_count):
            rhs = optimize_ops[second]
            if not self._is_op_connected(lhs, rhs):
                continue
            op_sets.union(lhs, rhs)
    return op_sets
def
_is_opt_op
(
self
,
op
):
# NOTE: It's a HACK implement.
# optimize op: SGDOptimize, MomentumOptimizer, AdamOptimizer and etc...
if
"Param"
in
op
.
input_names
and
\
"LearningRate"
in
op
.
input_names
:
return
True
return
False
def _is_opt_op_on_pserver(self, endpoint, op):
    """Tell whether optimize op ``op`` updates a parameter kept on ``endpoint``.

    The op qualifies when the name of its "Param" input is one of the
    parameter names recorded for ``endpoint`` in
    ``self.param_grad_ep_mapping``, or when one of those recorded names is a
    split block of it according to ``same_or_split_var``.
    """
    param_names = [
        p.name for p in self.param_grad_ep_mapping[endpoint]["params"]
    ]
    # op.input("Param") yields a sequence of variable names (other call
    # sites in this file index it with [0]); compare the name itself, not
    # the whole sequence.
    param = op.input("Param")[0]
    if param in param_names:
        return True
    for n in param_names:
        if same_or_split_var(n, param) and n != param:
            return True
    return False
def
get_pserver_program
(
self
,
endpoint
):
def
get_pserver_program
(
self
,
endpoint
):
"""
"""
Get pserver side program using the endpoint
Get pserver side program using the endpoint
...
@@ -514,17 +603,30 @@ class DistributeTranspiler:
...
@@ -514,17 +603,30 @@ class DistributeTranspiler:
recv_inputs
.
append
(
var
)
recv_inputs
.
append
(
var
)
# step6
# step6
optimize_block
=
pserver_program
.
create_block
(
0
)
optimize_block
=
pserver_program
.
create_block
(
0
)
# Iterate through the ops and append ops as needed
# step 6.1
for
idx
,
opt_op
in
enumerate
(
self
.
optimize_ops
):
# Create a union-find data struct by optimize ops,
is_op_on_pserver
=
self
.
_is_op_on_pserver
(
endpoint
,
# If two ops are connected, we could add these two ops
self
.
optimize_ops
,
idx
)
# into one set.
if
not
is_op_on_pserver
:
ufind
=
self
.
_create_ufind
(
self
.
optimize_ops
)
continue
# step 6.2
if
"Grad"
in
opt_op
.
desc
.
input_arg_names
():
# Iterate through the ops and append optimize op which
self
.
_append_pserver_ops
(
optimize_block
,
opt_op
,
endpoint
)
# located on current pserver
else
:
opt_op_on_pserver
=
[]
self
.
_append_pserver_non_opt_ops
(
optimize_block
,
opt_op
)
for
_
,
op
in
enumerate
(
self
.
optimize_ops
):
if
self
.
_is_opt_op
(
op
)
and
self
.
_is_opt_op_on_pserver
(
endpoint
,
op
):
opt_op_on_pserver
.
append
(
op
)
# step 6.3
# Iterate through the ops, and if an op and the optimize ops
# which located on current pserver are in one set, then
# append it into the sub program.
for
_
,
op
in
enumerate
(
self
.
optimize_ops
):
for
_
,
opt_op
in
enumerate
(
opt_op_on_pserver
):
if
ufind
.
is_connected
(
op
,
opt_op
):
if
self
.
_is_opt_op
(
op
):
self
.
_append_pserver_ops
(
optimize_block
,
op
,
endpoint
)
else
:
self
.
_append_pserver_non_opt_ops
(
optimize_block
,
op
)
break
# Append the listen_and_serv op
# Append the listen_and_serv op
pserver_program
.
global_block
().
append_op
(
pserver_program
.
global_block
().
append_op
(
type
=
"listen_and_serv"
,
type
=
"listen_and_serv"
,
...
...
python/paddle/v2/fluid/layers/math_op_patch.py
浏览文件 @
11bcb43a
...
@@ -117,6 +117,7 @@ def monkey_patch_variable():
...
@@ -117,6 +117,7 @@ def monkey_patch_variable():
tmp_name
=
unique_tmp_name
()
tmp_name
=
unique_tmp_name
()
out
=
self
.
block
.
create_var
(
name
=
tmp_name
,
dtype
=
lhs_dtype
)
out
=
self
.
block
.
create_var
(
name
=
tmp_name
,
dtype
=
lhs_dtype
)
self
.
block
.
append_op
(
self
.
block
.
append_op
(
type
=
op_type
,
type
=
op_type
,
inputs
=
{
'X'
:
[
self
],
inputs
=
{
'X'
:
[
self
],
...
...
python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
浏览文件 @
11bcb43a
...
@@ -99,7 +99,7 @@ elif training_role == "TRAINER":
...
@@ -99,7 +99,7 @@ elif training_role == "TRAINER":
exe
.
run
(
fluid
.
default_startup_program
())
exe
.
run
(
fluid
.
default_startup_program
())
for
pass_id
in
range
(
PASS_NUM
):
for
pass_id
in
range
(
PASS_NUM
):
for
data
in
train_reader
():
for
data
in
train_reader
():
avg_cost_np
=
exe
.
run
(
fluid
.
default_main
_program
(),
avg_cost_np
=
exe
.
run
(
t
.
get_trainer
_program
(),
feed
=
feeder
.
feed
(
data
),
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
avg_cost
])
fetch_list
=
[
avg_cost
])
print
(
"avg_cost_np"
,
avg_cost_np
)
print
(
"avg_cost_np"
,
avg_cost_np
)
...
...
python/paddle/v2/fluid/tests/test_cpp_reader.py
浏览文件 @
11bcb43a
...
@@ -64,9 +64,7 @@ exe = fluid.Executor(place)
...
@@ -64,9 +64,7 @@ exe = fluid.Executor(place)
[
res1
,
res2
]
=
exe
.
run
(
prog
,
fetch_list
=
[
out1
,
out2
])
[
res1
,
res2
]
=
exe
.
run
(
prog
,
fetch_list
=
[
out1
,
out2
])
test_pass
=
res1
.
shape
==
(
10
,
2
)
and
res2
.
shape
==
(
10
,
1
)
if
not
(
res1
.
shape
==
(
10
,
2
)
and
res2
.
shape
==
(
10
,
1
)):
if
not
test_pass
:
exit
(
1
)
exit
(
1
)
exit
(
0
)
exit
(
0
)
python/paddle/v2/fluid/tests/test_sequence_expand.py
浏览文件 @
11bcb43a
...
@@ -73,5 +73,20 @@ class TestSequenceExpandCase3(TestSequenceExpand):
...
@@ -73,5 +73,20 @@ class TestSequenceExpandCase3(TestSequenceExpand):
self
.
inputs
=
{
'X'
:
(
x_data
,
x_lod
),
'Y'
:
(
y_data
,
y_lod
)}
self
.
inputs
=
{
'X'
:
(
x_data
,
x_lod
),
'Y'
:
(
y_data
,
y_lod
)}
class TestSequenceExpandCase4(TestSequenceExpand):
    """sequence_expand case: 2x5 fixed X with a one-level LoD and a random
    2x1 Y with a two-level LoD."""

    def set_data(self):
        # LoD tables first: X has one level, Y has two identical levels.
        x_lod = [[0, 1, 2, ]]
        y_lod = [[0, 1, 2], [0, 1, 2]]

        x_values = [0.1, 0.3, 0.2, 0.15, 0.25, 0.2, 0.15, 0.25, 0.1, 0.3]
        x_data = np.array(x_values).reshape([2, 5]).astype('float32')
        y_data = np.random.uniform(0.1, 1, [2, 1]).astype('float32')

        self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/v2/fluid/tests/test_split_op.py
浏览文件 @
11bcb43a
...
@@ -20,11 +20,11 @@ from op_test import OpTest
...
@@ -20,11 +20,11 @@ from op_test import OpTest
class
TestSplitOp
(
OpTest
):
class
TestSplitOp
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"split"
self
.
op_type
=
"split"
axis
=
0
axis
=
1
x
=
np
.
random
.
random
((
4
,
2
,
5
)).
astype
(
'float32'
)
x
=
np
.
random
.
random
((
4
,
5
,
6
)).
astype
(
'float32'
)
out
=
np
.
split
(
x
,
[
1
,
3
],
axis
)
out
=
np
.
split
(
x
,
[
2
,
3
],
axis
)
self
.
inputs
=
{
'X'
:
x
}
self
.
inputs
=
{
'X'
:
x
}
self
.
attrs
=
{
'axis'
:
axis
,
'sections'
:
[
1
,
2
,
1
]}
self
.
attrs
=
{
'axis'
:
axis
,
'sections'
:
[
2
,
1
,
2
]}
self
.
outputs
=
{
'Out'
:
[(
'out%d'
%
i
,
out
[
i
])
\
self
.
outputs
=
{
'Out'
:
[(
'out%d'
%
i
,
out
[
i
])
\
for
i
in
xrange
(
len
(
out
))]}
for
i
in
xrange
(
len
(
out
))]}
...
...
python/paddle/v2/fluid/tests/test_target_assign_op.py
浏览文件 @
11bcb43a
...
@@ -43,7 +43,7 @@ def gen_match_and_neg_indices(num_prior, gt_lod, neg_lod):
...
@@ -43,7 +43,7 @@ def gen_match_and_neg_indices(num_prior, gt_lod, neg_lod):
def
target_assign
(
encoded_box
,
gt_label
,
match_indices
,
neg_indices
,
gt_lod
,
def
target_assign
(
encoded_box
,
gt_label
,
match_indices
,
neg_indices
,
gt_lod
,
neg_lod
,
background_label
):
neg_lod
,
mismatch_value
):
batch_size
,
num_prior
=
match_indices
.
shape
batch_size
,
num_prior
=
match_indices
.
shape
# init target bbox
# init target bbox
...
@@ -52,7 +52,7 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
...
@@ -52,7 +52,7 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
trg_box_wt
=
np
.
zeros
((
batch_size
,
num_prior
,
1
)).
astype
(
'float32'
)
trg_box_wt
=
np
.
zeros
((
batch_size
,
num_prior
,
1
)).
astype
(
'float32'
)
# init target label
# init target label
trg_label
=
np
.
ones
((
batch_size
,
num_prior
,
1
)).
astype
(
'int32'
)
trg_label
=
np
.
ones
((
batch_size
,
num_prior
,
1
)).
astype
(
'int32'
)
trg_label
=
trg_label
*
background_label
trg_label
=
trg_label
*
mismatch_value
# init weight for target label
# init weight for target label
trg_label_wt
=
np
.
zeros
((
batch_size
,
num_prior
,
1
)).
astype
(
'float32'
)
trg_label_wt
=
np
.
zeros
((
batch_size
,
num_prior
,
1
)).
astype
(
'float32'
)
...
@@ -65,53 +65,90 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
...
@@ -65,53 +65,90 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
# target bbox
# target bbox
for
v
,
c
in
zip
(
col_val
+
gt_start
,
col_ids
[
0
].
tolist
()):
for
v
,
c
in
zip
(
col_val
+
gt_start
,
col_ids
[
0
].
tolist
()):
trg_box
[
i
][
c
][:]
=
encoded_box
[
v
][
c
][:]
trg_box
[
i
][
c
][:]
=
encoded_box
[
v
][
c
][:]
# weight for target bbox
# weight for target bbox
trg_box_wt
[
i
][
col_ids
]
=
1.0
trg_box_wt
[
i
][
col_ids
]
=
1.0
trg_label
[
i
][
col_ids
]
=
gt_label
[
col_val
+
gt_start
]
trg_label
[
i
][
col_ids
]
=
gt_label
[
col_val
+
gt_start
]
trg_label_wt
[
i
][
col_ids
]
=
1.0
trg_label_wt
[
i
][
col_ids
]
=
1.0
# set target label weight to 1.0 for the negative samples
# set target label weight to 1.0 for the negative samples
neg_ids
=
neg_indices
[
neg_lod
[
i
]:
neg_lod
[
i
+
1
]]
if
neg_indices
is
not
None
:
trg_label_wt
[
i
][
neg_ids
]
=
1.0
neg_ids
=
neg_indices
[
neg_lod
[
i
]:
neg_lod
[
i
+
1
]]
trg_label_wt
[
i
][
neg_ids
]
=
1.0
return
trg_box
,
trg_box_wt
,
trg_label
,
trg_label_wt
return
trg_box
,
trg_box_wt
,
trg_label
,
trg_label_wt
class
TestTargetAssgin
Op
(
OpTest
):
class
TestTargetAssgin
FloatType
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"target_assign"
self
.
op_type
=
"target_assign"
num_prior
=
120
num_class
=
21
gt_lod
=
[
0
,
5
,
11
,
23
]
neg_lod
=
[
0
,
4
,
7
,
13
]
mismatch_value
=
0
batch_size
=
len
(
gt_lod
)
-
1
num_gt
=
gt_lod
[
-
1
]
encoded_box
=
np
.
random
.
random
((
num_gt
,
num_prior
,
4
)).
astype
(
'float32'
)
gt_label
=
np
.
random
.
randint
(
num_class
,
size
=
(
num_gt
,
1
)).
astype
(
'int32'
)
match_indices
,
neg_indices
=
gen_match_and_neg_indices
(
num_prior
,
gt_lod
,
neg_lod
)
out
,
out_wt
,
_
,
_
=
target_assign
(
encoded_box
,
gt_label
,
match_indices
,
neg_indices
,
gt_lod
,
neg_lod
,
mismatch_value
)
# assign regression targets
x
=
encoded_box
self
.
inputs
=
{
'X'
:
(
x
,
[
gt_lod
]),
'MatchIndices'
:
match_indices
,
}
self
.
attrs
=
{
'mismatch_value'
:
mismatch_value
}
self
.
outputs
=
{
'Out'
:
out
,
'OutWeight'
:
out_wt
,
}
def
test_check_output
(
self
):
self
.
check_output
()
class
TestTargetAssginIntType
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"target_assign"
num_prior
=
120
num_prior
=
120
num_class
=
21
num_class
=
21
gt_lod
=
[
0
,
5
,
11
,
23
]
gt_lod
=
[
0
,
5
,
11
,
23
]
neg_lod
=
[
0
,
4
,
7
,
13
]
neg_lod
=
[
0
,
4
,
7
,
13
]
mismatch_value
=
0
batch_size
=
len
(
gt_lod
)
-
1
batch_size
=
len
(
gt_lod
)
-
1
num_gt
=
gt_lod
[
-
1
]
num_gt
=
gt_lod
[
-
1
]
background_label
=
0
encoded_box
=
np
.
random
.
random
((
num_gt
,
num_prior
,
4
)).
astype
(
'float32'
)
encoded_box
=
np
.
random
.
random
((
num_gt
,
num_prior
,
4
)).
astype
(
'float32'
)
gt_label
=
np
.
random
.
randint
(
gt_label
=
np
.
random
.
randint
(
num_class
,
size
=
(
num_gt
,
1
)).
astype
(
'int32'
)
num_class
,
size
=
(
num_gt
,
1
)).
astype
(
'int32'
)
match_indices
,
neg_indices
=
gen_match_and_neg_indices
(
num_prior
,
match_indices
,
neg_indices
=
gen_match_and_neg_indices
(
num_prior
,
gt_lod
,
neg_lod
)
gt_lod
,
neg_lod
)
trg_box
,
trg_box_wt
,
trg_label
,
trg_label_wt
=
target_assign
(
encoded_box
,
gt_label
,
match_indices
,
neg_indices
,
gt_lod
,
neg_lod
,
background_label
)
_
,
_
,
out
,
out_wt
,
=
target_assign
(
encoded_box
,
gt_label
,
match_indices
,
neg_indices
,
gt_lod
,
neg_lod
,
mismatch_value
)
# assign classification targets
x
=
np
.
reshape
(
gt_label
,
(
num_gt
,
1
,
1
))
self
.
inputs
=
{
self
.
inputs
=
{
'EncodedGTBBox'
:
(
encoded_box
,
[
gt_lod
]),
'X'
:
(
x
,
[
gt_lod
]),
'GTScoreLabel'
:
(
gt_label
,
[
gt_lod
]),
'MatchIndices'
:
match_indices
,
'MatchIndices'
:
(
match_indices
),
'NegIndices'
:
(
neg_indices
,
[
neg_lod
]),
'NegIndices'
:
(
neg_indices
,
[
neg_lod
]),
}
}
self
.
attrs
=
{
'
background_label'
:
background_label
}
self
.
attrs
=
{
'
mismatch_value'
:
mismatch_value
}
self
.
outputs
=
{
self
.
outputs
=
{
'PredBBoxLabel'
:
(
trg_box
),
'Out'
:
out
,
'PredBBoxWeight'
:
(
trg_box_wt
),
'OutWeight'
:
out_wt
,
'PredScoreLabel'
:
(
trg_label
),
'PredScoreWeight'
:
(
trg_label_wt
),
}
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录