Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
11bcb43a
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
11bcb43a
编写于
2月 11, 2018
作者:
T
typhoonzero
浏览文件
操作
浏览文件
下载
差异文件
fix merge issue
上级
a8b630c8
4f4abfa3
变更
43
隐藏空白更改
内联
并排
Showing
43 changed file
with
612 addition
and
465 deletion
+612
-465
AUTHORS.md
AUTHORS.md
+1
-1
doc/templates/conf.py.cn.in
doc/templates/conf.py.cn.in
+1
-1
doc/templates/conf.py.en.in
doc/templates/conf.py.en.in
+1
-1
paddle/CMakeLists.txt
paddle/CMakeLists.txt
+0
-1
paddle/fluid/CMakeLists.txt
paddle/fluid/CMakeLists.txt
+1
-0
paddle/fluid/framework/ddim.cc
paddle/fluid/framework/ddim.cc
+10
-0
paddle/fluid/framework/ddim.h
paddle/fluid/framework/ddim.h
+2
-0
paddle/fluid/framework/init.cc
paddle/fluid/framework/init.cc
+1
-1
paddle/fluid/framework/mixed_vector.h
paddle/fluid/framework/mixed_vector.h
+9
-4
paddle/fluid/framework/mixed_vector_test.cu
paddle/fluid/framework/mixed_vector_test.cu
+11
-4
paddle/fluid/framework/scope.cc
paddle/fluid/framework/scope.cc
+1
-1
paddle/fluid/operators/concat_op.h
paddle/fluid/operators/concat_op.h
+19
-19
paddle/fluid/operators/listen_and_serv_op.cc
paddle/fluid/operators/listen_and_serv_op.cc
+4
-4
paddle/fluid/operators/send_recv_op_test.cc
paddle/fluid/operators/send_recv_op_test.cc
+1
-1
paddle/fluid/operators/sequence_expand_op.cc
paddle/fluid/operators/sequence_expand_op.cc
+3
-1
paddle/fluid/operators/split_op.h
paddle/fluid/operators/split_op.h
+10
-9
paddle/fluid/operators/strided_memcpy.h
paddle/fluid/operators/strided_memcpy.h
+57
-0
paddle/fluid/operators/target_assign_op.cc
paddle/fluid/operators/target_assign_op.cc
+76
-117
paddle/fluid/operators/target_assign_op.cu
paddle/fluid/operators/target_assign_op.cu
+22
-20
paddle/fluid/operators/target_assign_op.h
paddle/fluid/operators/target_assign_op.h
+71
-98
paddle/fluid/platform/cpu_info_test.cc
paddle/fluid/platform/cpu_info_test.cc
+1
-1
paddle/fluid/platform/enforce.h
paddle/fluid/platform/enforce.h
+2
-2
paddle/fluid/platform/enforce_test.cc
paddle/fluid/platform/enforce_test.cc
+1
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+1
-1
paddle/fluid/string/.clang-format
paddle/fluid/string/.clang-format
+0
-0
paddle/fluid/string/CMakeLists.txt
paddle/fluid/string/CMakeLists.txt
+0
-0
paddle/fluid/string/piece.cc
paddle/fluid/string/piece.cc
+1
-1
paddle/fluid/string/piece.h
paddle/fluid/string/piece.h
+2
-2
paddle/fluid/string/piece_test.cc
paddle/fluid/string/piece_test.cc
+1
-1
paddle/fluid/string/printf.h
paddle/fluid/string/printf.h
+1
-1
paddle/fluid/string/printf_test.cc
paddle/fluid/string/printf_test.cc
+3
-3
paddle/fluid/string/tinyformat/tinyformat.h
paddle/fluid/string/tinyformat/tinyformat.h
+41
-65
paddle/fluid/string/to_string.h
paddle/fluid/string/to_string.h
+0
-0
paddle/fluid/string/to_string_test.cc
paddle/fluid/string/to_string_test.cc
+2
-2
paddle/scripts/docker/build.sh
paddle/scripts/docker/build.sh
+2
-2
paddle/scripts/travis/build_doc.sh
paddle/scripts/travis/build_doc.sh
+3
-3
python/paddle/v2/fluid/distribute_transpiler.py
python/paddle/v2/fluid/distribute_transpiler.py
+172
-70
python/paddle/v2/fluid/layers/math_op_patch.py
python/paddle/v2/fluid/layers/math_op_patch.py
+1
-0
python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
...le/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
+1
-1
python/paddle/v2/fluid/tests/test_cpp_reader.py
python/paddle/v2/fluid/tests/test_cpp_reader.py
+1
-3
python/paddle/v2/fluid/tests/test_sequence_expand.py
python/paddle/v2/fluid/tests/test_sequence_expand.py
+15
-0
python/paddle/v2/fluid/tests/test_split_op.py
python/paddle/v2/fluid/tests/test_split_op.py
+4
-4
python/paddle/v2/fluid/tests/test_target_assign_op.py
python/paddle/v2/fluid/tests/test_target_assign_op.py
+56
-19
未找到文件。
AUTHORS.md
浏览文件 @
11bcb43a
...
...
@@ -2,7 +2,7 @@
|---|---|
| backyes | Yan-Fei Wang |
| beckett1124 | Bin Qi |
|
Canpio
| Jia-Yi Feng |
|
JiayiFeng
| Jia-Yi Feng |
| chengxiaohua1105 | Xiao-Hua Cheng |
| cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang |
| cxysteven | Xing-Yi Cheng |
...
...
doc/templates/conf.py.cn.in
浏览文件 @
11bcb43a
...
...
@@ -82,7 +82,7 @@ language = 'zh_CN'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build', '**/*_en*', '*_en*']
exclude_patterns = ['_build', '**/*_en*', '*_en*'
, 'api/*'
]
# The reST default role (used for this markup: `text`) to use for all
# documents.
...
...
doc/templates/conf.py.en.in
浏览文件 @
11bcb43a
...
...
@@ -82,7 +82,7 @@ language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build', '**/*_cn*', '*_cn*']
exclude_patterns = ['_build', '**/*_cn*', '*_cn*'
, 'api/*'
]
# The reST default role (used for this markup: `text`) to use for all
# documents.
...
...
paddle/CMakeLists.txt
浏览文件 @
11bcb43a
...
...
@@ -11,7 +11,6 @@ if(MOBILE_INFERENCE)
else
()
add_subdirectory
(
pserver
)
add_subdirectory
(
trainer
)
add_subdirectory
(
string
)
add_subdirectory
(
scripts
)
if
(
WITH_C_API
)
...
...
paddle/fluid/CMakeLists.txt
浏览文件 @
11bcb43a
...
...
@@ -4,3 +4,4 @@ add_subdirectory(framework)
add_subdirectory
(
operators
)
add_subdirectory
(
pybind
)
add_subdirectory
(
inference
)
add_subdirectory
(
string
)
paddle/fluid/framework/ddim.cc
浏览文件 @
11bcb43a
...
...
@@ -314,5 +314,15 @@ DDim stride(const DDim& ddim) {
}
return
framework
::
make_ddim
(
strides
);
}
// Computes the "stride numel" of a shape: entry i is the product of
// ddim[i], ddim[i + 1], ..., ddim[rank - 1], i.e. the number of elements
// spanned by axis i together with every axis after it.
//
// The result is returned as a DDim of the same rank as the input.
DDim stride_numel(const framework::DDim& ddim) {
  const int last = ddim.size() - 1;
  std::vector<int64_t> numel(ddim.size());

  // The innermost axis spans exactly its own extent.
  numel[last] = ddim[last];

  // Walk outwards, accumulating the running product.
  for (int axis = last; axis > 0; --axis) {
    numel[axis - 1] = numel[axis] * ddim[axis - 1];
  }
  return framework::make_ddim(numel);
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ddim.h
浏览文件 @
11bcb43a
...
...
@@ -125,6 +125,8 @@ DDim flatten_to_2d(const DDim& src, int num_col_dims);
DDim
flatten_to_1d
(
const
DDim
&
src
);
DDim
stride
(
const
DDim
&
ddim
);
DDim
stride_numel
(
const
DDim
&
ddim
);
}
// namespace framework
}
// namespace paddle
...
...
paddle/fluid/framework/init.cc
浏览文件 @
11bcb43a
...
...
@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/string/piece.h"
#include "paddle/
fluid/
string/piece.h"
namespace
paddle
{
namespace
framework
{
...
...
paddle/fluid/framework/mixed_vector.h
浏览文件 @
11bcb43a
...
...
@@ -37,9 +37,8 @@ class Vector {
// Fill vector with value. The vector size is `count`.
explicit
Vector
(
size_t
count
,
const
T
&
value
=
T
())
{
if
(
count
==
0
)
{
InitEmpty
();
}
else
{
InitEmpty
();
if
(
count
!=
0
)
{
resize
(
count
);
T
*
ptr
=
begin
();
for
(
size_t
i
=
0
;
i
<
count
;
++
i
)
{
...
...
@@ -122,6 +121,10 @@ class Vector {
const
T
*
begin
()
const
{
return
&
this
->
operator
[](
0
);
}
const
T
*
end
()
const
{
return
&
this
->
operator
[](
size
());
}
const
T
*
cbegin
()
const
{
return
begin
();
}
const
T
*
cend
()
const
{
return
end
();
}
const
T
&
back
()
const
{
auto
it
=
end
();
--
it
;
...
...
@@ -244,7 +247,9 @@ class Vector {
bool
operator
==
(
const
Vector
<
T
>&
other
)
const
{
if
(
size
()
!=
other
.
size
())
return
false
;
for
(
auto
it1
=
begin
(),
it2
=
other
.
begin
();
it1
<
end
();
++
it1
,
++
it2
)
{
auto
it1
=
cbegin
();
auto
it2
=
other
.
cbegin
();
for
(;
it1
<
cend
();
++
it1
,
++
it2
)
{
if
(
*
it1
!=
*
it2
)
{
return
false
;
}
...
...
paddle/fluid/framework/mixed_vector_test.cu
浏览文件 @
11bcb43a
...
...
@@ -26,10 +26,10 @@ TEST(mixed_vector, CPU_VECTOR) {
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
tmp
.
push_back
(
i
);
}
ASSERT_EQ
(
tmp
.
size
(),
10
);
ASSERT_EQ
(
tmp
.
size
(),
10
UL
);
vec
<
int
>
tmp2
;
tmp2
=
tmp
;
ASSERT_EQ
(
tmp2
.
size
(),
10
);
ASSERT_EQ
(
tmp2
.
size
(),
10
UL
);
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
ASSERT_EQ
(
tmp2
[
i
],
i
);
ASSERT_EQ
(
tmp2
[
i
],
tmp
[
i
]);
...
...
@@ -58,7 +58,7 @@ TEST(mixed_vector, GPU_VECTOR) {
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
tmp
.
push_back
(
i
);
}
ASSERT_EQ
(
tmp
.
size
(),
10
);
ASSERT_EQ
(
tmp
.
size
(),
10
UL
);
paddle
::
platform
::
CUDAPlace
gpu
(
0
);
multiply_10
<<<
1
,
1
,
0
,
GetCUDAStream
(
gpu
)
>>>
(
tmp
.
MutableData
(
gpu
));
...
...
@@ -79,7 +79,7 @@ TEST(mixed_vector, MultiGPU) {
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
tmp
.
push_back
(
i
);
}
ASSERT_EQ
(
tmp
.
size
(),
10
);
ASSERT_EQ
(
tmp
.
size
(),
10
UL
);
paddle
::
platform
::
CUDAPlace
gpu0
(
0
);
paddle
::
platform
::
SetDeviceId
(
0
);
multiply_10
<<<
1
,
1
,
0
,
GetCUDAStream
(
gpu0
)
>>>
(
tmp
.
MutableData
(
gpu0
));
...
...
@@ -91,3 +91,10 @@ TEST(mixed_vector, MultiGPU) {
ASSERT_EQ
(
tmp
[
i
],
i
*
100
);
}
}
// A Vector built with the (count, value) constructor must report `value`
// at every index in [0, count).
TEST(mixed_vector, InitWithCount) {
  const size_t kCount = 10;
  const int kFill = 10;

  paddle::framework::Vector<int> filled(kCount, kFill);
  for (size_t idx = 0; idx < kCount; ++idx) {
    ASSERT_EQ(filled[idx], kFill);
  }
}
paddle/fluid/framework/scope.cc
浏览文件 @
11bcb43a
...
...
@@ -18,7 +18,7 @@ limitations under the License. */
#include <mutex> // for call_once
#include "glog/logging.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/string/printf.h"
#include "paddle/
fluid/
string/printf.h"
DEFINE_bool
(
benchmark
,
false
,
"Doing memory benchmark. It will make deleting scope synchronized, "
...
...
paddle/fluid/operators/concat_op.h
浏览文件 @
11bcb43a
...
...
@@ -28,17 +28,18 @@ class ConcatKernel : public framework::OpKernel<T> {
auto
ins
=
ctx
.
MultiInput
<
framework
::
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
int64_t
axis
=
static_cast
<
int64_t
>
(
ctx
.
Attr
<
int
>
(
"axis"
));
const
size_t
n
=
ins
.
size
();
auto
place
=
ctx
.
GetPlace
();
out
->
mutable_data
<
T
>
(
place
);
auto
out_stride
=
framework
::
stride_numel
(
out
->
dims
());
size_t
output_offset
=
0
;
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
out_stride
=
framework
::
stride
(
out
->
dims
());
for
(
size_t
i
=
0
;
i
<
n
;
i
++
)
{
auto
&
in
=
ins
[
i
];
auto
axis_dim
=
in
->
dims
()[
axis
];
auto
in_stride
=
framework
::
stride
(
in
->
dims
());
StridedMemcpy
<
T
>
(
ctx
.
device_context
(),
in
->
data
<
T
>
(),
in_stride
,
in
->
dims
(),
out_stride
,
out
->
data
<
T
>
()
+
output_offset
);
output_offset
+=
axis_dim
*
in_stride
[
axis
];
for
(
auto
*
in
:
ins
)
{
auto
in_stride
=
framework
::
stride_numel
(
in
->
dims
());
StridedNumelCopyWithAxis
<
T
>
(
ctx
.
device_context
(),
axis
,
out
->
data
<
T
>
()
+
output_offset
,
out_stride
,
in
->
data
<
T
>
(),
in_stride
);
output_offset
+=
in_stride
[
axis
];
}
}
};
...
...
@@ -50,17 +51,16 @@ class ConcatGradKernel : public framework::OpKernel<T> {
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
outs
=
ctx
.
MultiOutput
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
int64_t
axis
=
static_cast
<
int64_t
>
(
ctx
.
Attr
<
int
>
(
"axis"
));
const
size_t
n
=
outs
.
size
();
size_t
input_offset
=
0
;
auto
in_stride
=
framework
::
stride
(
in
->
dims
());
for
(
size_t
i
=
0
;
i
<
n
;
i
++
)
{
auto
&
out
=
outs
[
i
];
auto
in_stride
=
framework
::
stride
_numel
(
in
->
dims
());
for
(
auto
&
out
:
outs
)
{
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
size_t
axis_dim
=
out
->
dims
()[
axis
]
;
auto
out_stride
=
framework
::
stride
(
out
->
dims
());
StridedMemcpy
<
T
>
(
ctx
.
device_context
()
,
in
->
data
<
T
>
()
+
input_offset
,
in_stride
,
out
->
dims
(),
out_stride
,
out
->
data
<
T
>
()
);
input_offset
+=
axis_dim
*
in
_stride
[
axis
];
auto
out_stride
=
framework
::
stride_numel
(
out
->
dims
())
;
StridedNumelCopyWithAxis
<
T
>
(
ctx
.
device_context
(),
axis
,
out
->
data
<
T
>
(),
out_stride
,
in
->
data
<
T
>
()
+
input_offset
,
in_stride
);
input_offset
+=
out
_stride
[
axis
];
}
}
};
...
...
paddle/fluid/operators/listen_and_serv_op.cc
浏览文件 @
11bcb43a
...
...
@@ -27,7 +27,7 @@ limitations under the License. */
#include "paddle/fluid/operators/detail/grpc_server.h"
#include "paddle/fluid/operators/detail/sendrecvop_utils.h"
#include "paddle/fluid/operators/detail/simple_block_queue.h"
#include "paddle/string/printf.h"
#include "paddle/
fluid/
string/printf.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -98,6 +98,7 @@ class ListenAndServOp : public framework::OperatorBase {
// the gradients arrives, just add suffix 0~n and merge the gradient.
rpc_service_
->
SetCond
(
0
);
size_t
recv_var_cnt
=
0
;
size_t
update_param_cnt
=
0
;
int
batch_barrier
=
0
;
while
(
batch_barrier
!=
fan_in
)
{
const
detail
::
MessageWithName
&
v
=
rpc_service_
->
Get
();
...
...
@@ -122,11 +123,10 @@ class ListenAndServOp : public framework::OperatorBase {
}
}
VLOG
(
3
)
<<
"recv "
<<
recv_var_cnt
<<
" parmeters for one barrier."
;
// TODO(Yancey1989): merge SelectedRows variables here
if
(
exit_flag
)
{
rpc_service_
->
ShutDown
();
}
VLOG
(
3
)
<<
"run optimize graph..."
;
try
{
executor
.
Run
(
*
program
,
&
recv_scope
,
block
->
ID
(),
/*global_block*/
false
/*create_local_scope*/
,
false
/*create_vars*/
);
...
...
@@ -134,7 +134,7 @@ class ListenAndServOp : public framework::OperatorBase {
LOG
(
ERROR
)
<<
"run sub program error "
<<
e
.
what
();
}
rpc_service_
->
SetCond
(
1
);
rpc_service_
->
WaitClientGet
(
recv_var
_cnt
);
rpc_service_
->
WaitClientGet
(
update_param
_cnt
);
grads_counter_
.
clear
();
}
// while(true)
}
...
...
paddle/fluid/operators/send_recv_op_test.cc
浏览文件 @
11bcb43a
...
...
@@ -22,7 +22,7 @@ limitations under the License. */
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/string/printf.h"
#include "paddle/
fluid/
string/printf.h"
USE_NO_KERNEL_OP
(
send
);
USE_NO_KERNEL_OP
(
listen_and_serv
);
...
...
paddle/fluid/operators/sequence_expand_op.cc
浏览文件 @
11bcb43a
...
...
@@ -29,7 +29,9 @@ class SequenceExpandOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
));
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Y"
));
framework
::
DDim
out_dim
;
out_dim
=
ctx
->
GetInputDim
(
"Y"
);
auto
y_dim
=
ctx
->
GetInputDim
(
"Y"
);
out_dim
=
ctx
->
GetInputDim
(
"X"
);
out_dim
[
0
]
=
y_dim
[
0
];
ctx
->
ShareLoD
(
"Y"
,
"Out"
);
ctx
->
SetOutputDim
(
"Out"
,
out_dim
);
}
...
...
paddle/fluid/operators/split_op.h
浏览文件 @
11bcb43a
...
...
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <chrono>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/strided_memcpy.h"
...
...
@@ -27,18 +28,18 @@ class SplitOpKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
outs
=
ctx
.
MultiOutput
<
framework
::
Tensor
>
(
"Out"
);
auto
in_stride
=
framework
::
stride
(
in
->
dims
());
auto
in_stride
=
framework
::
stride
_numel
(
in
->
dims
());
int64_t
axis
=
static_cast
<
int64_t
>
(
ctx
.
Attr
<
int
>
(
"axis"
));
const
size_t
n
=
outs
.
size
();
auto
place
=
ctx
.
GetPlace
();
size_t
input_offset
=
0
;
for
(
size_t
i
=
0
;
i
<
n
;
i
++
)
{
auto
&
out
=
outs
[
i
];
for
(
auto
&
out
:
outs
)
{
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
size_t
axis_dim
=
out
->
dims
()[
axis
]
;
auto
out_stride
=
framework
::
stride
(
out
->
dims
());
StridedMemcpy
<
T
>
(
ctx
.
device_context
()
,
in
->
data
<
T
>
()
+
input_offset
,
in_stride
,
out
->
dims
(),
out_stride
,
out
->
data
<
T
>
()
);
input_offset
+=
axis_dim
*
in
_stride
[
axis
];
auto
out_stride
=
framework
::
stride_numel
(
out
->
dims
())
;
StridedNumelCopyWithAxis
<
T
>
(
ctx
.
device_context
(),
axis
,
out
->
data
<
T
>
(),
out_stride
,
in
->
data
<
T
>
()
+
input_offset
,
in_stride
);
input_offset
+=
out
_stride
[
axis
];
}
}
};
...
...
paddle/fluid/operators/strided_memcpy.h
浏览文件 @
11bcb43a
...
...
@@ -41,5 +41,62 @@ inline void StridedMemcpy(const platform::DeviceContext& dev_ctx, const T* src,
StridedCopyDimVisitor
<
T
>
func
(
dev_ctx
,
src
,
src_stride
,
dst_stride
,
dst
);
boost
::
apply_visitor
(
func
,
dst_dim
);
}
// Strided numel memory copy from src to dst by the specified axis.
//
// The "strided numel" of axis i is the number of elements spanned by axis i
// and all axes after it. For example, for a tensor with dims [4, 20, 100],
// the strided numel is [8000, 2000, 100].
//
// The copy is done row by row: there are
//   before = dst_stride_numel[0] / dst_stride_numel[axis]
// rows; row i starts at dst + i * dst_stride_numel[axis] and at
// src + i * src_stride_numel[axis].
//
// Parameters:
//   ctx              - device context; its place selects the CPU or CUDA copy
//                      path, and on CUDA its stream is used for the copy.
//   axis             - the axis along which src and dst are allowed to differ.
//   dst              - destination buffer (already offset by the caller).
//   dst_stride_numel - strided numel of the destination tensor.
//   src              - source buffer (already offset by the caller).
//   src_stride_numel - strided numel of the source tensor.
//
// NOTE: The src and dst tensor should have the same elements
// except the specified axis.
template <typename T>
inline void StridedNumelCopyWithAxis(const platform::DeviceContext& ctx,
                                     int64_t axis, T* dst,
                                     const framework::DDim& dst_stride_numel,
                                     const T* src,
                                     const framework::DDim& src_stride_numel) {
  // Check the rank before anything indexes the two DDims with `axis`.
  PADDLE_ENFORCE_EQ(src_stride_numel.size(), dst_stride_numel.size(),
                    "src and dst tensor should have the same dims size.");

  int64_t before = dst_stride_numel[0] / dst_stride_numel[axis];
  int64_t src_after = src_stride_numel[axis];
  int64_t dst_after = dst_stride_numel[axis];

  // Number of elements copied per row. When src is the smaller tensor
  // (concat-style call) this is src_after, as before. When dst is the smaller
  // tensor (split-style call: dst is one output, src is the input plus an
  // offset) copying src_after elements would run past the end of the dst row,
  // so only dst_after elements are copied.
  const int64_t copy_numel = src_after < dst_after ? src_after : dst_after;

  auto place = ctx.GetPlace();

  // Validate every axis, not just the ones in front of `axis`; the previous
  // loop bound (i < axis) made the two trailing branches unreachable.
  for (int64_t i = 0; i < src_stride_numel.size(); ++i) {
    if (i < axis) {
      // Dividing out the axis term leaves the product of dims [i, axis),
      // which must agree between src and dst.
      PADDLE_ENFORCE_EQ(src_stride_numel[i] / src_stride_numel[axis],
                        dst_stride_numel[i] / dst_stride_numel[axis],
                        "src and dst should have the same elements "
                        "except the specified axis.");
    } else if (i == axis) {
      // The specified axis is the one place where src and dst may differ.
      continue;
    } else {
      PADDLE_ENFORCE_EQ(src_stride_numel[i], dst_stride_numel[i],
                        "src and dst should have the same elements "
                        "except the specified axis.");
    }
  }

  for (int64_t i = 0; i < before; ++i) {
    if (platform::is_cpu_place(place)) {
      auto& cpu_place = boost::get<platform::CPUPlace>(place);
      memory::Copy(cpu_place, dst + i * dst_after, cpu_place,
                   src + i * src_after, sizeof(T) * copy_numel);
    } else {
#ifdef PADDLE_WITH_CUDA
      auto& gpu_place = boost::get<platform::CUDAPlace>(place);
      auto& cuda_ctx =
          reinterpret_cast<const platform::CUDADeviceContext&>(ctx);
      memory::Copy(gpu_place, dst + i * dst_after, gpu_place,
                   src + i * src_after, sizeof(T) * copy_numel,
                   cuda_ctx.stream());
#else
      PADDLE_THROW("Paddle is not compiled with GPU");
#endif
    }
  }
}
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/target_assign_op.cc
浏览文件 @
11bcb43a
...
...
@@ -22,69 +22,43 @@ class TargetAssignOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
// checkout inputs
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"EncodedGTBBox"
),
"Input(EncodedGTBBox) of TargetAssignOp should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"GTScoreLabel"
),
"Input(GTScoreLabel) of TargetAssignOp should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of TargetAssignOp should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"MatchIndices"
),
"Input(MatchIndices) of TargetAssignOp should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"NegIndices"
),
"Input(NegIndices) of TargetAssignOp should not be null"
);
// checkout outputs
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"PredBBoxLabel"
),
"Output(PredBBoxLabel) of TargetAssignOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"PredBBoxWeight"
),
"Output(PredBBoxWeight) of TargetAssignOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"PredScoreLabel"
),
"Output(PredScoreLabel) of TargetAssignOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"PredScoreWeight"
),
"Output(PredScoreWeight) of TargetAssignOp should not be null."
);
auto
blabel_dims
=
ctx
->
GetInputDim
(
"EncodedGTBBox"
);
auto
slabel_dims
=
ctx
->
GetInputDim
(
"GTScoreLabel"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of TargetAssignOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"OutWeight"
),
"Output(OutWeight) of TargetAssignOp should not be null."
);
auto
in_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
mi_dims
=
ctx
->
GetInputDim
(
"MatchIndices"
);
auto
neg_dims
=
ctx
->
GetInputDim
(
"NegIndices"
);
PADDLE_ENFORCE_EQ
(
blabel_dims
.
size
(),
3UL
,
"The rank of Input(EncodedGTBBox) must be 3."
);
PADDLE_ENFORCE_EQ
(
slabel_dims
.
size
(),
2UL
,
"The rank of Input(GTScoreLabel) must be 2."
);
PADDLE_ENFORCE_EQ
(
mi_dims
.
size
(),
2UL
,
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
3
,
"The rank of Input(X) must be 3."
);
PADDLE_ENFORCE_EQ
(
mi_dims
.
size
(),
2
,
"The rank of Input(MatchIndices) must be 2."
);
PADDLE_ENFORCE_EQ
(
neg_dims
.
size
(),
2UL
,
"The rank of Input(NegIndices) must be 2."
);
PADDLE_ENFORCE_EQ
(
blabel_dims
[
0
],
slabel_dims
[
0
],
"The 1st dimension (means the total number of "
"ground-truth bounding boxes) of Input(EncodedGTBBox) "
"and Input(GTScoreLabel) must be the same."
);
PADDLE_ENFORCE_EQ
(
blabel_dims
[
1
],
mi_dims
[
1
],
"The 2nd dimension (means the number of priod boxes) "
"of Input(EncodedGTBBox) and "
"Input(MatchIndices) must be the same."
);
PADDLE_ENFORCE_EQ
(
blabel_dims
[
2
],
4
,
"The 3rd dimension of Input(EncodedGTBBox) must be 4."
);
if
(
ctx
->
HasInput
(
"NegIndices"
))
{
auto
neg_dims
=
ctx
->
GetInputDim
(
"NegIndices"
);
PADDLE_ENFORCE_EQ
(
neg_dims
.
size
(),
2
,
"The rank of Input(NegIndices) must be 2."
);
PADDLE_ENFORCE_EQ
(
neg_dims
[
1
],
1
,
"The last dimenstion of Out(NegIndices) must be 1."
);
}
auto
n
=
mi_dims
[
0
];
auto
np
=
mi_dims
[
1
];
ctx
->
SetOutputDim
(
"PredBBoxLabel"
,
{
n
,
np
,
4
});
ctx
->
SetOutputDim
(
"PredBBoxWeight"
,
{
n
,
np
,
1
});
ctx
->
SetOutputDim
(
"PredScoreLabel"
,
{
n
,
np
,
1
});
ctx
->
SetOutputDim
(
"PredScoreWeight"
,
{
n
,
np
,
1
});
auto
m
=
mi_dims
[
1
];
auto
k
=
in_dims
[
in_dims
.
size
()
-
1
];
ctx
->
SetOutputDim
(
"Out"
,
{
n
,
m
,
k
});
ctx
->
SetOutputDim
(
"OutWeight"
,
{
n
,
m
,
1
});
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"EncodedGTBBox"
)
->
type
()),
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
)
->
type
()),
ctx
.
device_context
());
}
};
...
...
@@ -93,102 +67,87 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
public:
TargetAssignOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"EncodedGTBBox"
,
"(LoDTensor), The encoded ground-truth bounding boxes with shape "
"[Ng, Np, 4], where Ng is the total number of ground-truth boxes "
"in this mini-batch, Np the number of predictions, 4 is the "
"number of coordinate in [xmin, ymin, xmax, ymax] layout."
);
AddInput
(
"GTScoreLabel"
,
"(LoDTensor, default LoDTensor<int>), The input ground-truth "
"labels with shape [Ng, 1], where the Ng is the same as it in "
"the input of EncodedGTBBox."
);
AddInput
(
"X"
,
"(LoDTensor), This input is a 3D LoDTensor with shape [M, P, K]. "
"Some elements in X will be assigned to Out based on the "
"MatchIndices and NegIndices."
);
AddInput
(
"MatchIndices"
,
"(Tensor, default Tensor<int>), The input matched indices "
"with shape [N, Np], where N is the batch size, Np is the same "
"as it in the input of EncodedGTBBox. If MatchIndices[i][j] "
"is -1, the j-th prior box is not matched to any ground-truh "
"box in i-th instance."
);
"with shape [N, P], If MatchIndices[i][j] is -1, the j-th entity "
"of column is not matched to any entity of row in i-th instance."
);
AddInput
(
"NegIndices"
,
"(LoDTensor, default LoDTensor<int>), The input negative example "
"indices with shape [Neg, 1], where is the total number of "
"negative example indices."
);
AddAttr
<
int
>
(
"background_label"
,
"(int, default 0), Label index of background class."
)
"indices are an optional input with shape [Neg, 1], where Neg is "
"the total number of negative example indices."
)
.
AsDispensable
();
AddAttr
<
int
>
(
"mismatch_value"
,
"(int, default 0), Fill this value to the "
"mismatched location."
)
.
SetDefault
(
0
);
AddOutput
(
"PredBBoxLabel"
,
"(Tensor), The output encoded ground-truth labels "
"with shape [N, Np, 4], N is the batch size and Np, 4 is the "
"same as they in input of EncodedGTBBox. If MatchIndices[i][j] "
"is -1, the PredBBoxLabel[i][j][:] is the encoded ground-truth "
"box for background_label in i-th instance."
);
AddOutput
(
"PredBBoxWeight"
,
"(Tensor), The weight for PredBBoxLabel with the shape "
"of [N, Np, 1]"
);
AddOutput
(
"PredScoreLabel"
,
"(Tensor, default Tensor<int>), The output score labels for "
"each predictions with shape [N, Np, 1]. If MatchIndices[i][j] "
"is -1, PredScoreLabel[i][j] = background_label."
);
AddOutput
(
"PredScoreWeight"
,
"(Tensor), The weight for PredScoreLabel with the shape "
"of [N, Np, 1]"
);
AddOutput
(
"Out"
,
"(Tensor), The output is a 3D Tensor with shape [N, P, K], "
"N and P is the same as they are in NegIndices, K is the "
"same as it in input of X. If MatchIndices[i][j] "
"is -1, the Out[i][j][0 : K] is the mismatch_value."
);
AddOutput
(
"OutWeight"
,
"(Tensor), The weight for output with the shape of [N, P, 1]"
);
AddComment
(
R"DOC(
This operator is, for given the encoded boxes between prior boxes and
ground-truth boxes and ground-truth class labels, to assign classification
and regression targets to each prior box as well as weights to each
prior box. The weights is used to specify which prior box would not contribute
to training loss.
For each instance, the output `PredBBoxLabel`, `PredBBoxWeight`,
`PredScoreLabel` and `PredScoreWeight` are assigned based on `MatchIndices`.
Assumed that the row offset for each instance in `EncodedGTBBox` is called lod,
this operato assigns classification/regression targets by performing the
This operator can be, for given the target bounding boxes or labels,
to assign classification and regression targets to each prediction as well as
weights to prediction. The weights is used to specify which prediction would
not contribute to training loss.
For each instance, the output `Out` and`OutWeight` are assigned based on
`MatchIndices` and `NegIndices`.
Assumed that the row offset for each instance in `X` is called lod,
this operator assigns classification/regression targets by performing the
following steps:
1. Assigning all outpts based on `MatchIndices`:
If id = MatchIndices[i][j] > 0,
PredBBoxLabel[i][j] = EncodedGTBBox[lod[i] + id][j]
PredBBoxWeight[i][j] = 1.
PredScoreLabel[i][j] = GTScoreLabel[lod[i] + id]
PredScoreWeight[i][j] = 1.
Out[i][j][0 : K] = X[lod[i] + id][j % P][0 : K]
OutWeight[i][j] = 1.
Otherwise,
PredBBoxLabel[j][j] = [0., 0., 0., 0.]
PredBBoxWeight[i][j] = 0.
PredScoreLabel[i][j] = background_label
PredScoreWeight[i][j] = 0.
Out[j][j][0 : K] = {mismatch_value, mismatch_value, ...}
OutWeight[i][j] = 0.
2. Assigning
PredScoreWeight based on `NegIndices`
:
2. Assigning
OutWeight based on `NegIndices` if `NegIndices` is provided
:
Assumed that the row offset for each instance in `NegIndices` is cal
e
ed neg_lod,
for i-th instance and
all ids
of NegIndices in this instance:
Assumed that the row offset for each instance in `NegIndices` is cal
l
ed neg_lod,
for i-th instance and
each `id`
of NegIndices in this instance:
PredScoreLabel[i][id] = background_label
PredScore
Weight[i][id] = 1.0
Out[i][id][0 : K] = {mismatch_value, mismatch_value, ...}
Out
Weight[i][id] = 1.0
)DOC"
);
}
};
template
<
typename
T
>
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
T
>
{
template
<
typename
T
,
typename
WT
>
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
T
,
WT
>
{
void
operator
()(
const
platform
::
CPUDeviceContext
&
ctx
,
const
int
*
neg_indices
,
const
size_t
*
lod
,
const
int
num
,
const
int
num_prior_box
,
const
int
background_label
,
int
*
out_label
,
T
*
out_label
_wt
)
{
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
const
size_t
*
lod
,
const
int
N
,
const
int
M
,
const
int
K
,
const
int
mismatch_value
,
T
*
out
,
WT
*
out
_wt
)
{
for
(
int
i
=
0
;
i
<
N
;
++
i
)
{
for
(
size_t
j
=
lod
[
i
];
j
<
lod
[
i
+
1
];
++
j
)
{
int
id
=
neg_indices
[
j
];
out_label
[
i
*
num_prior_box
+
id
]
=
background_label
;
out_label_wt
[
i
*
num_prior_box
+
id
]
=
static_cast
<
T
>
(
1.0
);
int
off
=
(
i
*
M
+
id
)
*
K
;
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
out
[
off
+
k
]
=
mismatch_value
;
out_wt
[
off
+
k
]
=
static_cast
<
WT
>
(
1.0
);
}
}
}
}
};
template
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
double
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
int
,
float
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CPUDeviceContext
,
float
,
float
>;
}
// namespace operators
}
// namespace paddle
...
...
@@ -198,5 +157,5 @@ REGISTER_OP_WITHOUT_GRADIENT(target_assign, ops::TargetAssignOp,
ops
::
TargetAssignOpMaker
);
REGISTER_OP_CPU_KERNEL
(
target_assign
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
,
float
>
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
,
float
>
);
paddle/fluid/operators/target_assign_op.cu
浏览文件 @
11bcb43a
...
...
@@ -17,39 +17,41 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
template
<
typename
T
>
template
<
typename
T
,
typename
WT
>
__global__
void
NegTargetAssignKernel
(
const
int
*
neg_indices
,
const
size_t
*
lod
,
const
int
num
,
const
int
num_prior_box
,
const
int
background_label
,
int
*
out_label
,
T
*
out_label
_wt
)
{
const
int
N
,
const
int
M
,
const
int
K
,
const
int
mismatch_value
,
T
*
out
,
WT
*
out
_wt
)
{
int
bidx
=
blockIdx
.
x
;
int
st
=
lod
[
bidx
];
int
ed
=
lod
[
bidx
+
1
];
int
row_start
=
bidx
*
num_prior_box
;
int
row_start
=
bidx
*
M
;
for
(
int
i
=
st
+
threadIdx
.
x
;
i
<
ed
;
i
+=
blockDim
.
x
)
{
int
id
=
row_start
+
neg_indices
[
i
];
out_label
[
id
]
=
background_label
;
out_label_wt
[
id
]
=
1.
;
for
(
int
k
=
0
;
k
<
K
;
++
k
)
{
out
[
id
*
K
+
k
]
=
T
(
mismatch_value
);
out_wt
[
id
*
K
+
k
]
=
WT
(
1.
);
}
}
}
template
<
typename
T
>
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
T
>
{
template
<
typename
T
,
typename
WT
>
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
T
,
WT
>
{
void
operator
()(
const
platform
::
CUDADeviceContext
&
ctx
,
const
int
*
neg_indices
,
const
size_t
*
lod
,
const
int
num
,
const
int
num_prior_box
,
const
int
background_label
,
int
*
out_label
,
T
*
out_label
_wt
)
{
const
int
*
neg_indices
,
const
size_t
*
lod
,
const
int
N
,
const
int
M
,
const
int
K
,
const
int
mismatch_value
,
T
*
out
,
WT
*
out
_wt
)
{
const
int
block_size
=
256
;
const
int
grid_size
=
num
;
NegTargetAssignKernel
<
T
><<<
grid_size
,
block_size
,
0
,
ctx
.
stream
()
>>>
(
neg_indices
,
lod
,
num
,
num_prior_box
,
background_label
,
out_label
,
out_label_wt
);
const
int
grid_size
=
N
;
NegTargetAssignKernel
<
T
,
WT
><<<
grid_size
,
block_size
,
0
,
ctx
.
stream
()
>>>
(
neg_indices
,
lod
,
N
,
M
,
K
,
mismatch_value
,
out
,
out_wt
);
}
};
template
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
double
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
int
,
float
>;
template
struct
NegTargetAssignFunctor
<
platform
::
CUDADeviceContext
,
float
,
float
>;
}
// namespace operators
}
// namespace paddle
...
...
@@ -57,5 +59,5 @@ template struct NegTargetAssignFunctor<platform::CUDADeviceContext, double>;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
target_assign
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
,
float
>
,
ops
::
TargetAssignKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
float
>
);
paddle/fluid/operators/target_assign_op.h
浏览文件 @
11bcb43a
...
...
@@ -19,140 +19,113 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
template
<
typename
T
>
template
<
typename
T
,
typename
WT
>
struct
TargetAssignFunctor
{
const
T
*
gt_box_
;
const
int
*
gt_label_
;
const
T
*
in_
;
const
int
*
match_indices_
;
const
size_t
*
lod_
;
const
int
background_label_
;
const
int64_t
num_
;
const
int64_t
num_prior_box_
;
T
*
out_box_
;
T
*
out_box_wt_
;
int
*
out_label_
;
T
*
out_label_wt_
;
TargetAssignFunctor
(
const
T
*
gt_box
,
const
int
*
gt_label
,
const
int
*
match_indices
,
const
size_t
*
lod
,
const
int
background_label
,
const
int64_t
num
,
const
int64_t
np
,
T
*
out_box
,
T
*
out_box_wt
,
int
*
out_label
,
T
*
out_label_wt
)
:
gt_box_
(
gt_box
),
gt_label_
(
gt_label
),
const
int
mismatch_value_
;
const
int64_t
N_
;
const
int64_t
M_
;
const
int64_t
P_
;
const
int64_t
K_
;
T
*
out_
;
WT
*
out_wt_
;
TargetAssignFunctor
(
const
T
*
input
,
const
int
*
match_indices
,
const
size_t
*
lod
,
const
int
mismatch_value
,
const
int64_t
N
,
const
int64_t
M
,
const
int64_t
P
,
const
int64_t
K
,
T
*
out
,
WT
*
out_wt
)
:
in_
(
input
),
match_indices_
(
match_indices
),
lod_
(
lod
),
background_label_
(
background_label
),
num_
(
num
),
num_prior_box_
(
np
),
out_box_
(
out_box
),
out_box_wt_
(
out_box_wt
),
out_
label_
(
out_label
),
out_
label_wt_
(
out_label
_wt
)
{}
mismatch_value_
(
mismatch_value
),
N_
(
N
),
M_
(
M
),
P_
(
P
),
K_
(
K
),
out_
(
out
),
out_
wt_
(
out
_wt
)
{}
HOSTDEVICE
void
operator
()(
size_t
i
)
const
{
int
row
=
i
/
num_prior_box
_
;
int
col
=
i
-
row
*
num_prior_box
_
;
int
h
=
i
/
M
_
;
int
w
=
i
-
h
*
M
_
;
size_t
row_off
=
lod_
[
row
];
int
offset
=
row
*
num_prior_box_
+
col
;
size_t
off
=
lod_
[
h
];
int
id
=
match_indices_
[
i
]
;
int
id
=
match_indices_
[
offset
];
T
*
obox
=
out_box_
+
offset
*
4
;
int
*
olabel
=
out_label_
+
offset
;
T
*
obox_wt
=
out_box_wt_
+
offset
;
T
*
olabel_wt
=
out_label_wt_
+
offset
;
T
*
out
=
out_
+
i
*
K_
;
WT
*
out_wt
=
out_wt_
+
i
;
if
(
id
>
-
1
)
{
const
T
*
gtbox
=
gt_box_
+
((
row_off
+
id
)
*
num_prior_box_
+
col
)
*
4
;
obox
[
0
]
=
gtbox
[
0
];
obox
[
1
]
=
gtbox
[
1
];
obox
[
2
]
=
gtbox
[
2
];
obox
[
3
]
=
gtbox
[
3
];
olabel
[
0
]
=
gt_label_
[
row_off
+
id
];
obox_wt
[
0
]
=
static_cast
<
T
>
(
1.
);
olabel_wt
[
0
]
=
static_cast
<
T
>
(
1.
);
int
w_off
=
w
%
P_
;
const
T
*
in
=
in_
+
((
off
+
id
)
*
P_
+
w_off
)
*
K_
;
for
(
int64_t
k
=
0
;
k
<
K_
;
++
k
)
{
out
[
k
]
=
in
[
k
];
}
out_wt
[
0
]
=
static_cast
<
WT
>
(
1.
);
}
else
{
obox
[
0
]
=
static_cast
<
T
>
(
0.
);
obox
[
1
]
=
static_cast
<
T
>
(
0.
);
obox
[
2
]
=
static_cast
<
T
>
(
0.
);
obox
[
3
]
=
static_cast
<
T
>
(
0.
);
olabel
[
0
]
=
background_label_
;
obox_wt
[
0
]
=
static_cast
<
T
>
(
0.
);
olabel_wt
[
0
]
=
static_cast
<
T
>
(
0.
);
for
(
int64_t
k
=
0
;
k
<
K_
;
++
k
)
{
out
[
k
]
=
static_cast
<
T
>
(
mismatch_value_
);
}
out_wt
[
0
]
=
static_cast
<
WT
>
(
0.
);
}
}
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
,
typename
WT
>
struct
NegTargetAssignFunctor
{
void
operator
()(
const
platform
::
DeviceContext
&
ctx
,
const
int
*
neg_indices
,
const
size_t
*
lod
,
const
int
num
,
const
int
num_prior_box
,
const
int
background_label
,
int
*
out_label
,
T
*
out_label_wt
)
const
;
const
size_t
*
lod
,
const
int
N
,
const
int
M
,
const
int
K
,
const
int
mismatch_value
,
T
*
out
,
WT
*
out_wt
)
const
;
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
,
typename
WT
>
class
TargetAssignKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
enc_gt_box
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"EncodedGTBBox"
);
auto
*
gt_label
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"GTScoreLabel"
);
auto
*
x
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
);
auto
*
match_indices
=
ctx
.
Input
<
framework
::
Tensor
>
(
"MatchIndices"
);
auto
*
neg_indices
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"NegIndices"
);
auto
*
out_box
=
ctx
.
Output
<
framework
::
Tensor
>
(
"PredBBoxLabel"
);
auto
*
out_box_wt
=
ctx
.
Output
<
framework
::
Tensor
>
(
"PredBBoxWeight"
);
auto
*
out_label
=
ctx
.
Output
<
framework
::
Tensor
>
(
"PredScoreLabel"
);
auto
*
out_label_wt
=
ctx
.
Output
<
framework
::
Tensor
>
(
"PredScoreWeight"
);
PADDLE_ENFORCE_EQ
(
enc_gt_box
->
lod
().
size
(),
1UL
);
PADDLE_ENFORCE_EQ
(
gt_label
->
lod
().
size
(),
1UL
);
PADDLE_ENFORCE_EQ
(
neg_indices
->
lod
().
size
(),
1UL
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
auto
*
out_wt
=
ctx
.
Output
<
framework
::
Tensor
>
(
"OutWeight"
);
int
background_label
=
ctx
.
Attr
<
int
>
(
"background_label"
);
PADDLE_ENFORCE_EQ
(
x
->
lod
().
size
(),
1UL
);
int
mismatch_value
=
ctx
.
Attr
<
int
>
(
"mismatch_value"
);
const
T
*
box_data
=
enc_gt_box
->
data
<
T
>
();
const
int
*
label_data
=
gt_label
->
data
<
int
>
();
const
T
*
x_data
=
x
->
data
<
T
>
();
const
int
*
match_idx_data
=
match_indices
->
data
<
int
>
();
const
int
*
neg_idx_data
=
neg_indices
->
data
<
int
>
();
T
*
obox_data
=
out_box
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
obox_wt_data
=
out_box_wt
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int
*
olabel_data
=
out_label
->
mutable_data
<
int
>
(
ctx
.
GetPlace
());
T
*
olabel_wt_data
=
out_label_wt
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
out_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
WT
*
out_wt_data
=
out_wt
->
mutable_data
<
WT
>
(
ctx
.
GetPlace
());
int64_t
num
=
match_indices
->
dims
()[
0
];
int64_t
num_prior_box
=
match_indices
->
dims
()[
1
];
int64_t
n
=
match_indices
->
dims
()[
0
];
int64_t
m
=
match_indices
->
dims
()[
1
];
int64_t
p
=
x
->
dims
()[
1
];
int64_t
k
=
x
->
dims
()[
2
];
auto
gt_lod
=
enc_gt_box
->
lod
().
back
();
auto
gt_label_lod
=
gt_label
->
lod
().
back
();
auto
neg_lod
=
neg_indices
->
lod
().
back
();
for
(
size_t
i
=
0
;
i
<
gt_lod
.
size
();
++
i
)
{
PADDLE_ENFORCE_EQ
(
gt_lod
.
data
()[
i
],
gt_label_lod
.
data
()[
i
]);
}
size_t
*
gt_lod_data
=
gt_lod
.
MutableData
(
ctx
.
GetPlace
());
size_t
*
neg_lod_data
=
neg_lod
.
MutableData
(
ctx
.
GetPlace
());
auto
x_lod
=
x
->
lod
().
back
();
size_t
*
x_lod_data
=
x_lod
.
MutableData
(
ctx
.
GetPlace
());
TargetAssignFunctor
<
T
>
functor
(
box_data
,
label_data
,
match_idx_data
,
gt_lod_data
,
background_label
,
num
,
num_prior_box
,
obox_data
,
obox_wt_data
,
olabel_data
,
olabel_wt_data
);
TargetAssignFunctor
<
T
,
WT
>
functor
(
x_data
,
match_idx_data
,
x_lod_data
,
mismatch_value
,
n
,
m
,
p
,
k
,
out_data
,
out_wt_data
);
auto
&
device_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
platform
::
ForRange
<
DeviceContext
>
for_range
(
device_ctx
,
num
*
num_prior_box
);
platform
::
ForRange
<
DeviceContext
>
for_range
(
device_ctx
,
n
*
m
);
for_range
(
functor
);
NegTargetAssignFunctor
<
DeviceContext
,
T
>
neg_trg_functor
;
neg_trg_functor
(
device_ctx
,
neg_idx_data
,
neg_lod_data
,
num
,
num_prior_box
,
background_label
,
olabel_data
,
olabel_wt_data
);
auto
*
neg_indices
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"NegIndices"
);
if
(
neg_indices
)
{
PADDLE_ENFORCE_EQ
(
neg_indices
->
lod
().
size
(),
1UL
);
const
int
*
neg_idx_data
=
neg_indices
->
data
<
int
>
();
auto
neg_lod
=
neg_indices
->
lod
().
back
();
size_t
*
neg_lod_data
=
neg_lod
.
MutableData
(
ctx
.
GetPlace
());
NegTargetAssignFunctor
<
DeviceContext
,
T
,
WT
>
neg_trg_functor
;
neg_trg_functor
(
device_ctx
,
neg_idx_data
,
neg_lod_data
,
n
,
m
,
k
,
mismatch_value
,
out_data
,
out_wt_data
);
}
}
};
...
...
paddle/fluid/platform/cpu_info_test.cc
浏览文件 @
11bcb43a
...
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/string/printf.h"
#include "paddle/
fluid/
string/printf.h"
#include <ostream>
#include <sstream>
...
...
paddle/fluid/platform/enforce.h
浏览文件 @
11bcb43a
...
...
@@ -23,8 +23,8 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/platform/macros.h"
#include "paddle/string/printf.h"
#include "paddle/string/to_string.h"
#include "paddle/
fluid/
string/printf.h"
#include "paddle/
fluid/
string/to_string.h"
#ifdef __GNUC__
#include <cxxabi.h> // for __cxa_demangle
...
...
paddle/fluid/platform/enforce_test.cc
浏览文件 @
11bcb43a
...
...
@@ -15,7 +15,7 @@ limitations under the License. */
#include "gtest/gtest.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/string/piece.h"
#include "paddle/
fluid/
string/piece.h"
using
StringPiece
=
paddle
::
string
::
Piece
;
using
paddle
::
string
::
HasPrefix
;
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
11bcb43a
...
...
@@ -35,7 +35,7 @@ limitations under the License. */
#include "paddle/fluid/pybind/exception.h"
#include "paddle/fluid/pybind/pybind.h"
#include "paddle/fluid/pybind/tensor_py.h"
#include "paddle/string/to_string.h"
#include "paddle/
fluid/
string/to_string.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
...
...
paddle/string/.clang-format
→
paddle/
fluid/
string/.clang-format
浏览文件 @
11bcb43a
文件已移动
paddle/string/CMakeLists.txt
→
paddle/
fluid/
string/CMakeLists.txt
浏览文件 @
11bcb43a
文件已移动
paddle/string/piece.cc
→
paddle/
fluid/
string/piece.cc
浏览文件 @
11bcb43a
...
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "p
addle/string/p
iece.h"
#include "piece.h"
#include <string.h>
...
...
paddle/string/piece.h
→
paddle/
fluid/
string/piece.h
浏览文件 @
11bcb43a
...
...
@@ -28,7 +28,7 @@ namespace string {
// its syntax is simple as it doesn't own/manage the string, it is
// cheap to construct Pieces and pass them around.
class
Piece
{
public:
public:
static
const
size_t
npos
=
static_cast
<
size_t
>
(
-
1
);
// We provide non-explicit singleton constructors so users can
...
...
@@ -55,7 +55,7 @@ public:
// Return a string that contains the copy of the referenced data.
std
::
string
ToString
()
const
{
return
std
::
string
(
data_
,
size_
);
}
private:
private:
const
char
*
data_
;
size_t
size_
;
...
...
paddle/string/piece_test.cc
→
paddle/
fluid/
string/piece_test.cc
浏览文件 @
11bcb43a
...
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/string/piece.h"
#include "paddle/
fluid/
string/piece.h"
#include <sstream>
...
...
paddle/string/printf.h
→
paddle/
fluid/
string/printf.h
浏览文件 @
11bcb43a
...
...
@@ -71,7 +71,7 @@
#include <iostream>
#include <sstream>
#include "
paddle/string/
tinyformat/tinyformat.h" // https://github.com/c42f/tinyformat
#include "tinyformat/tinyformat.h" // https://github.com/c42f/tinyformat
namespace
paddle
{
namespace
string
{
...
...
paddle/string/printf_test.cc
→
paddle/
fluid/
string/printf_test.cc
浏览文件 @
11bcb43a
...
...
@@ -11,7 +11,7 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "p
addle/string/p
rintf.h"
#include "printf.h"
#include <string>
...
...
@@ -24,6 +24,6 @@ TEST(StringPrintf, StringPrintf) {
long
hour
=
14
;
int
min
=
44
;
EXPECT_EQ
(
std
::
string
(
"Wednesday, July 27, 14:44"
),
paddle
::
string
::
Sprintf
(
"%s, %s %d, %.2d:%.2d"
,
weekday
,
month
,
day
,
hour
,
min
));
paddle
::
string
::
Sprintf
(
"%s, %s %d, %.2d:%.2d"
,
weekday
,
month
,
day
,
hour
,
min
));
}
paddle/string/tinyformat/tinyformat.h
→
paddle/
fluid/
string/tinyformat/tinyformat.h
浏览文件 @
11bcb43a
...
...
@@ -147,7 +147,7 @@ namespace detail {
// Test whether type T1 is convertible to type T2
template
<
typename
T1
,
typename
T2
>
struct
is_convertible
{
private:
private:
// two types of different size
struct
fail
{
char
dummy
[
2
];
...
...
@@ -160,7 +160,7 @@ private:
static
succeed
tryConvert
(
const
T2
&
);
static
const
T1
&
makeT1
();
public:
public:
// Standard trick: the (...) version of tryConvert will be chosen from
// the overload set only if the version taking a T2 doesn't match.
// Then we compare the sizes of the return types to check which
...
...
@@ -170,8 +170,7 @@ public:
// Format the value by casting to type fmtT. This default implementation
// should never be called.
template
<
typename
T
,
typename
fmtT
,
template
<
typename
T
,
typename
fmtT
,
bool
convertible
=
is_convertible
<
T
,
fmtT
>
::
value
>
struct
formatValueAsType
{
static
void
invoke
(
std
::
ostream
&
/*out*/
,
const
T
&
/*value*/
)
{
assert
(
0
);
}
...
...
@@ -241,11 +240,8 @@ TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR(char)
/// operator<< to format the type T, with special cases for the %c and %p
/// conversions.
template
<
typename
T
>
inline
void
formatValue
(
std
::
ostream
&
out
,
const
char
*
/*fmtBegin*/
,
const
char
*
fmtEnd
,
int
ntrunc
,
const
T
&
value
)
{
inline
void
formatValue
(
std
::
ostream
&
out
,
const
char
*
/*fmtBegin*/
,
const
char
*
fmtEnd
,
int
ntrunc
,
const
T
&
value
)
{
// The mess here is to support the %c and %p conversions: if these
// conversions are active we try to convert the type to a char or const
// void* respectively and format that instead of the value itself. For the
...
...
@@ -267,25 +263,22 @@ inline void formatValue(std::ostream &out,
}
// Overloaded version for char types to support printing as an integer
#define TINYFORMAT_DEFINE_FORMATVALUE_CHAR(charType) \
inline void formatValue(std::ostream &out, \
const char *
/*fmtBegin*/
, \
const char *fmtEnd, \
int
/**/
, \
charType value) { \
switch (*(fmtEnd - 1)) { \
case 'u': \
case 'd': \
case 'i': \
case 'o': \
case 'X': \
case 'x': \
out << static_cast<int>(value); \
break; \
default: \
out << value; \
break; \
} \
#define TINYFORMAT_DEFINE_FORMATVALUE_CHAR(charType) \
inline void formatValue(std::ostream &out, const char *
/*fmtBegin*/
, \
const char *fmtEnd, int
/**/
, charType value) { \
switch (*(fmtEnd - 1)) { \
case 'u': \
case 'd': \
case 'i': \
case 'o': \
case 'X': \
case 'x': \
out << static_cast<int>(value); \
break; \
default: \
out << value; \
break; \
} \
}
// per 3.9.1: char, signed char and unsigned char are all distinct types
TINYFORMAT_DEFINE_FORMATVALUE_CHAR
(
char
)
...
...
@@ -482,7 +475,7 @@ namespace detail {
// each argument to be allocated as a homogenous array inside FormatList
// whereas a naive implementation based on inheritance does not.
class
FormatArg
{
public:
public:
FormatArg
()
{}
template
<
typename
T
>
...
...
@@ -491,22 +484,17 @@ public:
m_formatImpl
(
&
formatImpl
<
T
>
),
m_toIntImpl
(
&
toIntImpl
<
T
>
)
{}
void
format
(
std
::
ostream
&
out
,
const
char
*
fmtBegin
,
const
char
*
fmtEnd
,
void
format
(
std
::
ostream
&
out
,
const
char
*
fmtBegin
,
const
char
*
fmtEnd
,
int
ntrunc
)
const
{
m_formatImpl
(
out
,
fmtBegin
,
fmtEnd
,
ntrunc
,
m_value
);
}
int
toInt
()
const
{
return
m_toIntImpl
(
m_value
);
}
private:
private:
template
<
typename
T
>
static
void
formatImpl
(
std
::
ostream
&
out
,
const
char
*
fmtBegin
,
const
char
*
fmtEnd
,
int
ntrunc
,
const
void
*
value
)
{
static
void
formatImpl
(
std
::
ostream
&
out
,
const
char
*
fmtBegin
,
const
char
*
fmtEnd
,
int
ntrunc
,
const
void
*
value
)
{
formatValue
(
out
,
fmtBegin
,
fmtEnd
,
ntrunc
,
*
static_cast
<
const
T
*>
(
value
));
}
...
...
@@ -516,11 +504,8 @@ private:
}
const
void
*
m_value
;
void
(
*
m_formatImpl
)(
std
::
ostream
&
out
,
const
char
*
fmtBegin
,
const
char
*
fmtEnd
,
int
ntrunc
,
const
void
*
value
);
void
(
*
m_formatImpl
)(
std
::
ostream
&
out
,
const
char
*
fmtBegin
,
const
char
*
fmtEnd
,
int
ntrunc
,
const
void
*
value
);
int
(
*
m_toIntImpl
)(
const
void
*
value
);
};
...
...
@@ -569,12 +554,10 @@ inline const char *printFormatStringLiteral(std::ostream &out,
// necessary to pull out variable width and precision . The function returns a
// pointer to the character after the end of the current format spec.
inline
const
char
*
streamStateFromFormat
(
std
::
ostream
&
out
,
bool
&
spacePadPositive
,
int
&
ntrunc
,
bool
&
spacePadPositive
,
int
&
ntrunc
,
const
char
*
fmtStart
,
const
detail
::
FormatArg
*
formatters
,
int
&
argIndex
,
int
numFormatters
)
{
int
&
argIndex
,
int
numFormatters
)
{
if
(
*
fmtStart
!=
'%'
)
{
TINYFORMAT_ERROR
(
"tinyformat: Not enough conversion specifiers in format string"
);
...
...
@@ -750,10 +733,8 @@ inline const char *streamStateFromFormat(std::ostream &out,
}
//------------------------------------------------------------------------------
inline
void
formatImpl
(
std
::
ostream
&
out
,
const
char
*
fmt
,
const
detail
::
FormatArg
*
formatters
,
int
numFormatters
)
{
inline
void
formatImpl
(
std
::
ostream
&
out
,
const
char
*
fmt
,
const
detail
::
FormatArg
*
formatters
,
int
numFormatters
)
{
// Saved stream state
std
::
streamsize
origWidth
=
out
.
width
();
std
::
streamsize
origPrecision
=
out
.
precision
();
...
...
@@ -765,13 +746,9 @@ inline void formatImpl(std::ostream &out,
fmt
=
printFormatStringLiteral
(
out
,
fmt
);
bool
spacePadPositive
=
false
;
int
ntrunc
=
-
1
;
const
char
*
fmtEnd
=
streamStateFromFormat
(
out
,
spacePadPositive
,
ntrunc
,
fmt
,
formatters
,
argIndex
,
numFormatters
);
const
char
*
fmtEnd
=
streamStateFromFormat
(
out
,
spacePadPositive
,
ntrunc
,
fmt
,
formatters
,
argIndex
,
numFormatters
);
if
(
argIndex
>=
numFormatters
)
{
// Check args remain after reading any variable width/precision
TINYFORMAT_ERROR
(
"tinyformat: Not enough format arguments"
);
...
...
@@ -820,15 +797,14 @@ inline void formatImpl(std::ostream &out,
/// information has been stripped from the arguments, leaving just enough of a
/// common interface to perform formatting as required.
class
FormatList
{
public:
public:
FormatList
(
detail
::
FormatArg
*
formatters
,
int
N
)
:
m_formatters
(
formatters
),
m_N
(
N
)
{}
friend
void
vformat
(
std
::
ostream
&
out
,
const
char
*
fmt
,
friend
void
vformat
(
std
::
ostream
&
out
,
const
char
*
fmt
,
const
FormatList
&
list
);
private:
private:
const
detail
::
FormatArg
*
m_formatters
;
int
m_N
;
};
...
...
@@ -841,7 +817,7 @@ namespace detail {
// Format list subclass with fixed storage to avoid dynamic allocation
template
<
int
N
>
class
FormatListN
:
public
FormatList
{
public:
public:
template
<
typename
...
Args
>
FormatListN
(
const
Args
&
...
args
)
:
FormatList
(
&
m_formatterStore
[
0
],
N
),
...
...
@@ -849,14 +825,14 @@ public:
static_assert
(
sizeof
...(
args
)
==
N
,
"Number of args must be N"
);
}
private:
private:
FormatArg
m_formatterStore
[
N
];
};
// Special 0-arg version - MSVC says zero-sized C array in struct is nonstandard
template
<
>
class
FormatListN
<
0
>
:
public
FormatList
{
public:
public:
FormatListN
()
:
FormatList
(
0
,
0
)
{}
};
...
...
paddle/string/to_string.h
→
paddle/
fluid/
string/to_string.h
浏览文件 @
11bcb43a
文件已移动
paddle/string/to_string_test.cc
→
paddle/
fluid/
string/to_string_test.cc
浏览文件 @
11bcb43a
...
...
@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "
paddle/string/
to_string.h"
#include "to_string.h"
#include <gtest/gtest.h>
constexpr
char
kOutputString
[]
=
"User Defined Output"
;
class
UserDefinedClass
{
public:
public:
};
std
::
ostream
&
operator
<<
(
std
::
ostream
&
s
,
const
UserDefinedClass
&
ins
)
{
...
...
paddle/scripts/docker/build.sh
浏览文件 @
11bcb43a
...
...
@@ -115,8 +115,8 @@ EOF
-DWITH_AVX
=
${
WITH_AVX
:-
ON
}
\
-DWITH_SWIG_PY
=
ON
\
-DWITH_STYLE_CHECK
=
OFF
make
-j
`
nproc
`
gen_proto_py
make
-j
`
nproc
`
paddle_python
make
-j
`
nproc
`
gen_proto_py
framework_py_proto
make
-j
`
nproc
`
copy_paddle_pybind
make
-j
`
nproc
`
paddle_docs paddle_docs_cn paddle_api_docs
popd
fi
...
...
paddle/scripts/travis/build_doc.sh
浏览文件 @
11bcb43a
...
...
@@ -6,9 +6,9 @@ mkdir -p $TRAVIS_BUILD_DIR/build
cd
$TRAVIS_BUILD_DIR
/build
# Compile Documentation only.
cmake ..
-DCMAKE_BUILD_TYPE
=
Debug
-DWITH_GPU
=
OFF
-DWITH_MKL
=
OFF
-DWITH_DOC
=
ON
make
-j
`
nproc
`
gen_proto_py
make
-j
`
nproc
`
paddle_python
cmake ..
-DCMAKE_BUILD_TYPE
=
Release
-DWITH_GPU
=
OFF
-DWITH_MKL
=
OFF
-DWITH_DOC
=
ON
-DWITH_STYLE_CHECK
=
OFF
make
-j
`
nproc
`
gen_proto_py
framework_py_proto
make
-j
`
nproc
`
copy_paddle_pybind
make
-j
`
nproc
`
paddle_docs paddle_docs_cn paddle_api_docs
# check websites for broken links
...
...
python/paddle/v2/fluid/distribute_transpiler.py
浏览文件 @
11bcb43a
...
...
@@ -33,6 +33,57 @@ class VarBlock:
return
"%s:%d:%d"
%
(
self
.
varname
,
self
.
offset
,
self
.
size
)
class UnionFind(object):
    """Union-find (disjoint-set) data structure.

    Keeps track of a set of elements partitioned into a number of
    disjoint (non-overlapping) subsets.

    Reference:
    https://en.wikipedia.org/wiki/Disjoint-set_data_structure

    Args:
        elementes(list): The initial elements; each one starts in its own
            singleton set. A falsy value (``None`` or an empty list) yields
            an empty structure. The parameter keeps its original spelling so
            that keyword callers continue to work.
    """

    def __init__(self, elementes=None):
        self._parents = []  # index -> parent index
        self._index = {}  # element -> index
        self._curr_idx = 0
        if not elementes:
            elementes = []
        for ele in elementes:
            self._parents.append(self._curr_idx)
            self._index[ele] = self._curr_idx
            self._curr_idx += 1

    def find(self, x):
        """Return the root index of the set containing ``x``.

        Returns -1 when ``x`` is not tracked by this structure. The path
        from ``x`` to its root is shortened while walking it.
        """
        if x not in self._index:
            return -1
        idx = self._index[x]
        while idx != self._parents[idx]:
            # Point idx at its grandparent, then step to the old parent.
            t = self._parents[idx]
            self._parents[idx] = self._parents[t]
            idx = t
        return idx

    def union(self, x, y):
        """Merge the sets containing ``x`` and ``y``.

        Does nothing when either element is untracked: find() reports such
        elements as -1, and using -1 as an index or as a parent value would
        silently corrupt the bookkeeping of unrelated sets.
        """
        x_root = self.find(x)
        y_root = self.find(y)
        if x_root == -1 or y_root == -1:
            return
        if x_root == y_root:
            return
        self._parents[x_root] = y_root

    def is_connected(self, x, y):
        """Return True when ``x`` and ``y`` are tracked and share a root.

        Untracked elements are never connected to anything, including each
        other.
        """
        x_root = self.find(x)
        y_root = self.find(y)
        if x_root == -1 or y_root == -1:
            return False
        return x_root == y_root
def same_or_split_var(p_name, var_name):
    """Tell whether ``p_name`` names ``var_name`` itself or one of its blocks.

    Returns True when the two names are equal, or when ``p_name`` begins
    with ``var_name`` followed by ".block"; otherwise False.
    """
    if p_name == var_name:
        return True
    block_prefix = var_name + ".block"
    return p_name.startswith(block_prefix)
...
...
@@ -203,6 +254,21 @@ class DistributeTranspiler:
(
varname
,
self
.
trainer_id
)
startup_prog
.
global_block
().
rename_var
(
varname
,
new_var_name
)
# self.lr_param_mapping = self._create_lr_param_mapping()
# def _create_lr_param_mapping(self):
# lr_mapping = dict()
# for _, opt_op in enumerate(self.optimize_ops):
# if not opt_op.inputs or not opt_op.inputs.has_key("LearningRate") \
# or not opt_op.inputs.has_key("Param"):
# continue
# lr = opt_op.inputs["LearningRate"].name
# param = opt_op.inputs["Param"].name
# if not lr_mapping.has_key(lr):
# lr_mapping.update({lr: list()})
# lr_mapping[lr].append(param)
# return lr_mapping
def
_create_vars_from_blocklist
(
self
,
program
,
block_list
):
# Create respective variables using the block_list
block_map
=
dict
()
...
...
@@ -333,50 +399,15 @@ class DistributeTranspiler:
pass
return
orig_shape
def
_op_input_var
(
self
,
op
,
varname
):
pass
def
_is_op_on_pserver
(
self
,
endpoint
,
all_ops
,
idx
):
"""
Recursively check if the op need to run on current server.
Assume that ops are in the execution order.
"""
param_names
=
[
p
.
name
for
p
in
self
.
param_grad_ep_mapping
[
endpoint
][
"params"
]
]
op
=
all_ops
[
idx
]
input_names
=
set
(
op
.
input_names
)
# TODO(typhoonzero): using Param and Grad input name to identify
# that the operator is an optimization operator, need a better way.
if
"Param"
in
input_names
:
if
op
.
input
(
"Param"
)[
0
]
in
param_names
:
return
True
else
:
for
n
in
param_names
:
if
same_or_split_var
(
n
,
op
.
input
(
"Param"
)[
0
])
\
and
n
!=
op
.
input
(
"Param"
)[
0
]:
return
True
return
False
else
:
j
=
idx
-
1
while
j
>=
0
:
prev_op
=
all_ops
[
j
]
# NOTE(typhoonzero): consider list input/output
prev_output_names
=
prev_op
.
desc
.
output_arg_names
()
prev_input_names
=
prev_op
.
desc
.
input_arg_names
()
found1
=
False
found2
=
False
for
varname
in
op
.
desc
.
input_arg_names
():
if
varname
in
prev_output_names
:
found1
=
self
.
_is_op_on_pserver
(
endpoint
,
all_ops
,
j
)
# later ops may produce output for prev op's next batch use.
for
varname
in
op
.
desc
.
output_arg_names
():
if
varname
in
prev_input_names
:
found2
=
self
.
_is_op_on_pserver
(
endpoint
,
all_ops
,
j
)
if
found1
or
found2
:
return
True
j
-=
1
return
False
def
_fetch_var_names
(
self
,
param_dict
):
res
=
[]
if
not
param_dict
:
return
res
for
_
,
values
in
param_dict
.
iteritems
():
if
not
isinstance
(
values
,
list
):
values
=
[
values
]
res
+=
[
v
.
name
for
v
in
values
]
return
res
def
_append_pserver_ops
(
self
,
optimize_block
,
opt_op
,
endpoint
):
program
=
optimize_block
.
program
...
...
@@ -394,11 +425,7 @@ class DistributeTranspiler:
# do not append this op if current endpoint
# is not dealing with this grad block
return
merged_var
=
program
.
global_block
().
create_var
(
name
=
grad_block
.
name
,
persistable
=
grad_block
.
persistable
,
dtype
=
grad_block
.
dtype
,
shape
=
grad_block
.
shape
)
merged_var
=
program
.
global_block
().
vars
[
grad_block
.
name
]
# append merging ops if trainers > 1
if
self
.
trainers
>
1
:
vars2merge
=
self
.
_create_var_for_trainers
(
...
...
@@ -429,13 +456,19 @@ class DistributeTranspiler:
shape
=
param_block
.
shape
)
new_inputs
[
key
]
=
tmpvar
elif
key
==
"LearningRate"
:
# The learning rate variable has already been created by a non-optimize op,
# so don't create it again.
new_inputs
[
key
]
=
program
.
global_block
().
vars
[
opt_op
.
input
(
key
)[
0
]]
for
key
in
opt_op
.
input_names
:
if
key
in
[
"Param"
,
"Grad"
]:
new_shape
=
None
if
key
in
[
"Param"
,
"Grad"
,
"LearningRate"
]:
continue
var
=
program
.
global_block
().
vars
[
opt_op
.
input
(
key
)[
0
]]
# update accumulator variable shape
param_shape
=
new_inputs
[
"Param"
].
shape
var
=
program
.
global_block
().
vars
[
opt_op
.
input
(
key
)[
0
]]
new_shape
=
self
.
_get_optimizer_input_shape
(
opt_op
.
type
,
key
,
var
.
shape
,
param_shape
)
tmpvar
=
program
.
global_block
().
create_var
(
...
...
@@ -446,12 +479,11 @@ class DistributeTranspiler:
new_inputs
[
key
]
=
tmpvar
# change output's ParamOut variable
outputs
=
self
.
_get_output_map_from_op
(
program
.
global_block
(),
opt_op
)
outputs
[
"ParamOut"
]
=
new_inputs
[
"Param"
]
opt_op
.
outputs
[
"ParamOut"
]
=
new_inputs
[
"Param"
]
optimize_block
.
append_op
(
type
=
opt_op
.
type
,
inputs
=
new_inputs
,
outputs
=
outputs
,
outputs
=
o
pt_op
.
o
utputs
,
attrs
=
opt_op
.
attrs
)
def
_append_pserver_non_opt_ops
(
self
,
optimize_block
,
opt_op
):
...
...
@@ -459,11 +491,10 @@ class DistributeTranspiler:
# Append the ops for parameters that do not need to be optimized/updated
inputs
=
self
.
_get_input_map_from_op
(
self
.
program
.
global_block
().
vars
,
opt_op
)
for
var
in
inputs
.
itervalues
():
if
type
(
var
)
==
list
:
varlist
=
var
else
:
varlist
=
[
var
]
for
varlist
in
inputs
.
itervalues
():
if
not
isinstance
(
varlist
,
list
):
varlist
=
[
varlist
]
for
var
in
varlist
:
if
not
program
.
global_block
().
vars
.
has_key
(
var
.
name
):
program
.
global_block
().
create_var
(
...
...
@@ -475,12 +506,70 @@ class DistributeTranspiler:
outputs
=
self
.
_get_output_map_from_op
(
self
.
program
.
global_block
().
vars
,
opt_op
)
for
varlist
in
outputs
.
itervalues
():
if
not
isinstance
(
varlist
,
list
):
varlist
=
[
varlist
]
for
var
in
varlist
:
program
.
global_block
().
create_var
(
name
=
var
.
name
,
persistable
=
var
.
persistable
,
dtype
=
var
.
dtype
,
shape
=
var
.
shape
)
optimize_block
.
append_op
(
type
=
opt_op
.
type
,
inputs
=
inputs
,
outputs
=
outputs
,
attrs
=
opt_op
.
attrs
)
def
_is_op_connected
(
self
,
op1
,
op2
):
# If one op's input is another op's output or
# one op's output is another op's input, we say
# the two operator is connected.
op1_input_names
=
self
.
_fetch_var_names
(
op1
.
inputs
)
op1_output_names
=
self
.
_fetch_var_names
(
op1
.
outputs
)
op2_input_names
=
self
.
_fetch_var_names
(
op2
.
inputs
)
op2_output_names
=
self
.
_fetch_var_names
(
op2
.
outputs
)
if
set
(
op1_output_names
)
&
set
(
op2_input_names
)
or
\
set
(
op1_input_names
)
&
set
(
op2_output_names
):
return
True
return
False
def _create_ufind(self, optimize_ops):
    """Build a union-find structure over ``optimize_ops``.

    Every op starts in its own set; any two ops that
    ``self._is_op_connected`` reports as connected are merged into one set.

    Args:
        optimize_ops(list): the operators to partition.

    Returns:
        UnionFind: the populated union-find structure.
    """
    ufind = UnionFind(optimize_ops)
    op_count = len(optimize_ops)
    for i in xrange(op_count):
        op1 = optimize_ops[i]
        # Start at i + 1: pairing an op with itself can never merge two sets.
        for j in xrange(i + 1, op_count):
            op2 = optimize_ops[j]
            if self._is_op_connected(op1, op2):
                ufind.union(op1, op2)
    return ufind
def
_is_opt_op
(
self
,
op
):
# NOTE: It's a HACK implement.
# optimize op: SGDOptimize, MomentumOptimizer, AdamOptimizer and etc...
if
"Param"
in
op
.
input_names
and
\
"LearningRate"
in
op
.
input_names
:
return
True
return
False
def
_is_opt_op_on_pserver
(
self
,
endpoint
,
op
):
param_names
=
[
p
.
name
for
p
in
self
.
param_grad_ep_mapping
[
endpoint
][
"params"
]
]
if
op
.
input
(
"Param"
)
in
param_names
:
return
True
else
:
for
n
in
param_names
:
param
=
op
.
input
(
"Param"
)
if
same_or_split_var
(
n
,
param
)
and
n
!=
param
:
return
True
return
False
return
False
def
get_pserver_program
(
self
,
endpoint
):
"""
Get pserver side program using the endpoint
...
...
@@ -514,17 +603,30 @@ class DistributeTranspiler:
recv_inputs
.
append
(
var
)
# step6
optimize_block
=
pserver_program
.
create_block
(
0
)
# Iterate through the ops and append ops as needed
for
idx
,
opt_op
in
enumerate
(
self
.
optimize_ops
):
is_op_on_pserver
=
self
.
_is_op_on_pserver
(
endpoint
,
self
.
optimize_ops
,
idx
)
if
not
is_op_on_pserver
:
continue
if
"Grad"
in
opt_op
.
desc
.
input_arg_names
():
self
.
_append_pserver_ops
(
optimize_block
,
opt_op
,
endpoint
)
else
:
self
.
_append_pserver_non_opt_ops
(
optimize_block
,
opt_op
)
# step 6.1
# Create a union-find data struct by optimize ops,
# If two ops are connected, we could add these two ops
# into one set.
ufind
=
self
.
_create_ufind
(
self
.
optimize_ops
)
# step 6.2
# Iterate through the ops and collect the optimize ops that are
# located on the current pserver
opt_op_on_pserver
=
[]
for
_
,
op
in
enumerate
(
self
.
optimize_ops
):
if
self
.
_is_opt_op
(
op
)
and
self
.
_is_opt_op_on_pserver
(
endpoint
,
op
):
opt_op_on_pserver
.
append
(
op
)
# step 6.3
# Iterate through the ops; if an op is in the same set as any optimize op
# that is located on the current pserver, then
# append it to the sub program.
for
_
,
op
in
enumerate
(
self
.
optimize_ops
):
for
_
,
opt_op
in
enumerate
(
opt_op_on_pserver
):
if
ufind
.
is_connected
(
op
,
opt_op
):
if
self
.
_is_opt_op
(
op
):
self
.
_append_pserver_ops
(
optimize_block
,
op
,
endpoint
)
else
:
self
.
_append_pserver_non_opt_ops
(
optimize_block
,
op
)
break
# Append the listen_and_serv op
pserver_program
.
global_block
().
append_op
(
type
=
"listen_and_serv"
,
...
...
python/paddle/v2/fluid/layers/math_op_patch.py
浏览文件 @
11bcb43a
...
...
@@ -117,6 +117,7 @@ def monkey_patch_variable():
tmp_name
=
unique_tmp_name
()
out
=
self
.
block
.
create_var
(
name
=
tmp_name
,
dtype
=
lhs_dtype
)
self
.
block
.
append_op
(
type
=
op_type
,
inputs
=
{
'X'
:
[
self
],
...
...
python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
浏览文件 @
11bcb43a
...
...
@@ -99,7 +99,7 @@ elif training_role == "TRAINER":
exe
.
run
(
fluid
.
default_startup_program
())
for
pass_id
in
range
(
PASS_NUM
):
for
data
in
train_reader
():
avg_cost_np
=
exe
.
run
(
fluid
.
default_main
_program
(),
avg_cost_np
=
exe
.
run
(
t
.
get_trainer
_program
(),
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
avg_cost
])
print
(
"avg_cost_np"
,
avg_cost_np
)
...
...
python/paddle/v2/fluid/tests/test_cpp_reader.py
浏览文件 @
11bcb43a
...
...
@@ -64,9 +64,7 @@ exe = fluid.Executor(place)
[
res1
,
res2
]
=
exe
.
run
(
prog
,
fetch_list
=
[
out1
,
out2
])
test_pass
=
res1
.
shape
==
(
10
,
2
)
and
res2
.
shape
==
(
10
,
1
)
if
not
test_pass
:
if
not
(
res1
.
shape
==
(
10
,
2
)
and
res2
.
shape
==
(
10
,
1
)):
exit
(
1
)
exit
(
0
)
python/paddle/v2/fluid/tests/test_sequence_expand.py
浏览文件 @
11bcb43a
...
...
@@ -73,5 +73,20 @@ class TestSequenceExpandCase3(TestSequenceExpand):
self
.
inputs
=
{
'X'
:
(
x_data
,
x_lod
),
'Y'
:
(
y_data
,
y_lod
)}
class
TestSequenceExpandCase4
(
TestSequenceExpand
):
def
set_data
(
self
):
x_data
=
np
.
array
(
[
0.1
,
0.3
,
0.2
,
0.15
,
0.25
,
0.2
,
0.15
,
0.25
,
0.1
,
0.3
]).
reshape
(
[
2
,
5
]).
astype
(
'float32'
)
x_lod
=
[[
0
,
1
,
2
,
]]
y_data
=
np
.
random
.
uniform
(
0.1
,
1
,
[
2
,
1
]).
astype
(
'float32'
)
y_lod
=
[[
0
,
1
,
2
],
[
0
,
1
,
2
]]
self
.
inputs
=
{
'X'
:
(
x_data
,
x_lod
),
'Y'
:
(
y_data
,
y_lod
)}
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/v2/fluid/tests/test_split_op.py
浏览文件 @
11bcb43a
...
...
@@ -20,11 +20,11 @@ from op_test import OpTest
class
TestSplitOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"split"
axis
=
0
x
=
np
.
random
.
random
((
4
,
2
,
5
)).
astype
(
'float32'
)
out
=
np
.
split
(
x
,
[
1
,
3
],
axis
)
axis
=
1
x
=
np
.
random
.
random
((
4
,
5
,
6
)).
astype
(
'float32'
)
out
=
np
.
split
(
x
,
[
2
,
3
],
axis
)
self
.
inputs
=
{
'X'
:
x
}
self
.
attrs
=
{
'axis'
:
axis
,
'sections'
:
[
1
,
2
,
1
]}
self
.
attrs
=
{
'axis'
:
axis
,
'sections'
:
[
2
,
1
,
2
]}
self
.
outputs
=
{
'Out'
:
[(
'out%d'
%
i
,
out
[
i
])
\
for
i
in
xrange
(
len
(
out
))]}
...
...
python/paddle/v2/fluid/tests/test_target_assign_op.py
浏览文件 @
11bcb43a
...
...
@@ -43,7 +43,7 @@ def gen_match_and_neg_indices(num_prior, gt_lod, neg_lod):
def
target_assign
(
encoded_box
,
gt_label
,
match_indices
,
neg_indices
,
gt_lod
,
neg_lod
,
background_label
):
neg_lod
,
mismatch_value
):
batch_size
,
num_prior
=
match_indices
.
shape
# init target bbox
...
...
@@ -52,7 +52,7 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
trg_box_wt
=
np
.
zeros
((
batch_size
,
num_prior
,
1
)).
astype
(
'float32'
)
# init target label
trg_label
=
np
.
ones
((
batch_size
,
num_prior
,
1
)).
astype
(
'int32'
)
trg_label
=
trg_label
*
background_label
trg_label
=
trg_label
*
mismatch_value
# init weight for target label
trg_label_wt
=
np
.
zeros
((
batch_size
,
num_prior
,
1
)).
astype
(
'float32'
)
...
...
@@ -65,53 +65,90 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
# target bbox
for
v
,
c
in
zip
(
col_val
+
gt_start
,
col_ids
[
0
].
tolist
()):
trg_box
[
i
][
c
][:]
=
encoded_box
[
v
][
c
][:]
# weight for target bbox
trg_box_wt
[
i
][
col_ids
]
=
1.0
trg_label
[
i
][
col_ids
]
=
gt_label
[
col_val
+
gt_start
]
trg_label_wt
[
i
][
col_ids
]
=
1.0
# set target label weight to 1.0 for the negative samples
neg_ids
=
neg_indices
[
neg_lod
[
i
]:
neg_lod
[
i
+
1
]]
trg_label_wt
[
i
][
neg_ids
]
=
1.0
if
neg_indices
is
not
None
:
neg_ids
=
neg_indices
[
neg_lod
[
i
]:
neg_lod
[
i
+
1
]]
trg_label_wt
[
i
][
neg_ids
]
=
1.0
return
trg_box
,
trg_box_wt
,
trg_label
,
trg_label_wt
class
TestTargetAssgin
Op
(
OpTest
):
class
TestTargetAssgin
FloatType
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"target_assign"
num_prior
=
120
num_class
=
21
gt_lod
=
[
0
,
5
,
11
,
23
]
neg_lod
=
[
0
,
4
,
7
,
13
]
mismatch_value
=
0
batch_size
=
len
(
gt_lod
)
-
1
num_gt
=
gt_lod
[
-
1
]
encoded_box
=
np
.
random
.
random
((
num_gt
,
num_prior
,
4
)).
astype
(
'float32'
)
gt_label
=
np
.
random
.
randint
(
num_class
,
size
=
(
num_gt
,
1
)).
astype
(
'int32'
)
match_indices
,
neg_indices
=
gen_match_and_neg_indices
(
num_prior
,
gt_lod
,
neg_lod
)
out
,
out_wt
,
_
,
_
=
target_assign
(
encoded_box
,
gt_label
,
match_indices
,
neg_indices
,
gt_lod
,
neg_lod
,
mismatch_value
)
# assign regression targets
x
=
encoded_box
self
.
inputs
=
{
'X'
:
(
x
,
[
gt_lod
]),
'MatchIndices'
:
match_indices
,
}
self
.
attrs
=
{
'mismatch_value'
:
mismatch_value
}
self
.
outputs
=
{
'Out'
:
out
,
'OutWeight'
:
out_wt
,
}
def
test_check_output
(
self
):
self
.
check_output
()
class
TestTargetAssginIntType
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"target_assign"
num_prior
=
120
num_class
=
21
gt_lod
=
[
0
,
5
,
11
,
23
]
neg_lod
=
[
0
,
4
,
7
,
13
]
mismatch_value
=
0
batch_size
=
len
(
gt_lod
)
-
1
num_gt
=
gt_lod
[
-
1
]
background_label
=
0
encoded_box
=
np
.
random
.
random
((
num_gt
,
num_prior
,
4
)).
astype
(
'float32'
)
gt_label
=
np
.
random
.
randint
(
num_class
,
size
=
(
num_gt
,
1
)).
astype
(
'int32'
)
match_indices
,
neg_indices
=
gen_match_and_neg_indices
(
num_prior
,
gt_lod
,
neg_lod
)
trg_box
,
trg_box_wt
,
trg_label
,
trg_label_wt
=
target_assign
(
encoded_box
,
gt_label
,
match_indices
,
neg_indices
,
gt_lod
,
neg_lod
,
background_label
)
_
,
_
,
out
,
out_wt
,
=
target_assign
(
encoded_box
,
gt_label
,
match_indices
,
neg_indices
,
gt_lod
,
neg_lod
,
mismatch_value
)
# assign cassification argets
x
=
np
.
reshape
(
gt_label
,
(
num_gt
,
1
,
1
))
self
.
inputs
=
{
'EncodedGTBBox'
:
(
encoded_box
,
[
gt_lod
]),
'GTScoreLabel'
:
(
gt_label
,
[
gt_lod
]),
'MatchIndices'
:
(
match_indices
),
'X'
:
(
x
,
[
gt_lod
]),
'MatchIndices'
:
match_indices
,
'NegIndices'
:
(
neg_indices
,
[
neg_lod
]),
}
self
.
attrs
=
{
'
background_label'
:
background_label
}
self
.
attrs
=
{
'
mismatch_value'
:
mismatch_value
}
self
.
outputs
=
{
'PredBBoxLabel'
:
(
trg_box
),
'PredBBoxWeight'
:
(
trg_box_wt
),
'PredScoreLabel'
:
(
trg_label
),
'PredScoreWeight'
:
(
trg_label_wt
),
'Out'
:
out
,
'OutWeight'
:
out_wt
,
}
def
test_check_output
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录