Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
a3addcdc
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a3addcdc
编写于
12月 12, 2017
作者:
S
sweetsky0901
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
modify for some update in trunk
上级
4d8f39b8
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
21 addition
and
17 deletion
+21
-17
paddle/operators/CMakeLists.txt
paddle/operators/CMakeLists.txt
+3
-1
paddle/operators/detection_output_op.cc
paddle/operators/detection_output_op.cc
+2
-2
paddle/operators/detection_output_op.cu.cc
paddle/operators/detection_output_op.cu.cc
+3
-3
paddle/operators/detection_output_op.h
paddle/operators/detection_output_op.h
+13
-11
未找到文件。
paddle/operators/CMakeLists.txt
浏览文件 @
a3addcdc
...
...
@@ -210,7 +210,8 @@ set(DEPS_OPS
save_op
load_op
send_op
recv_op
)
recv_op
detection_output_op
)
if
(
WITH_DISTRIBUTE
)
add_subdirectory
(
detail
)
...
...
@@ -233,6 +234,7 @@ op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op)
op_library
(
cross_entropy_op DEPS cross_entropy
)
op_library
(
softmax_with_cross_entropy_op DEPS cross_entropy softmax
)
op_library
(
softmax_op DEPS softmax
)
op_library
(
detection_output_op DEPS softmax
)
op_library
(
sequence_softmax_op DEPS softmax
)
op_library
(
sum_op DEPS selected_rows_functor
)
op_library
(
sgd_op DEPS selected_rows_functor
)
...
...
paddle/operators/detection_output_op.cc
浏览文件 @
a3addcdc
...
...
@@ -86,5 +86,5 @@ REGISTER_OP_WITHOUT_GRADIENT(detection_output, ops::Detection_output_Op,
ops
::
Detection_output_OpMaker
);
REGISTER_OP_CPU_KERNEL
(
detection_output
,
ops
::
Detection_output_Kernel
<
paddle
::
platform
::
CPU
Place
,
float
>
,
ops
::
Detection_output_Kernel
<
paddle
::
platform
::
CPU
Place
,
double
>
);
ops
::
Detection_output_Kernel
<
paddle
::
platform
::
CPU
DeviceContext
,
float
>
,
ops
::
Detection_output_Kernel
<
paddle
::
platform
::
CPU
DeviceContext
,
double
>
);
paddle/operators/detection_output_op.cu.cc
浏览文件 @
a3addcdc
...
...
@@ -15,7 +15,7 @@ limitations under the License. */
#include "paddle/operators/detection_output_op.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_
GPU
_KERNEL
(
REGISTER_OP_
CUDA
_KERNEL
(
detection_output
,
ops
::
Detection_output_Kernel
<
paddle
::
platform
::
GPUPlace
,
float
>
,
ops
::
Detection_output_Kernel
<
paddle
::
platform
::
GPUPlace
,
double
>
);
ops
::
Detection_output_Kernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
Detection_output_Kernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
paddle/operators/detection_output_op.h
浏览文件 @
a3addcdc
...
...
@@ -21,8 +21,8 @@ limitations under the License. */
#include "paddle/operators/strided_memcpy.h"
namespace
paddle
{
namespace
operators
{
template
<
typename
Place
,
typename
T
>
inline
void
transpose_fun
(
const
platform
::
Device
Context
&
context
,
template
<
typename
DeviceContext
,
typename
T
>
inline
void
transpose_fun
(
const
framework
::
Execution
Context
&
context
,
const
framework
::
Tensor
&
src
,
framework
::
Tensor
*
dst
)
{
int
input_nums
=
src
.
dims
()[
0
];
...
...
@@ -36,17 +36,18 @@ inline void transpose_fun(const platform::DeviceContext& context,
framework
::
Tensor
in_p_tensor_transpose
;
in_p_tensor_transpose
.
mutable_data
<
T
>
(
shape
,
context
.
GetPlace
());
std
::
vector
<
int
>
shape_axis
({
0
,
1
,
3
,
4
,
2
});
math
::
Transpose
<
Place
,
T
,
5
>
trans5
;
trans5
(
context
,
in_p_tensor
,
&
in_p_tensor_transpose
,
shape_axis
);
math
::
Transpose
<
DeviceContext
,
T
,
5
>
trans5
;
trans5
(
context
.
template
device_context
<
DeviceContext
>(),
in_p_tensor
,
&
in_p_tensor_transpose
,
shape_axis
);
auto
dst_stride
=
framework
::
stride
(
dst
->
dims
());
auto
src_stride
=
framework
::
stride
(
in_p_tensor_transpose
.
dims
());
StridedMemcpy
<
T
>
(
context
,
in_p_tensor_transpose
.
data
<
T
>
(),
src_stride
,
in_p_tensor_transpose
.
dims
(),
dst_stride
,
StridedMemcpy
<
T
>
(
context
.
device_context
(),
in_p_tensor_transpose
.
data
<
T
>
()
,
src_stride
,
in_p_tensor_transpose
.
dims
(),
dst_stride
,
dst
->
data
<
T
>
()
+
offset
);
offset
+=
in_p_tensor_transpose
.
dims
()[
4
]
*
src_stride
[
4
];
}
}
template
<
typename
Place
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
class
Detection_output_Kernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
...
...
@@ -87,10 +88,11 @@ class Detection_output_Kernel : public framework::OpKernel<T> {
framework
::
Tensor
conf_cpu
;
framework
::
Tensor
priorbox_cpu
;
const
T
*
priorbox_data
=
in_priorbox
->
data
<
T
>
();
transpose_fun
<
Place
,
T
>
(
context
.
device_context
()
,
*
in_loc
,
&
loc_tensor
);
transpose_fun
<
Place
,
T
>
(
context
.
device_context
()
,
*
in_conf
,
&
conf_tensor
);
transpose_fun
<
DeviceContext
,
T
>
(
context
,
*
in_loc
,
&
loc_tensor
);
transpose_fun
<
DeviceContext
,
T
>
(
context
,
*
in_conf
,
&
conf_tensor
);
conf_tensor
.
Resize
(
conf_shape_softmax
);
math
::
SoftmaxFunctor
<
Place
,
T
>
()(
context
.
device_context
(),
&
conf_tensor
,
math
::
SoftmaxFunctor
<
DeviceContext
,
T
>
()(
context
.
template
device_context
<
DeviceContext
>(),
&
conf_tensor
,
&
conf_tensor
);
T
*
loc_data
=
loc_tensor
.
data
<
T
>
();
T
*
conf_data
=
conf_tensor
.
data
<
T
>
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录