Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
367a54e0
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
367a54e0
编写于
9月 26, 2017
作者:
Y
Yibing Liu
提交者:
GitHub
9月 26, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4360 from kuke/multiplex_modify_dev
Modify multiplex_op
上级
aef71a6e
e9dbc85b
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
77 additions
and
63 deletions
+77
-63
paddle/operators/multiplex_op.cc
paddle/operators/multiplex_op.cc
+34
-28
paddle/operators/multiplex_op.cu
paddle/operators/multiplex_op.cu
+23
-20
paddle/operators/multiplex_op.h
paddle/operators/multiplex_op.h
+13
-10
python/paddle/v2/framework/tests/test_multiplex_op.py
python/paddle/v2/framework/tests/test_multiplex_op.py
+7
-5
未找到文件。
paddle/operators/multiplex_op.cc
浏览文件 @
367a54e0
...
...
@@ -18,7 +18,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
class
MultiplexOp
:
public
framework
::
OperatorWithKernel
{
public:
...
...
@@ -26,24 +25,31 @@ class MultiplexOp : public framework::OperatorWithKernel {
protected:
void
InferShape
(
const
framework
::
InferShapeContext
&
ctx
)
const
override
{
PADDLE_ENFORCE_NOT_NULL
(
ctx
.
InputVar
(
"Ids"
),
"Input(Ids) shouldn't be null."
);
PADDLE_ENFORCE
(
!
ctx
.
MultiInputVar
(
"X"
).
empty
(),
"
Input(X) should not be null
"
);
"
MultiInput(X) shouldn't be empty.
"
);
PADDLE_ENFORCE_NOT_NULL
(
ctx
.
OutputVar
(
"Out"
),
"Output(Out) shouldn't be null."
);
auto
ids_dim
=
ctx
.
Input
<
Tensor
>
(
"Ids"
)
->
dims
();
PADDLE_ENFORCE
(
ids_dim
.
size
()
==
2
&&
ids_dim
[
1
]
==
1
,
"The index tensor must be a vector with size batchSize x 1."
);
auto
ins
=
ctx
.
MultiInput
<
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
num_ins
=
ins
.
size
();
PADDLE_ENFORCE
(
num_ins
>
2
,
"multiplex operator should have more than 2 inputs."
);
PADDLE_ENFORCE_EQ
(
ins
[
0
]
->
dims
().
size
(),
1
,
"The first input must be a index vector."
);
auto
in_dim
=
ins
[
1
]
->
dims
();
for
(
size_t
i
=
2
;
i
<
num_ins
;
i
++
)
{
PADDLE_ENFORCE
(
num_ins
>
1
,
"multiplex operator should have more than "
"one candidate input tensors."
);
auto
in_dim
=
ins
[
0
]
->
dims
();
PADDLE_ENFORCE
(
in_dim
.
size
()
>=
2
,
"The rank of candidate tensors must be not less than 2."
);
for
(
size_t
i
=
1
;
i
<
num_ins
;
i
++
)
{
auto
dim
=
ins
[
i
]
->
dims
();
PADDLE_ENFORCE
(
in_dim
==
dim
,
"All the input tensors except the first one must have the same size"
);
PADDLE_ENFORCE
(
in_dim
==
dim
,
"All the candidate tensors must have the same size."
);
}
out
->
Resize
(
in_dim
);
}
...
...
@@ -54,25 +60,25 @@ class MultiplexOpMaker : public framework::OpProtoAndCheckerMaker {
MultiplexOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"The input tensors of multiplex operator."
).
AsDuplicable
();
AddInput
(
"Ids"
,
"The index tensor of multiplex operator."
);
AddInput
(
"X"
,
"The candidate tensors of multiplex operator."
)
.
AsDuplicable
();
AddOutput
(
"Out"
,
"The output tensor of multiplex operator."
);
AddComment
(
R"DOC(Multiplex operator
Multiplex multiple tensors according to the index provided by the first
input tensor.
Multiplex multiple tensors according to the index provided by the index tensor.
ins[0]
: the index tensor.
ins[1:N]: the candidate output tensors
.
Ids
: the index tensor.
X[0 : N - 1]: the candidate tensors for output (N >= 2)
.
For each index i from 0 to batchSize - 1, the output is the i-th row of the
the (
index[i] + 1
)-th tensor.
the (
Ids[i]
)-th tensor.
For i-th row of the output tensor:
y[i]
[j] = x_{k}[i][j], j = 0,1, ... , (x_{1}.width - 1)
y[i]
= x_{k}[i]
where y is the output tensor. `x_{k}` is the k-th input tensor
and `k = x{0}[i] + 1`.
and `k = Ids[i]`.
)DOC"
);
}
};
...
...
@@ -84,15 +90,15 @@ class MultiplexGradOp : public framework::OperatorWithKernel {
protected:
void
InferShape
(
const
framework
::
InferShapeContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
!
ctx
.
MultiInputVar
(
"X"
).
empty
(),
"Input(X) should not be null"
);
"Input(X) should not be null
.
"
);
PADDLE_ENFORCE
(
!
ctx
.
MultiOutputVar
(
framework
::
GradVarName
(
"X"
)).
empty
(),
"Output(X@Grad) should not be null"
);
"Output(X@Grad) should not be null
.
"
);
PADDLE_ENFORCE_NOT_NULL
(
ctx
.
InputVar
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) should
n'
t be null."
);
auto
d_ins
=
ctx
.
MultiOutput
<
LoD
Tensor
>
(
framework
::
GradVarName
(
"X"
));
"Input(Out@GRAD) should
no
t be null."
);
auto
d_ins
=
ctx
.
MultiOutput
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
ins
=
ctx
.
MultiInput
<
Tensor
>
(
"X"
);
//
don't compute gradient for index (ins[0]
)
for
(
size_t
i
=
1
;
i
<
ins
.
size
();
i
++
)
{
//
No need to compute gradient for Input(Ids
)
for
(
size_t
i
=
0
;
i
<
ins
.
size
();
i
++
)
{
if
(
d_ins
[
i
])
{
d_ins
[
i
]
->
Resize
(
ins
[
i
]
->
dims
());
}
...
...
paddle/operators/multiplex_op.cu
浏览文件 @
367a54e0
...
...
@@ -18,27 +18,30 @@
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
template
<
typename
Place
,
typename
T
>
class
MultiplexGPUKernel
:
public
framework
::
OpKernel
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
auto
ins
=
ctx
.
MultiInput
<
framework
::
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Out
"
);
auto
ins
=
ctx
.
MultiInput
<
Tensor
>
(
"X"
);
auto
*
ids
=
ctx
.
Input
<
Tensor
>
(
"Ids
"
);
auto
*
out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
rows
=
ins
[
1
]
->
dims
()[
0
];
auto
cols
=
ins
[
1
]
->
dims
()[
1
]
;
auto
rows
=
ins
[
0
]
->
dims
()[
0
];
auto
cols
=
ins
[
0
]
->
numel
()
/
rows
;
// copy index to cpu
framework
::
Tensor
index_t_cpu
;
index_t_cpu
.
CopyFrom
<
T
>
(
*
(
ins
[
0
])
,
platform
::
CPUPlace
());
auto
*
index
=
index_t_cpu
.
data
<
T
>
();
Tensor
index_t_cpu
;
index_t_cpu
.
CopyFrom
<
int32_t
>
(
*
ids
,
platform
::
CPUPlace
());
auto
*
index
=
index_t_cpu
.
data
<
int32_t
>
();
auto
stream
=
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
ctx
.
device_context
())
.
stream
();
Place
place
=
boost
::
get
<
Place
>
(
ctx
.
GetPlace
());
for
(
auto
i
=
0
;
i
<
rows
;
i
++
)
{
int
k
=
(
int
)
index
[
i
]
+
1
;
int32_t
k
=
index
[
i
];
PADDLE_ENFORCE_GE
(
k
,
0
,
"index must be nonnegative."
);
PADDLE_ENFORCE_LT
(
k
,
ins
.
size
(),
"index exceeds the number of candidate tensors."
);
memory
::
Copy
(
place
,
out
->
data
<
T
>
()
+
i
*
cols
,
place
,
...
...
@@ -51,11 +54,11 @@ template <typename Place, typename T>
class
MultiplexGradGPUKernel
:
public
framework
::
OpKernel
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
auto
*
d_out
=
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
ins
=
ctx
.
MultiInput
<
framework
::
Tensor
>
(
"X"
);
auto
d_ins
=
ctx
.
MultiOutput
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
for
(
size_t
i
=
1
;
i
<
d_ins
.
size
();
i
++
)
{
auto
*
d_out
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
ins
=
ctx
.
MultiInput
<
Tensor
>
(
"X"
);
auto
*
ids
=
ctx
.
Input
<
Tensor
>
(
"Ids"
);
auto
d_ins
=
ctx
.
MultiOutput
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
for
(
size_t
i
=
0
;
i
<
d_ins
.
size
();
i
++
)
{
if
(
d_ins
[
i
])
{
d_ins
[
i
]
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
t
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
d_ins
[
i
]);
...
...
@@ -63,19 +66,19 @@ class MultiplexGradGPUKernel : public framework::OpKernel {
}
}
auto
rows
=
ins
[
1
]
->
dims
()[
0
];
auto
cols
=
ins
[
1
]
->
dims
()[
1
]
;
auto
rows
=
ins
[
0
]
->
dims
()[
0
];
auto
cols
=
ins
[
0
]
->
numel
()
/
rows
;
// copy index to cpu
framework
::
Tensor
index_t_cpu
;
index_t_cpu
.
CopyFrom
<
T
>
(
*
(
ins
[
0
])
,
platform
::
CPUPlace
());
auto
*
index
=
index_t_cpu
.
data
<
T
>
();
Tensor
index_t_cpu
;
index_t_cpu
.
CopyFrom
<
int32_t
>
(
*
ids
,
platform
::
CPUPlace
());
auto
*
index
=
index_t_cpu
.
data
<
int32_t
>
();
auto
stream
=
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
ctx
.
device_context
())
.
stream
();
Place
place
=
boost
::
get
<
Place
>
(
ctx
.
GetPlace
());
for
(
auto
i
=
0
;
i
<
rows
;
i
++
)
{
int
k
=
(
int
)
index
[
i
]
+
1
;
size_t
k
=
static_cast
<
size_t
>
(
index
[
i
])
;
if
(
d_ins
[
k
])
{
memory
::
Copy
(
place
,
d_ins
[
k
]
->
data
<
T
>
()
+
i
*
cols
,
place
,
d_out
->
data
<
T
>
()
+
i
*
cols
,
cols
*
sizeof
(
T
),
stream
);
...
...
paddle/operators/multiplex_op.h
浏览文件 @
367a54e0
...
...
@@ -27,16 +27,18 @@ class MultiplexCPUKernel : public framework::OpKernel {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
auto
ins
=
ctx
.
MultiInput
<
framework
::
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Out"
);
auto
ids
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Ids"
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
rows
=
ins
[
1
]
->
dims
()[
0
];
auto
cols
=
ins
[
1
]
->
dims
()[
1
]
;
auto
*
index
=
ins
[
0
]
->
data
<
T
>
();
auto
rows
=
ins
[
0
]
->
dims
()[
0
];
auto
cols
=
ins
[
0
]
->
numel
()
/
rows
;
auto
index
=
ids
->
data
<
int32_t
>
();
Place
place
=
boost
::
get
<
Place
>
(
ctx
.
GetPlace
());
for
(
auto
i
=
0
;
i
<
rows
;
i
++
)
{
int
k
=
(
int
)
index
[
i
]
+
1
;
int32_t
k
=
index
[
i
];
PADDLE_ENFORCE_GE
(
k
,
0
,
"index must be nonnegative."
);
PADDLE_ENFORCE_LT
(
static_cast
<
size_t
>
(
k
),
ins
.
size
(),
"index exceeds the number of candidate tensors."
);
memory
::
Copy
(
place
,
out
->
data
<
T
>
()
+
i
*
cols
,
place
,
...
...
@@ -50,10 +52,11 @@ class MultiplexGradCPUKernel : public framework::OpKernel {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
auto
*
d_out
=
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
ids
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Ids"
);
auto
ins
=
ctx
.
MultiInput
<
framework
::
Tensor
>
(
"X"
);
auto
d_ins
=
ctx
.
MultiOutput
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
for
(
size_t
i
=
1
;
i
<
d_ins
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
d_ins
.
size
();
i
++
)
{
if
(
d_ins
[
i
])
{
d_ins
[
i
]
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
t
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
d_ins
[
i
]);
...
...
@@ -61,12 +64,12 @@ class MultiplexGradCPUKernel : public framework::OpKernel {
}
}
auto
rows
=
ins
[
1
]
->
dims
()[
0
];
auto
cols
=
ins
[
1
]
->
dims
()[
1
]
;
auto
*
index
=
i
ns
[
0
]
->
data
<
T
>
();
auto
rows
=
ins
[
0
]
->
dims
()[
0
];
auto
cols
=
ins
[
0
]
->
numel
()
/
rows
;
auto
*
index
=
i
ds
->
data
<
int32_t
>
();
Place
place
=
boost
::
get
<
Place
>
(
ctx
.
GetPlace
());
for
(
auto
i
=
0
;
i
<
rows
;
i
++
)
{
int
k
=
(
int
)
index
[
i
]
+
1
;
size_t
k
=
static_cast
<
size_t
>
(
index
[
i
])
;
if
(
d_ins
[
k
])
{
memory
::
Copy
(
place
,
d_ins
[
k
]
->
data
<
T
>
()
+
i
*
cols
,
place
,
d_out
->
data
<
T
>
()
+
i
*
cols
,
cols
*
sizeof
(
T
));
...
...
python/paddle/v2/framework/tests/test_multiplex_op.py
浏览文件 @
367a54e0
...
...
@@ -6,20 +6,22 @@ from op_test import OpTest
class
TestMultiplexOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"multiplex"
rows
=
3
index
=
np
.
array
([
3
,
1
,
0
])
rows
=
4
index
=
np
.
arange
(
0
,
rows
).
astype
(
'int32'
)
np
.
random
.
shuffle
(
index
)
index
=
np
.
reshape
(
index
,
(
rows
,
1
))
ins1
=
np
.
random
.
random
((
rows
,
10
)).
astype
(
"float32"
)
ins2
=
np
.
random
.
random
((
rows
,
10
)).
astype
(
"float32"
)
ins3
=
np
.
random
.
random
((
rows
,
10
)).
astype
(
"float32"
)
ins4
=
np
.
random
.
random
((
rows
,
10
)).
astype
(
"float32"
)
self
.
inputs
=
{
'
X'
:
[(
'index'
,
index
),
(
'x1'
,
ins1
),
(
'x2'
,
ins2
),
(
'x3'
,
ins3
)
,
(
'x4'
,
ins4
)]
'
Ids'
:
index
,
'X'
:
[(
'x1'
,
ins1
),
(
'x2'
,
ins2
),
(
'x3'
,
ins3
),
(
'x4'
,
ins4
)]
}
# multiplex output
output
=
np
.
zeros_like
(
ins1
)
for
i
in
range
(
0
,
rows
):
k
=
index
[
i
]
+
1
k
=
index
[
i
]
[
0
]
output
[
i
]
=
self
.
inputs
[
'X'
][
k
][
1
][
i
]
self
.
outputs
=
{
'Out'
:
output
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录