Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
b187e372
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
b187e372
编写于
2月 28, 2019
作者:
F
flame
提交者:
GitHub
2月 28, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add anakin fc op converter (#15965)
上级
87248281
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
121 addition
and
48 deletion
+121
-48
paddle/fluid/inference/anakin/convert/fc.cc
paddle/fluid/inference/anakin/convert/fc.cc
+37
-3
paddle/fluid/inference/anakin/convert/test_fc_op.cc
paddle/fluid/inference/anakin/convert/test_fc_op.cc
+5
-3
paddle/fluid/inference/anakin/convert/ut_helper.h
paddle/fluid/inference/anakin/convert/ut_helper.h
+36
-3
paddle/fluid/inference/anakin/test_anakin_engine.cc
paddle/fluid/inference/anakin/test_anakin_engine.cc
+43
-39
未找到文件。
paddle/fluid/inference/anakin/convert/fc.cc
浏览文件 @
b187e372
...
@@ -13,6 +13,16 @@
...
@@ -13,6 +13,16 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/fc.h"
#include "paddle/fluid/inference/anakin/convert/fc.h"
#include <algorithm>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
X86
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PBlock
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
...
@@ -23,15 +33,39 @@ void FcOpConverter::operator()(const framework::proto::OpDesc &op,
...
@@ -23,15 +33,39 @@ void FcOpConverter::operator()(const framework::proto::OpDesc &op,
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Y"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Y"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
In
put
(
"Out"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Out
put
(
"Out"
).
size
(),
1
);
auto
x_name
=
op_desc
.
Input
(
"X"
).
front
();
auto
x_name
=
op_desc
.
Input
(
"X"
).
front
();
PADDLE_ENFORCE
(
x_name
.
size
()
>
0
);
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
(
);
auto
*
y_v
=
scope
.
FindVar
(
op_desc
.
Input
(
"Y"
).
front
());
auto
*
y_v
=
scope
.
FindVar
(
op_desc
.
Input
(
"Y"
).
front
());
PADDLE_ENFORCE_NOT_NULL
(
y_v
);
PADDLE_ENFORCE_NOT_NULL
(
y_v
);
auto
*
y_t
=
y_v
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
*
y_t
=
y_v
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
shape
=
framework
::
vectorize2int
(
y_t
->
dims
());
auto
input_name
=
op_desc
.
Input
(
"X"
).
front
();
auto
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
weight_shape
=
framework
::
vectorize2int
(
y_t
->
dims
());
engine_
->
AddOp
(
op_name
,
"Dense"
,
{
input_name
},
{
output_name
});
engine_
->
AddOpAttr
(
op_name
,
"bias_term"
,
false
);
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
1
);
int
out_dim
=
weight_shape
[
1
];
engine_
->
AddOpAttr
(
op_name
,
"out_dim"
,
out_dim
);
weight_shape
.
push_back
(
1
);
weight_shape
.
push_back
(
1
);
Shape
anakin_shape
(
weight_shape
);
framework
::
LoDTensor
weight_tensor
;
weight_tensor
.
Resize
(
y_t
->
dims
());
TensorCopySync
((
*
y_t
),
platform
::
CPUPlace
(),
&
weight_tensor
);
auto
*
weight1
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
anakin_shape
);
float
*
cpu_data
=
static_cast
<
float
*>
(
weight1
->
h_tensor
().
mutable_data
());
std
::
copy_n
(
weight_tensor
.
data
<
float
>
(),
weight_tensor
.
numel
(),
cpu_data
);
weight1
->
d_tensor
().
set_shape
(
anakin_shape
);
weight1
->
d_tensor
().
copy_from
(
weight1
->
h_tensor
());
engine_
->
AddOpAttr
(
op_name
,
"weight_1"
,
*
weight1
);
}
}
}
// namespace anakin
}
// namespace anakin
...
...
paddle/fluid/inference/anakin/convert/test_fc_op.cc
浏览文件 @
b187e372
...
@@ -22,14 +22,16 @@ namespace inference {
...
@@ -22,14 +22,16 @@ namespace inference {
namespace
anakin
{
namespace
anakin
{
TEST
(
fc_op
,
test
)
{
TEST
(
fc_op
,
test
)
{
auto
it
=
OpRegister
::
instance
()
->
Get
(
"fc"
);
auto
fc_converter
=
OpRegister
::
instance
()
->
Get
(
"fc"
);
ASSERT_TRUE
(
it
!=
nullptr
);
ASSERT_TRUE
(
fc_converter
!=
nullptr
);
// Registrar<FcOpConverter> register_fc("fc");
// auto fc = std::make_shared<FcOpConverter>();
std
::
unordered_set
<
std
::
string
>
parameters
({
"mul_y"
});
std
::
unordered_set
<
std
::
string
>
parameters
({
"mul_y"
});
framework
::
Scope
scope
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
scope
);
AnakinConvertValidation
validator
(
parameters
,
scope
);
validator
.
DeclInputVar
(
"mul_x"
,
{
1
,
1
,
1
,
1
});
validator
.
DeclInputVar
(
"mul_x"
,
{
1
,
1
,
1
,
1
});
validator
.
DeclParamVar
(
"mul_y"
,
{
1
,
1
,
1
,
2
});
validator
.
DeclParamVar
(
"mul_y"
,
{
1
,
2
});
validator
.
DeclOutputVar
(
"mul_out"
,
{
1
,
1
,
1
,
2
});
validator
.
DeclOutputVar
(
"mul_out"
,
{
1
,
1
,
1
,
2
});
// Prepare Op description
// Prepare Op description
...
...
paddle/fluid/inference/anakin/convert/ut_helper.h
浏览文件 @
b187e372
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <map>
#include <memory>
#include <memory>
#include <string>
#include <string>
#include <unordered_map>
#include <unordered_map>
...
@@ -127,6 +128,7 @@ class AnakinConvertValidation {
...
@@ -127,6 +128,7 @@ class AnakinConvertValidation {
engine_
->
SetInputShape
(
input
,
t_shape
);
engine_
->
SetInputShape
(
input
,
t_shape
);
}
}
engine_
->
Optimize
();
engine_
->
Optimize
();
engine_
->
InitGraph
();
}
}
// We use the set 'neglected_output' here, because some Ops like batch norm,
// We use the set 'neglected_output' here, because some Ops like batch norm,
...
@@ -138,16 +140,47 @@ class AnakinConvertValidation {
...
@@ -138,16 +140,47 @@ class AnakinConvertValidation {
platform
::
CUDADeviceContext
ctx
(
place_
);
platform
::
CUDADeviceContext
ctx
(
place_
);
op_
->
Run
(
scope_
,
place_
);
op_
->
Run
(
scope_
,
place_
);
// std::vector<framework::LoDTensor> input_vector;
// std::vector<framework::LoDTensor> output_vector;
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
inputs
;
for
(
const
auto
&
input
:
op_desc_
->
InputArgumentNames
())
{
if
(
parameters_
.
count
(
input
))
continue
;
auto
*
var
=
scope_
.
FindVar
(
input
);
auto
tensor
=
var
->
GetMutable
<
framework
::
LoDTensor
>
();
inputs
.
insert
({
input
,
tensor
});
}
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
outputs
;
std
::
vector
<
std
::
vector
<
float
>>
fluid_outputs
;
for
(
const
auto
&
output
:
op_desc_
->
OutputArgumentNames
())
{
for
(
const
auto
&
output
:
op_desc_
->
OutputArgumentNames
())
{
if
(
neglected_output
.
count
(
output
))
continue
;
if
(
neglected_output
.
count
(
output
))
continue
;
std
::
vector
<
float
>
fluid_out
;
std
::
vector
<
float
>
fluid_out
;
auto
*
var
=
scope_
.
FindVar
(
output
);
auto
*
var
=
scope_
.
FindVar
(
output
);
auto
*
tensor
=
var
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
tensor
=
var
->
GetMutable
<
framework
::
LoDTensor
>
();
framework
::
TensorToVector
(
*
tensor
,
ctx
,
&
fluid_out
);
framework
::
TensorToVector
(
*
tensor
,
ctx
,
&
fluid_out
);
fluid_outputs
.
push_back
(
fluid_out
);
size_t
fluid_out_size
=
fluid_out
.
size
();
//
size_t fluid_out_size = fluid_out.size();
for
(
size_t
i
=
0
;
i
<
fluid_out_size
;
i
++
)
{
/*
for (size_t i = 0; i < fluid_out_size; i++) {
std::cout << fluid_out[i] << std::endl;
std::cout << fluid_out[i] << std::endl;
}*/
outputs
.
insert
({
output
,
tensor
});
}
engine_
->
Execute
(
inputs
,
outputs
);
int
i_output
=
0
;
for
(
const
auto
&
output
:
op_desc_
->
OutputArgumentNames
())
{
if
(
neglected_output
.
count
(
output
))
continue
;
std
::
vector
<
float
>
anakin_out
;
auto
*
var
=
scope_
.
FindVar
(
output
);
auto
tensor
=
var
->
GetMutable
<
framework
::
LoDTensor
>
();
framework
::
TensorToVector
(
*
tensor
,
ctx
,
&
anakin_out
);
size_t
anakin_out_size
=
anakin_out
.
size
();
auto
fluid_out
=
fluid_outputs
[
i_output
++
];
for
(
size_t
i
=
0
;
i
<
anakin_out_size
;
i
++
)
{
LOG
(
INFO
)
<<
"Output["
<<
i
<<
"]: anakin["
<<
anakin_out
[
i
]
<<
"], "
<<
"fluid["
<<
fluid_out
[
i
]
<<
"]"
;
}
}
}
}
}
}
...
...
paddle/fluid/inference/anakin/test_anakin_engine.cc
浏览文件 @
b187e372
...
@@ -46,47 +46,51 @@ class TestAnakinEngine : public ::testing::Test {
...
@@ -46,47 +46,51 @@ class TestAnakinEngine : public ::testing::Test {
void
TestAnakinEngine
::
SetUp
()
{
void
TestAnakinEngine
::
SetUp
()
{
engine_
.
reset
(
new
AnakinEngine
<
NV
,
Precision
::
FP32
>
(
true
));
engine_
.
reset
(
new
AnakinEngine
<
NV
,
Precision
::
FP32
>
(
true
));
}
TEST_F
(
TestAnakinEngine
,
Execute
)
{
engine_
->
AddOp
(
"op1"
,
"Dense"
,
{
"x"
},
{
"y"
});
engine_
->
AddOpAttr
(
"op1"
,
"out_dim"
,
2
);
engine_
->
AddOpAttr
(
"op1"
,
"bias_term"
,
false
);
engine_
->
AddOpAttr
(
"op1"
,
"axis"
,
1
);
std
::
vector
<
int
>
shape
=
{
1
,
1
,
1
,
2
};
Shape
tmp_shape
(
shape
);
// PBlock<NV> weight1(tmp_shape);
auto
*
weight1
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
tmp_shape
);
// auto *weight1 = new PBlock<NV>(tmp_shape, AK_FLOAT);
float
*
cpu_data
=
static_cast
<
float
*>
(
weight1
->
h_tensor
().
mutable_data
());
cpu_data
[
0
]
=
2.
;
weight1
->
d_tensor
().
set_shape
(
tmp_shape
);
weight1
->
d_tensor
().
copy_from
(
weight1
->
h_tensor
());
engine_
->
AddOpAttr
(
"op1"
,
"weight_1"
,
*
weight1
);
TEST_F
(
TestAnakinEngine
,
Execute
)
{
engine_
->
Freeze
();
engine_
->
AddOp
(
"op1"
,
"Dense"
,
{
"x"
},
{
"y"
});
// PTuple<int> input_shape = {1};
engine_
->
AddOpAttr
(
"op1"
,
"out_dim"
,
2
);
// engine_->AddOpAttr("x", "input_shape", input_shape);
engine_
->
AddOpAttr
(
"op1"
,
"bias_term"
,
false
);
engine_
->
SetInputShape
(
"x"
,
{
1
,
1
,
1
,
1
});
engine_
->
AddOpAttr
(
"op1"
,
"axis"
,
1
);
engine_
->
Optimize
();
std
::
vector
<
int
>
shape
=
{
1
,
1
,
1
,
2
};
engine_
->
InitGraph
();
Shape
tmp_shape
(
shape
);
framework
::
LoDTensor
x
;
auto
*
weight1
=
framework
::
LoDTensor
y
;
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
tmp_shape
);
x
.
Resize
({
1
,
1
,
1
,
1
});
y
.
Resize
({
1
,
1
,
1
,
2
});
float
*
cpu_data
=
static_cast
<
float
*>
(
weight1
->
h_tensor
().
mutable_data
());
auto
*
x_data
=
x
.
mutable_data
<
float
>
(
platform
::
CUDAPlace
());
cpu_data
[
0
]
=
2.
;
float
x_data_cpu
[]
=
{
1.
};
weight1
->
d_tensor
().
set_shape
(
tmp_shape
);
cudaMemcpy
(
x_data
,
x_data_cpu
,
sizeof
(
float
),
cudaMemcpyHostToDevice
);
weight1
->
d_tensor
().
copy_from
(
weight1
->
h_tensor
());
engine_
->
AddOpAttr
(
"op1"
,
"weight_1"
,
*
weight1
);
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
inputs
=
{{
"x"
,
&
x
}};
auto
*
y_data
=
y
.
mutable_data
<
float
>
(
platform
::
CUDAPlace
());
engine_
->
Freeze
();
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
outputs
=
{{
"y"
,
&
y
}};
engine_
->
SetInputShape
(
"x"
,
{
1
,
1
,
1
,
1
});
engine_
->
Optimize
();
engine_
->
Execute
(
inputs
,
outputs
);
engine_
->
InitGraph
();
auto
*
y_data_gpu
=
y_data
;
framework
::
LoDTensor
x
;
float
y_data_cpu
[
2
];
framework
::
LoDTensor
y
;
cudaMemcpy
(
y_data_cpu
,
y_data_gpu
,
sizeof
(
float
)
*
2
,
cudaMemcpyDeviceToHost
);
x
.
Resize
({
1
,
1
,
1
,
1
});
LOG
(
INFO
)
<<
"output value: "
<<
y_data_cpu
[
0
]
<<
", "
<<
y_data_cpu
[
1
];
y
.
Resize
({
1
,
1
,
1
,
2
});
auto
*
x_data
=
x
.
mutable_data
<
float
>
(
platform
::
CUDAPlace
());
float
x_data_cpu
[]
=
{
1.
};
cudaMemcpy
(
x_data
,
x_data_cpu
,
sizeof
(
float
),
cudaMemcpyHostToDevice
);
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
inputs
=
{{
"x"
,
&
x
}};
auto
*
y_data
=
y
.
mutable_data
<
float
>
(
platform
::
CUDAPlace
());
std
::
map
<
std
::
string
,
framework
::
LoDTensor
*>
outputs
=
{{
"y"
,
&
y
}};
engine_
->
Execute
(
inputs
,
outputs
);
auto
*
y_data_gpu
=
y_data
;
float
y_data_cpu
[
2
];
cudaMemcpy
(
y_data_cpu
,
y_data_gpu
,
sizeof
(
float
)
*
2
,
cudaMemcpyDeviceToHost
);
LOG
(
INFO
)
<<
"output value: "
<<
y_data_cpu
[
0
]
<<
", "
<<
y_data_cpu
[
1
];
}
}
}
}
// namespace anakin
}
// namespace anakin
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录