Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
07dcf285
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
07dcf285
编写于
3月 20, 2019
作者:
N
nhzlx
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
git cherry-pick from feature/anakin-engine: update anakin subgraph #16278
上级
c407dfa3
变更
22
显示空白变更内容
内联
并排
Showing
22 changed file
with
546 addition
and
29 deletion
+546
-29
paddle/fluid/inference/anakin/convert/CMakeLists.txt
paddle/fluid/inference/anakin/convert/CMakeLists.txt
+5
-5
paddle/fluid/inference/anakin/convert/dropout.cc
paddle/fluid/inference/anakin/convert/dropout.cc
+66
-0
paddle/fluid/inference/anakin/convert/dropout.h
paddle/fluid/inference/anakin/convert/dropout.h
+37
-0
paddle/fluid/inference/anakin/convert/elementwise.cc
paddle/fluid/inference/anakin/convert/elementwise.cc
+32
-1
paddle/fluid/inference/anakin/convert/elementwise.h
paddle/fluid/inference/anakin/convert/elementwise.h
+12
-0
paddle/fluid/inference/anakin/convert/im2sequence.cc
paddle/fluid/inference/anakin/convert/im2sequence.cc
+62
-0
paddle/fluid/inference/anakin/convert/im2sequence.h
paddle/fluid/inference/anakin/convert/im2sequence.h
+37
-0
paddle/fluid/inference/anakin/convert/pool2d.cc
paddle/fluid/inference/anakin/convert/pool2d.cc
+5
-1
paddle/fluid/inference/anakin/convert/sum.cc
paddle/fluid/inference/anakin/convert/sum.cc
+54
-0
paddle/fluid/inference/anakin/convert/sum.h
paddle/fluid/inference/anakin/convert/sum.h
+37
-0
paddle/fluid/inference/anakin/convert/test_dropout_op.cc
paddle/fluid/inference/anakin/convert/test_dropout_op.cc
+53
-0
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
+14
-8
paddle/fluid/inference/anakin/convert/test_im2sequence_op.cc
paddle/fluid/inference/anakin/convert/test_im2sequence_op.cc
+55
-0
paddle/fluid/inference/anakin/convert/test_sum_op.cc
paddle/fluid/inference/anakin/convert/test_sum_op.cc
+48
-0
paddle/fluid/inference/anakin/op_teller.cc
paddle/fluid/inference/anakin/op_teller.cc
+4
-1
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
...luid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
+1
-1
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+2
-2
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+3
-2
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+1
-1
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+1
-0
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+13
-6
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+4
-1
未找到文件。
paddle/fluid/inference/anakin/convert/CMakeLists.txt
浏览文件 @
07dcf285
cc_library
(
anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
cc_library
(
anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc
elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc DEPS anakin_engine framework_proto scope op_registry
)
batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc
detection_out.cc scale.cc DEPS anakin_engine framework_proto scope op_registry
)
cc_test
(
test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op
)
cc_test
(
test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op
)
cc_test
(
test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv
)
cc_test
(
test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv
)
...
@@ -9,11 +7,13 @@ cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op ana
...
@@ -9,11 +7,13 @@ cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op ana
cc_test
(
test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling
)
cc_test
(
test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling
)
cc_test
(
test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split
)
cc_test
(
test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split
)
cc_test
(
test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split
)
cc_test
(
test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split
)
cc_test
(
test_anakin_elementwise SRCS test_elementwise_op.cc DEPS anakin_op_converter elementwise_add_op
)
cc_test
(
test_anakin_elementwise SRCS test_elementwise_op.cc DEPS anakin_op_converter elementwise_add_op
elementwise_mul_op
)
cc_test
(
test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter SERIAL
)
cc_test
(
test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter SERIAL
)
cc_test
(
test_anakin_softmax SRCS test_softmax_op.cc DEPS anakin_op_converter softmax_op softmax
)
cc_test
(
test_anakin_softmax SRCS test_softmax_op.cc DEPS anakin_op_converter softmax_op softmax
)
cc_test
(
test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter reshape_op
)
cc_test
(
test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter reshape_op
)
cc_test
(
test_anakin_flatten SRCS test_flatten_op.cc DEPS anakin_op_converter flatten_op reshape_op
)
cc_test
(
test_anakin_flatten SRCS test_flatten_op.cc DEPS anakin_op_converter flatten_op reshape_op
)
cc_test
(
test_anakin_transpose SRCS test_transpose_op.cc DEPS anakin_op_converter transpose_op
)
cc_test
(
test_anakin_transpose SRCS test_transpose_op.cc DEPS anakin_op_converter transpose_op
)
cc_test
(
test_anakin_batch_norm SRCS test_batch_norm_op.cc DEPS anakin_op_converter batch_norm_op
)
cc_test
(
test_anakin_batch_norm SRCS test_batch_norm_op.cc DEPS anakin_op_converter batch_norm_op
)
cc_test
(
test_anakin_scale SRCS test_scale_op.cc DEPS anakin_op_converter scale_op math_function
)
cc_test
(
test_anakin_dropout SRCS test_dropout_op.cc DEPS anakin_op_converter dropout_op
)
cc_test
(
test_anakin_im2sequence SRCS test_im2sequence_op.cc DEPS anakin_op_converter im2sequence_op im2col
)
cc_test
(
test_anakin_sum SRCS test_sum_op.cc DEPS anakin_op_converter sum_op selected_rows_functor
)
paddle/fluid/inference/anakin/convert/dropout.cc
0 → 100644
浏览文件 @
07dcf285
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/dropout.h"
#include <algorithm>
#include <string>
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
X86
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PBlock
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
DropoutOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Mask"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1
);
auto
x_name
=
op_desc
.
Input
(
"X"
).
front
();
auto
out_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
AddOp
(
op_name
,
"Scale"
,
{
x_name
},
{
out_name
});
auto
dropout_prob
=
boost
::
get
<
float
>
(
op_desc
.
GetAttr
(
"dropout_prob"
));
auto
factor
=
1
-
dropout_prob
;
Shape
shape1
(
std
::
vector
<
int
>
({
1
,
1
,
1
,
1
}));
auto
*
weight1
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
shape1
);
auto
*
factor_data
=
static_cast
<
float
*>
(
weight1
->
h_tensor
().
mutable_data
());
float
weight1_data
[]
=
{
factor
};
std
::
copy
(
std
::
begin
(
weight1_data
),
std
::
end
(
weight1_data
),
factor_data
);
engine_
->
AddOpAttr
(
op_name
,
"weight_1"
,
*
weight1
);
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
0
);
engine_
->
AddOpAttr
(
op_name
,
"num_axes"
,
0
);
engine_
->
AddOpAttr
(
op_name
,
"bias_term"
,
false
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
dropout
,
DropoutOpConverter
);
paddle/fluid/inference/anakin/convert/dropout.h
0 → 100644
浏览文件 @
07dcf285
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
class
DropoutOpConverter
:
public
AnakinOpConverter
{
public:
DropoutOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
DropoutOpConverter
()
{}
private:
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/elementwise.cc
浏览文件 @
07dcf285
...
@@ -35,7 +35,7 @@ void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op,
...
@@ -35,7 +35,7 @@ void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op,
bool
test_mode
)
{
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Y"
).
size
(),
1
);
// Y is a weight
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Y"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1
);
auto
x_name
=
op_desc
.
Input
(
"X"
).
front
();
auto
x_name
=
op_desc
.
Input
(
"X"
).
front
();
...
@@ -50,8 +50,39 @@ void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op,
...
@@ -50,8 +50,39 @@ void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op,
engine_
->
AddOpAttr
<
PTuple
<
float
>>
(
op_name
,
"coeff"
,
coeff
);
engine_
->
AddOpAttr
<
PTuple
<
float
>>
(
op_name
,
"coeff"
,
coeff
);
}
}
void
ElementwiseMulOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"Y"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1
);
auto
x_name
=
op_desc
.
Input
(
"X"
).
front
();
auto
y_name
=
op_desc
.
Input
(
"Y"
).
front
();
auto
out_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
AddOp
(
op_name
,
"Scale"
,
{
x_name
,
y_name
},
{
out_name
});
// Fill a number to weight_1 as a placeholder.
Shape
shape1
(
std
::
vector
<
int
>
({
1
,
1
,
1
,
1
}));
auto
*
weight1
=
GraphGlobalMem
<
NV
>::
Global
().
template
new_block
<
AK_FLOAT
>(
shape1
);
auto
*
placeholder_data
=
static_cast
<
float
*>
(
weight1
->
h_tensor
().
mutable_data
());
float
weight1_data
[]
=
{
1
};
std
::
copy
(
std
::
begin
(
weight1_data
),
std
::
end
(
weight1_data
),
placeholder_data
);
engine_
->
AddOpAttr
(
op_name
,
"weight_1"
,
*
weight1
);
auto
axis
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"axis"
));
engine_
->
AddOpAttr
(
op_name
,
"axis"
,
axis
);
engine_
->
AddOpAttr
(
op_name
,
"num_axes"
,
1
);
engine_
->
AddOpAttr
(
op_name
,
"bias_term"
,
false
);
}
}
// namespace anakin
}
// namespace anakin
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
elementwise_add
,
ElementwiseAddOpConverter
);
REGISTER_ANAKIN_OP_CONVERTER
(
elementwise_add
,
ElementwiseAddOpConverter
);
REGISTER_ANAKIN_OP_CONVERTER
(
elementwise_mul
,
ElementwiseMulOpConverter
);
paddle/fluid/inference/anakin/convert/elementwise.h
浏览文件 @
07dcf285
...
@@ -32,6 +32,18 @@ class ElementwiseAddOpConverter : public AnakinOpConverter {
...
@@ -32,6 +32,18 @@ class ElementwiseAddOpConverter : public AnakinOpConverter {
private:
private:
};
};
class
ElementwiseMulOpConverter
:
public
AnakinOpConverter
{
public:
ElementwiseMulOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
ElementwiseMulOpConverter
()
{}
private:
};
}
// namespace anakin
}
// namespace anakin
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/anakin/convert/im2sequence.cc
0 → 100644
浏览文件 @
07dcf285
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/im2sequence.h"
#include <algorithm>
#include <string>
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
X86
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PBlock
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
Im2SequenceConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
1
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Y"
).
size
(),
0
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1
);
auto
x_name
=
op_desc
.
Input
(
"X"
).
front
();
auto
out_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
AddOp
(
op_name
,
"Im2Sequence"
,
{
x_name
},
{
out_name
});
std
::
vector
<
int
>
dilations
=
{
1
,
1
};
auto
paddings
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"paddings"
));
auto
strides
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"strides"
));
auto
kernels
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"kernels"
));
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"paddings"
,
paddings
);
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"strides"
,
strides
);
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"window_size"
,
kernels
);
engine_
->
AddOpAttr
<
PTuple
<
int
>>
(
op_name
,
"dilations"
,
dilations
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
im2sequence
,
Im2SequenceConverter
);
paddle/fluid/inference/anakin/convert/im2sequence.h
0 → 100644
浏览文件 @
07dcf285
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
class
Im2SequenceConverter
:
public
AnakinOpConverter
{
public:
Im2SequenceConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
Im2SequenceConverter
()
{}
private:
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/pool2d.cc
浏览文件 @
07dcf285
...
@@ -55,7 +55,11 @@ void Pool2dOpConverter::operator()(const framework::proto::OpDesc &op,
...
@@ -55,7 +55,11 @@ void Pool2dOpConverter::operator()(const framework::proto::OpDesc &op,
if
(
pool_type
==
"max"
)
{
if
(
pool_type
==
"max"
)
{
anakin_pool_type
=
"MAX"
;
anakin_pool_type
=
"MAX"
;
}
else
if
(
pool_type
==
"avg"
)
{
}
else
if
(
pool_type
==
"avg"
)
{
if
(
paddings
[
0
]
||
paddings
[
1
])
{
anakin_pool_type
=
"AVGEXC"
;
anakin_pool_type
=
"AVGEXC"
;
}
else
{
anakin_pool_type
=
"AVG"
;
}
}
else
{
}
else
{
PADDLE_THROW
(
"TensorRT unsupported pooling type!"
);
PADDLE_THROW
(
"TensorRT unsupported pooling type!"
);
}
}
...
...
paddle/fluid/inference/anakin/convert/sum.cc
0 → 100644
浏览文件 @
07dcf285
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/sum.h"
#include <algorithm>
#include <string>
#include <vector>
using
anakin
::
graph
::
GraphGlobalMem
;
using
anakin
::
AK_FLOAT
;
using
anakin
::
Precision
;
using
anakin
::
saber
::
NV
;
using
anakin
::
saber
::
X86
;
using
anakin
::
saber
::
Shape
;
using
anakin
::
PBlock
;
using
anakin
::
PTuple
;
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
void
SumOpConverter
::
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
{
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Input
(
"X"
).
size
(),
2
);
PADDLE_ENFORCE_EQ
(
op_desc
.
Output
(
"Out"
).
size
(),
1
);
auto
input_names
=
op_desc
.
Input
(
"X"
);
auto
out_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
op_name
=
op_desc
.
Type
()
+
":"
+
op_desc
.
Output
(
"Out"
).
front
();
std
::
vector
<
float
>
coeff
=
{
1
,
1
};
std
::
string
elementwise_type
=
"Add"
;
engine_
->
AddOp
(
op_name
,
"Eltwise"
,
input_names
,
{
out_name
});
engine_
->
AddOpAttr
<
PTuple
<
float
>>
(
op_name
,
"coeff"
,
coeff
);
engine_
->
AddOpAttr
<
std
::
string
>
(
op_name
,
"type"
,
elementwise_type
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
REGISTER_ANAKIN_OP_CONVERTER
(
sum
,
SumOpConverter
);
paddle/fluid/inference/anakin/convert/sum.h
0 → 100644
浏览文件 @
07dcf285
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
class
SumOpConverter
:
public
AnakinOpConverter
{
public:
SumOpConverter
()
=
default
;
virtual
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
;
virtual
~
SumOpConverter
()
{}
private:
};
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/anakin/convert/test_dropout_op.cc
0 → 100644
浏览文件 @
07dcf285
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/dropout.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
TEST
(
dropout_op
,
native
)
{
std
::
unordered_set
<
std
::
string
>
parameters
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
scope
);
validator
.
DeclInputVar
(
"x"
,
{
1
,
1
,
2
,
2
});
validator
.
DeclOutputVar
(
"out"
,
{
1
,
1
,
2
,
2
});
validator
.
DeclOutputVar
(
"mask"
,
{
1
,
1
,
2
,
2
});
// Prepare Op description
framework
::
OpDesc
desc
;
desc
.
SetType
(
"dropout"
);
desc
.
SetInput
(
"X"
,
{
"x"
});
desc
.
SetOutput
(
"Out"
,
{
"out"
});
desc
.
SetOutput
(
"Mask"
,
{
"mask"
});
float
dropout_prob
=
0.5
;
desc
.
SetAttr
(
"dropout_prob"
,
dropout_prob
);
desc
.
SetAttr
(
"is_test"
,
true
);
validator
.
SetOp
(
*
desc
.
Proto
());
std
::
unordered_set
<
std
::
string
>
neglected_output
=
{
"mask"
};
validator
.
Execute
(
1
,
neglected_output
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
dropout
);
USE_ANAKIN_CONVERTER
(
dropout
);
paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
浏览文件 @
07dcf285
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/elementwise.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
...
@@ -20,20 +21,20 @@ namespace paddle {
...
@@ -20,20 +21,20 @@ namespace paddle {
namespace
inference
{
namespace
inference
{
namespace
anakin
{
namespace
anakin
{
TEST
(
elementwise_op
,
nativ
e
)
{
static
void
test_elementwise_op
(
const
std
::
string
&
op_typ
e
)
{
std
::
unordered_set
<
std
::
string
>
parameters
;
std
::
unordered_set
<
std
::
string
>
parameters
;
framework
::
Scope
scope
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
scope
);
AnakinConvertValidation
validator
(
parameters
,
scope
);
validator
.
DeclInputVar
(
"
elementwise_add_
x"
,
{
1
,
1
,
2
,
2
});
validator
.
DeclInputVar
(
"x"
,
{
1
,
1
,
2
,
2
});
validator
.
DeclInputVar
(
"
elementwise_
y"
,
{
1
,
1
,
2
,
2
});
validator
.
DeclInputVar
(
"y"
,
{
1
,
1
,
2
,
2
});
validator
.
DeclOutputVar
(
"
elementwise_
out"
,
{
1
,
1
,
2
,
2
});
validator
.
DeclOutputVar
(
"out"
,
{
1
,
1
,
2
,
2
});
// Prepare Op description
// Prepare Op description
framework
::
OpDesc
desc
;
framework
::
OpDesc
desc
;
desc
.
SetType
(
"elementwise_add"
);
desc
.
SetType
(
op_type
);
desc
.
SetInput
(
"X"
,
{
"
elementwise_add_
x"
});
desc
.
SetInput
(
"X"
,
{
"x"
});
desc
.
SetInput
(
"Y"
,
{
"
elementwise_
y"
});
desc
.
SetInput
(
"Y"
,
{
"y"
});
desc
.
SetOutput
(
"Out"
,
{
"
elementwise_
out"
});
desc
.
SetOutput
(
"Out"
,
{
"out"
});
int
axis
=
-
1
;
int
axis
=
-
1
;
desc
.
SetAttr
(
"axis"
,
axis
);
desc
.
SetAttr
(
"axis"
,
axis
);
...
@@ -42,9 +43,14 @@ TEST(elementwise_op, native) {
...
@@ -42,9 +43,14 @@ TEST(elementwise_op, native) {
validator
.
Execute
(
1
);
validator
.
Execute
(
1
);
}
}
TEST
(
elementwise_op
,
native_add
)
{
test_elementwise_op
(
"elementwise_add"
);
}
TEST
(
elementwise_op
,
native_mul
)
{
test_elementwise_op
(
"elementwise_mul"
);
}
}
// namespace anakin
}
// namespace anakin
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
USE_OP
(
elementwise_add
);
USE_OP
(
elementwise_add
);
USE_ANAKIN_CONVERTER
(
elementwise_add
);
USE_ANAKIN_CONVERTER
(
elementwise_add
);
USE_OP
(
elementwise_mul
);
USE_ANAKIN_CONVERTER
(
elementwise_mul
);
paddle/fluid/inference/anakin/convert/test_im2sequence_op.cc
0 → 100644
浏览文件 @
07dcf285
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/im2sequence.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
TEST
(
im2sequence_op
,
native
)
{
std
::
unordered_set
<
std
::
string
>
parameters
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
scope
);
std
::
vector
<
int
>
kernels
=
{
6
,
1
};
std
::
vector
<
int
>
strides
=
{
1
,
1
};
std
::
vector
<
int
>
paddings
=
{
0
,
0
,
0
,
0
};
validator
.
DeclInputVar
(
"x"
,
{
1
,
1
,
2
,
2
});
validator
.
DeclOutputVar
(
"out"
,
{
1
,
1
*
kernels
[
0
]
*
kernels
[
1
]});
// Prepare Op description
framework
::
OpDesc
desc
;
desc
.
SetType
(
"im2sequence"
);
desc
.
SetInput
(
"X"
,
{
"x"
});
desc
.
SetOutput
(
"Out"
,
{
"out"
});
desc
.
SetAttr
(
"kernels"
,
kernels
);
desc
.
SetAttr
(
"strides"
,
strides
);
desc
.
SetAttr
(
"paddings"
,
paddings
);
validator
.
SetOp
(
*
desc
.
Proto
());
validator
.
Execute
(
1
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
im2sequence
);
USE_ANAKIN_CONVERTER
(
im2sequence
);
paddle/fluid/inference/anakin/convert/test_sum_op.cc
0 → 100644
浏览文件 @
07dcf285
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/sum.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
#include "paddle/fluid/operators/sum_op.h"
namespace
paddle
{
namespace
inference
{
namespace
anakin
{
TEST
(
sum
,
native
)
{
std
::
unordered_set
<
std
::
string
>
parameters
;
framework
::
Scope
scope
;
AnakinConvertValidation
validator
(
parameters
,
scope
);
validator
.
DeclInputVar
(
"sum_x1"
,
{
1
,
2
,
1
,
2
});
validator
.
DeclInputVar
(
"sum_x2"
,
{
1
,
2
,
1
,
2
});
validator
.
DeclOutputVar
(
"sum_out"
,
{
1
,
2
,
1
,
2
});
// Prepare Op description
framework
::
OpDesc
desc
;
desc
.
SetType
(
"sum"
);
desc
.
SetInput
(
"X"
,
{
"sum_x1"
,
"sum_x2"
});
desc
.
SetOutput
(
"Out"
,
{
"sum_out"
});
validator
.
SetOp
(
*
desc
.
Proto
());
validator
.
Execute
(
1
);
}
}
// namespace anakin
}
// namespace inference
}
// namespace paddle
USE_OP
(
sum
);
USE_ANAKIN_CONVERTER
(
sum
);
paddle/fluid/inference/anakin/op_teller.cc
浏览文件 @
07dcf285
...
@@ -28,6 +28,7 @@ struct SimpleOpTypeSetTeller : public Teller {
...
@@ -28,6 +28,7 @@ struct SimpleOpTypeSetTeller : public Teller {
teller_set
.
insert
(
"relu"
);
teller_set
.
insert
(
"relu"
);
teller_set
.
insert
(
"pool2d"
);
teller_set
.
insert
(
"pool2d"
);
teller_set
.
insert
(
"elementwise_add"
);
teller_set
.
insert
(
"elementwise_add"
);
teller_set
.
insert
(
"elementwise_mul"
);
teller_set
.
insert
(
"concat"
);
teller_set
.
insert
(
"concat"
);
teller_set
.
insert
(
"tanh"
);
teller_set
.
insert
(
"tanh"
);
teller_set
.
insert
(
"conv2d"
);
teller_set
.
insert
(
"conv2d"
);
...
@@ -38,7 +39,9 @@ struct SimpleOpTypeSetTeller : public Teller {
...
@@ -38,7 +39,9 @@ struct SimpleOpTypeSetTeller : public Teller {
teller_set
.
insert
(
"transpose2"
);
teller_set
.
insert
(
"transpose2"
);
teller_set
.
insert
(
"density_prior_box"
);
teller_set
.
insert
(
"density_prior_box"
);
teller_set
.
insert
(
"detection_out"
);
teller_set
.
insert
(
"detection_out"
);
teller_set
.
insert
(
"scale"
);
teller_set
.
insert
(
"dropout"
);
teller_set
.
insert
(
"sigmoid"
);
teller_set
.
insert
(
"sum"
);
}
}
bool
operator
()(
const
std
::
string
&
op_type
,
bool
operator
()(
const
std
::
string
&
op_type
,
...
...
paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
浏览文件 @
07dcf285
...
@@ -47,7 +47,7 @@ std::unique_ptr<framework::ir::Graph> analysis::AnakinSubgraphPass::ApplyImpl(
...
@@ -47,7 +47,7 @@ std::unique_ptr<framework::ir::Graph> analysis::AnakinSubgraphPass::ApplyImpl(
return
anakin
::
OpTeller
::
Global
().
Tell
(
node
->
Op
()
->
Type
(),
*
node
->
Op
());
return
anakin
::
OpTeller
::
Global
().
Tell
(
node
->
Op
()
->
Type
(),
*
node
->
Op
());
};
};
SubGraphFuser
fuser
(
graph
.
get
(),
teller
,
0
/* min_subgraph_size */
);
SubGraphFuser
fuser
(
graph
.
get
(),
teller
,
6
/* min_subgraph_size */
);
fuser
();
fuser
();
std
::
vector
<
std
::
string
>
graph_param_names
=
std
::
vector
<
std
::
string
>
graph_param_names
=
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
浏览文件 @
07dcf285
...
@@ -210,13 +210,14 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
...
@@ -210,13 +210,14 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
SetAttr
(
op_desc
->
Proto
(),
"parameters"
,
params
);
SetAttr
(
op_desc
->
Proto
(),
"parameters"
,
params
);
auto
enable_int8
=
Get
<
bool
>
(
"enable_int8"
);
auto
enable_int8
=
Get
<
bool
>
(
"enable_int8"
);
auto
use_static_engine
=
Get
<
bool
>
(
"use_static_engine"
);
auto
engine_key
=
GenerateEngineKey
(
input_names_with_id
,
output_names_with_id
,
auto
engine_key
=
GenerateEngineKey
(
input_names_with_id
,
output_names_with_id
,
std
::
to_string
(
0
));
std
::
to_string
(
0
));
// Get "" when there is no cached calibration table data.
// Get "" when there is no cached calibration table data.
bool
load_from_memory
=
Get
<
bool
>
(
"model_from_memory"
);
bool
load_from_memory
=
Get
<
bool
>
(
"model_from_memory"
);
std
::
string
calibration_data
=
""
;
std
::
string
calibration_data
=
""
;
if
(
!
load_from_memory
)
{
if
(
!
load_from_memory
&&
use_static_engine
)
{
calibration_data
=
GetTrtCalibTableData
(
calibration_data
=
GetTrtCalibTableData
(
Get
<
std
::
string
>
(
"model_opt_cache_dir"
),
engine_key
,
enable_int8
);
Get
<
std
::
string
>
(
"model_opt_cache_dir"
),
engine_key
,
enable_int8
);
}
}
...
@@ -240,7 +241,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
...
@@ -240,7 +241,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
calibrator
.
reset
(
new
tensorrt
::
TRTInt8Calibrator
(
calibration_data
));
calibrator
.
reset
(
new
tensorrt
::
TRTInt8Calibrator
(
calibration_data
));
}
}
bool
use_static_engine
=
Get
<
bool
>
(
"use_static_engine"
);
// When in int8 mode and calibration_mode, the program just produce the
// When in int8 mode and calibration_mode, the program just produce the
// calibration table data.
// calibration table data.
bool
calibration_mode
=
(
enable_int8
&&
calibration_data
.
size
()
==
0
);
bool
calibration_mode
=
(
enable_int8
&&
calibration_data
.
size
()
==
0
);
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
07dcf285
...
@@ -828,12 +828,13 @@ USE_ANAKIN_CONVERTER(sigmoid);
...
@@ -828,12 +828,13 @@ USE_ANAKIN_CONVERTER(sigmoid);
USE_ANAKIN_CONVERTER
(
tanh
);
USE_ANAKIN_CONVERTER
(
tanh
);
USE_ANAKIN_CONVERTER
(
pool2d
);
USE_ANAKIN_CONVERTER
(
pool2d
);
USE_ANAKIN_CONVERTER
(
elementwise_add
);
USE_ANAKIN_CONVERTER
(
elementwise_add
);
USE_ANAKIN_CONVERTER
(
elementwise_mul
);
USE_ANAKIN_CONVERTER
(
batch_norm
);
USE_ANAKIN_CONVERTER
(
batch_norm
);
USE_ANAKIN_CONVERTER
(
flatten
);
USE_ANAKIN_CONVERTER
(
flatten
);
USE_ANAKIN_CONVERTER
(
reshape
);
USE_ANAKIN_CONVERTER
(
reshape
);
USE_ANAKIN_CONVERTER
(
transpose
);
USE_ANAKIN_CONVERTER
(
transpose
);
USE_ANAKIN_CONVERTER
(
softmax
);
USE_ANAKIN_CONVERTER
(
softmax
);
USE_ANAKIN_CONVERTER
(
detection_out
);
USE_ANAKIN_CONVERTER
(
detection_out
);
USE_ANAKIN_CONVERTER
(
density_prior_box
);
USE_ANAKIN_CONVERTER
(
density_prior_box
);
USE_ANAKIN_CONVERTER
(
scale
);
USE_ANAKIN_CONVERTER
(
dropout
);
USE_ANAKIN_CONVERTER
(
sum
);
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
07dcf285
...
@@ -138,7 +138,7 @@ struct AnalysisConfig {
...
@@ -138,7 +138,7 @@ struct AnalysisConfig {
void
EnableTensorRtEngine
(
int
workspace_size
=
1
<<
20
,
void
EnableTensorRtEngine
(
int
workspace_size
=
1
<<
20
,
int
max_batch_size
=
1
,
int
min_subgraph_size
=
3
,
int
max_batch_size
=
1
,
int
min_subgraph_size
=
3
,
Precision
precision
=
Precision
::
kFloat32
,
Precision
precision
=
Precision
::
kFloat32
,
bool
use_static
=
tru
e
);
bool
use_static
=
fals
e
);
/** A boolean state telling whether the TensorRT engine is used.
/** A boolean state telling whether the TensorRT engine is used.
*/
*/
bool
tensorrt_engine_enabled
()
const
{
return
use_tensorrt_
;
}
bool
tensorrt_engine_enabled
()
const
{
return
use_tensorrt_
;
}
...
...
paddle/fluid/inference/api/paddle_pass_builder.cc
浏览文件 @
07dcf285
...
@@ -80,6 +80,7 @@ const std::vector<std::string> kAnakinSubgraphPasses({
...
@@ -80,6 +80,7 @@ const std::vector<std::string> kAnakinSubgraphPasses({
"conv_elementwise_add_fuse_pass"
,
//
"conv_elementwise_add_fuse_pass"
,
//
"conv_bn_fuse_pass"
,
//
"conv_bn_fuse_pass"
,
//
"conv_elementwise_add_fuse_pass"
,
//
"conv_elementwise_add_fuse_pass"
,
//
"fc_gru_fuse_pass"
,
//
"anakin_subgraph_pass"
,
"anakin_subgraph_pass"
,
});
});
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
07dcf285
...
@@ -233,6 +233,8 @@ void CudnnHolder::ReallocateWorkspace(size_t required_workspace_len) {
...
@@ -233,6 +233,8 @@ void CudnnHolder::ReallocateWorkspace(size_t required_workspace_len) {
paddle
::
memory
::
Allocator
::
kScratchpad
);
paddle
::
memory
::
Allocator
::
kScratchpad
);
}
}
std
::
once_flag
CUDADeviceContext
::
init_cudnn_
;
CUDADeviceContext
::
CUDADeviceContext
(
CUDAPlace
place
)
CUDADeviceContext
::
CUDADeviceContext
(
CUDAPlace
place
)
:
place_
(
place
),
cudnn_holder_
(
nullptr
)
{
:
place_
(
place
),
cudnn_holder_
(
nullptr
)
{
CUDADeviceGuard
guard
(
place_
.
device
);
CUDADeviceGuard
guard
(
place_
.
device
);
...
@@ -252,10 +254,6 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
...
@@ -252,10 +254,6 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
#endif
#endif
}
}
if
(
dynload
::
HasCUDNN
())
{
cudnn_holder_
.
reset
(
new
CudnnHolder
(
&
stream_
,
place
));
}
driver_version_
=
GetCUDADriverVersion
(
place_
.
device
);
driver_version_
=
GetCUDADriverVersion
(
place_
.
device
);
runtime_version_
=
GetCUDARuntimeVersion
(
place_
.
device
);
runtime_version_
=
GetCUDARuntimeVersion
(
place_
.
device
);
...
@@ -348,12 +346,21 @@ bool CUDADeviceContext::tensor_core_available() const {
...
@@ -348,12 +346,21 @@ bool CUDADeviceContext::tensor_core_available() const {
return
cublas_tensor_core_handle_
!=
nullptr
;
return
cublas_tensor_core_handle_
!=
nullptr
;
}
}
CudnnHolder
*
CUDADeviceContext
::
cudnn_holder
()
const
{
std
::
call_once
(
init_cudnn_
,
[
&
]()
{
if
(
dynload
::
HasCUDNN
())
{
cudnn_holder_
.
reset
(
new
CudnnHolder
(
&
stream_
,
place_
));
}
});
return
cudnn_holder_
.
get
();
}
cudnnHandle_t
CUDADeviceContext
::
cudnn_handle
()
const
{
cudnnHandle_t
CUDADeviceContext
::
cudnn_handle
()
const
{
return
cudnn_holder
_
->
cudnn_handle
();
return
cudnn_holder
()
->
cudnn_handle
();
}
}
CudnnWorkspaceHandle
CUDADeviceContext
::
cudnn_workspace_handle
()
const
{
CudnnWorkspaceHandle
CUDADeviceContext
::
cudnn_workspace_handle
()
const
{
return
CudnnWorkspaceHandle
(
cudnn_holder
_
.
get
());
return
CudnnWorkspaceHandle
(
cudnn_holder
());
}
}
cudaStream_t
CUDADeviceContext
::
stream
()
const
{
return
stream_
;
}
cudaStream_t
CUDADeviceContext
::
stream
()
const
{
return
stream_
;
}
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
07dcf285
...
@@ -290,9 +290,11 @@ class CUDADeviceContext : public DeviceContext {
...
@@ -290,9 +290,11 @@ class CUDADeviceContext : public DeviceContext {
private:
private:
CUDAPlace
place_
;
CUDAPlace
place_
;
static
std
::
once_flag
init_cudnn_
;
std
::
unique_ptr
<
Eigen
::
GpuDevice
>
eigen_device_
;
std
::
unique_ptr
<
Eigen
::
GpuDevice
>
eigen_device_
;
std
::
unique_ptr
<
EigenCudaStreamDevice
>
eigen_stream_
;
std
::
unique_ptr
<
EigenCudaStreamDevice
>
eigen_stream_
;
std
::
unique_ptr
<
CudnnHolder
>
cudnn_holder_
;
mutable
std
::
unique_ptr
<
CudnnHolder
>
cudnn_holder_
;
cudaStream_t
stream_
;
cudaStream_t
stream_
;
std
::
unique_ptr
<
CublasHandleHolder
>
cublas_handle_
;
std
::
unique_ptr
<
CublasHandleHolder
>
cublas_handle_
;
...
@@ -315,6 +317,7 @@ class CUDADeviceContext : public DeviceContext {
...
@@ -315,6 +317,7 @@ class CUDADeviceContext : public DeviceContext {
// StreamCallbackManager is thread-safe
// StreamCallbackManager is thread-safe
std
::
unique_ptr
<
StreamCallbackManager
>
callback_manager_
;
std
::
unique_ptr
<
StreamCallbackManager
>
callback_manager_
;
CudnnHolder
*
cudnn_holder
()
const
;
DISABLE_COPY_AND_ASSIGN
(
CUDADeviceContext
);
DISABLE_COPY_AND_ASSIGN
(
CUDADeviceContext
);
};
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录