机器未来 / Paddle (forked from PaddlePaddle / Paddle)
Commit 5993155d
Authored October 23, 2018 by wanghaoshuang
Merge remote-tracking branch 'dzhwinter/windows/support' into windows/support
Parents: f9e7cfb0, e41a3fcd
11 changed files with 675 additions and 403 deletions (+675, -403)
Changed files:
  paddle/fluid/framework/CMakeLists.txt                     +14   -14
  paddle/fluid/framework/data_type_transform.cu             +106  -15
  paddle/fluid/framework/ir/node.cc                         +5    -2
  paddle/fluid/framework/ir/node.h                          +4    -0
  paddle/fluid/framework/operator.cc                        +1    -1
  paddle/fluid/framework/tensor_util.cu                     +362  -15
  paddle/fluid/inference/api/demo_ci/inference_icnet.cc     +157  -246
  paddle/fluid/inference/api/demo_ci/naive_model_test.cc    +0    -97
  paddle/fluid/operators/conv_cudnn_op.cu.cc                +13   -6
  paddle/fluid/operators/load_combine_op.cc                 +9    -7
  paddle/fluid/platform/cudnn_helper.h                      +4    -0
paddle/fluid/framework/CMakeLists.txt (+14, -14)

@@ -43,13 +43,13 @@ nv_test(dim_test SRCS dim_test.cu DEPS ddim)
 cc_library(data_type SRCS data_type.cc DEPS framework_proto ddim device_context)
 cc_test(data_type_test SRCS data_type_test.cc DEPS data_type place tensor)
 if(WITH_GPU)
+  if (WIN32)
+    windows_symbolic(tensor_util SRCS tensor_util.cu)
+    nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context)
+    add_dependencies(tensor tensor_util)
+  else()
     nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context)
+  endif(WIN32)
 else()
   cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context)
 endif()

@@ -93,15 +93,15 @@ nv_test(data_device_transform_test SRCS data_device_transform_test.cu
         DEPS operator op_registry device_context math_function)
 if(WITH_GPU)
+  if (WIN32)
+    # windows treat symbolic file as a real file, which is different with unix
+    # We create a hidden file and compile it instead of origin source file.
+    windows_symbolic(hidden_file SRCS data_type_transform.cu)
+    nv_library(data_type_transform SRCS .data_type_transform.cu DEPS tensor)
+    add_dependencies(data_type_transform hidden_file)
+  else()
     nv_library(data_type_transform SRCS data_type_transform.cu DEPS tensor)
+  endif(WIN32)
   nv_test(data_type_transform_test SRCS data_type_transform_test.cc data_type_transform_test.cu DEPS data_type_transform)
 else()
   cc_library(data_type_transform SRCS data_type_transform.cc DEPS tensor)
paddle/fluid/framework/data_type_transform.cu (+106, -15)

Removed: the previous content was the 2016-style block-comment license header followed by the single line "data_type_transform.cc" (no newline at end of file), i.e. the symlink target checked out as a plain text file on Windows.

Added: the full implementation.

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/data_type_transform.h"

#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/transform.h"

namespace paddle {
namespace framework {

template <typename InType, typename OutType>
struct CastDataTypeFunctor {
  HOSTDEVICE inline OutType operator()(InType in) const {
    return static_cast<OutType>(in);
  }
};

template <typename InType>
struct CastDataType {
  CastDataType(const framework::Tensor& in, framework::Tensor* out,
               const platform::DeviceContext* ctx)
      : in_(in), out_(out), ctx_(ctx) {}
  const framework::Tensor in_;
  framework::Tensor* out_;
  const platform::DeviceContext* ctx_;

  template <typename OutType>
  void apply() {
    auto* in_begin = in_.data<InType>();
    auto* in_end = in_begin + in_.numel();
    auto* out_begin = out_->mutable_data<OutType>(in_.place());

    if (platform::is_cpu_place(in_.place())) {
      platform::Transform<platform::CPUDeviceContext> trans;
      auto* context = static_cast<const platform::CPUDeviceContext*>(ctx_);
      trans(*context, in_begin, in_end, out_begin,
            CastDataTypeFunctor<InType, OutType>());
#ifdef __NVCC__
    } else if (platform::is_gpu_place(in_.place())) {
      platform::Transform<platform::CUDADeviceContext> trans;
      auto* context = static_cast<const platform::CUDADeviceContext*>(ctx_);
      trans(*context, in_begin, in_end, out_begin,
            CastDataTypeFunctor<InType, OutType>());
      context->Wait();
#endif
    } else {
      PADDLE_THROW("Unsupported place!");
    }
  }
};

void TransDataType(const OpKernelType& kernel_type_for_var,
                   const OpKernelType& expected_kernel_type, const Tensor& in,
                   Tensor* out) {
  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();

  out->Resize(in.dims());
  auto src_type = kernel_type_for_var.data_type_;
  auto dst_type = expected_kernel_type.data_type_;
  auto ctx = pool.Get(in.place());

  switch (src_type) {
    case proto::VarType::FP16:
      framework::VisitDataType(dst_type,
                               CastDataType<platform::float16>(in, out, ctx));
      break;
    case proto::VarType::FP32:
      framework::VisitDataType(dst_type, CastDataType<float>(in, out, ctx));
      break;
    case proto::VarType::FP64:
      framework::VisitDataType(dst_type, CastDataType<double>(in, out, ctx));
      break;
    case proto::VarType::INT32:
      framework::VisitDataType(dst_type, CastDataType<int>(in, out, ctx));
      break;
    case proto::VarType::INT64:
      framework::VisitDataType(dst_type, CastDataType<int64_t>(in, out, ctx));
      break;
    case proto::VarType::BOOL:
      framework::VisitDataType(dst_type, CastDataType<bool>(in, out, ctx));
      break;
    case proto::VarType::INT16:
      framework::VisitDataType(dst_type, CastDataType<bool>(in, out, ctx));
      break;
    case proto::VarType::UINT8:
      framework::VisitDataType(dst_type, CastDataType<bool>(in, out, ctx));
      break;
    default:
      PADDLE_THROW("Not support type %d", src_type);
  }
}

}  // namespace framework
}  // namespace paddle
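Note (not part of the diff): a minimal sketch of how TransDataType is typically invoked, here casting an FP32 CPU tensor to FP64. The OpKernelType constructor arguments and the shapes below are assumptions for illustration only.

// Illustrative only: cast a float32 CPU tensor to float64 via TransDataType.
#include "paddle/fluid/framework/data_type_transform.h"

void CastExample() {
  namespace fw = paddle::framework;
  fw::Tensor in, out;
  in.Resize({4});
  in.mutable_data<float>(paddle::platform::CPUPlace());  // allocate FP32 data

  // Assumed constructor: (data type, place); layout/library use defaults.
  fw::OpKernelType src_type(fw::proto::VarType::FP32, paddle::platform::CPUPlace());
  fw::OpKernelType dst_type(fw::proto::VarType::FP64, paddle::platform::CPUPlace());

  // Resizes `out`, then visits dst_type and applies CastDataTypeFunctor element-wise.
  fw::TransDataType(src_type, dst_type, in, &out);
}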
paddle/fluid/framework/ir/node.cc (+5, -2)

@@ -17,8 +17,11 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 namespace ir {
-constexpr char Node::kControlDepVarName[] = "__control_var";
+#if !defined(_WIN32)
+constexpr char Node::kControlDepVarName[];
+#else
+const char Node::kControlDepVarName[] = "__control_var";
+#endif
 int Node::count_ = 0;
 }  // namespace ir
 }  // namespace framework
paddle/fluid/framework/ir/node.h (+4, -0)

@@ -27,7 +27,11 @@ namespace ir {
 class Node {
  public:
   enum class Type { kOperation, kVariable };
+#if !defined(_WIN32)  // msvc not support constexpr correctly.
   static constexpr char kControlDepVarName[] = "__control_var";
+#else
+  static const char kControlDepVarName[];
+#endif

   explicit Node(const std::string& name, Type type)
       : name_(name),
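Note (not part of the diff): a standalone sketch of the #if !defined(_WIN32) pattern used in node.h/node.cc above. The MSVC of that era did not handle constexpr static data members correctly (as the in-diff comment says), so on Windows the initializer moves out of the class into the source file.

// minimal_constexpr_member.cc — illustrative only, not Paddle code.
#include <cstdio>

struct Node {
#if !defined(_WIN32)
  static constexpr char kControlDepVarName[] = "__control_var";
#else
  static const char kControlDepVarName[];
#endif
};

// Out-of-class definition: no initializer where the class already provides one,
// the full definition where it does not (the Windows branch).
#if !defined(_WIN32)
constexpr char Node::kControlDepVarName[];
#else
const char Node::kControlDepVarName[] = "__control_var";
#endif

int main() { std::printf("%s\n", Node::kControlDepVarName); }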
paddle/fluid/framework/operator.cc (+1, -1)

The only change in this hunk is whitespace inside the VLOG message:

@@ -689,7 +689,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   auto expected_kernel_key =
       this->GetExpectedKernelType(ExecutionContext(*this, scope, *dev_ctx));
   VLOG(3) << "expected_kernel_key:" << expected_kernel_key;

   auto kernel_iter = kernels.find(expected_kernel_key);
 #ifdef PADDLE_WITH_MKLDNN
paddle/fluid/framework/tensor_util.cu (+362, -15)

Removed: the previous content was the 2016-style block-comment license header followed by the single line "tensor_util.cc" (no newline at end of file), i.e. the symlink target checked out as a plain text file on Windows.

Added: the full implementation.

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/tensor_util.h"
#include <algorithm>
#include <limits>
#include <vector>
#include "paddle/fluid/framework/data_type.h"

namespace paddle {
namespace framework {

void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                const platform::DeviceContext& ctx, Tensor* dst) {
  VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
          << dst_place;
  src.check_memory_size();

  dst->Resize(src.dims());
  dst->set_layout(src.layout());
  auto src_place = src.place();
  auto src_ptr = src.data<void>();

  auto dst_ptr = dst->mutable_data(dst_place, src.type());

  auto size = src.numel() * SizeOfType(src.type());

  if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
    memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
                 boost::get<platform::CPUPlace>(src_place), src_ptr, size);
  }
#ifdef PADDLE_WITH_CUDA
  else if (platform::is_gpu_place(src_place) &&  // NOLINT
           platform::is_cpu_place(dst_place)) {
    auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
    auto dst_cpu_place = boost::get<platform::CPUPlace>(dst_place);
    auto ctx_place = ctx.GetPlace();
    PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
    auto ctx_gpu_place = boost::get<platform::CUDAPlace>(ctx_place);
    PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
    auto stream =
        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
    memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
  } else if (platform::is_cpu_place(src_place) &&
             platform::is_gpu_place(dst_place)) {
    auto src_cpu_place = boost::get<platform::CPUPlace>(src_place);
    auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
    auto ctx_place = ctx.GetPlace();
    PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
    auto ctx_gpu_place = boost::get<platform::CUDAPlace>(ctx_place);
    PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place);
    auto stream =
        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
    memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream);
  } else if (platform::is_gpu_place(src_place) &&
             platform::is_gpu_place(dst_place)) {
    auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
    auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
    auto ctx_place = ctx.GetPlace();
    PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
    auto stream =
        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
    if (platform::is_same_place(src_place, dst_place)) {
      memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
                   stream);
    } else {
      if (platform::is_same_place(ctx_place, src_place)) {
        memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
                     stream);
        platform::DeviceContextPool::Instance().Get(src.place())->Wait();
      } else if (platform::is_same_place(ctx_place, dst_place)) {
        platform::DeviceContextPool::Instance().Get(src.place())->Wait();
        memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
                     stream);
      } else {
        PADDLE_THROW("ctx is not belong to dst_gpu_place or src_gpu_place.");
      }
    }
  }
#endif
}

void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                Tensor* dst) {
  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
  const platform::DeviceContext* dev_ctx;
  if (platform::is_gpu_place(dst_place)) {
    dev_ctx = pool.Get(dst_place);
  } else {
    dev_ctx = pool.Get(src.place());
  }
  TensorCopy(src, dst_place, *dev_ctx, dst);
}

void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
                    Tensor* dst) {
  VLOG(3) << "TensorCopySync " << src.dims() << " from " << src.place()
          << " to " << dst_place;
  src.check_memory_size();
  dst->Resize(src.dims());
  dst->set_layout(src.layout());
  auto src_place = src.place();
  auto src_ptr = src.data<void>();
  auto dst_ptr = dst->mutable_data(dst_place, src.type());
  auto size = src.numel() * SizeOfType(src.type());
  if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
    memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
                 boost::get<platform::CPUPlace>(src_place), src_ptr, size);
  }
#ifdef PADDLE_WITH_CUDA
  else if (platform::is_gpu_place(src_place) &&  // NOLINT
           platform::is_cpu_place(dst_place)) {
    auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
    auto dst_cpu_place = boost::get<platform::CPUPlace>(dst_place);
    memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr);
  } else if (platform::is_cpu_place(src_place) &&
             platform::is_gpu_place(dst_place)) {
    auto src_cpu_place = boost::get<platform::CPUPlace>(src_place);
    auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
    memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, nullptr);
  } else if (platform::is_gpu_place(src_place) &&
             platform::is_gpu_place(dst_place)) {
    auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
    auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
    memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr);
  }
#endif
}

template <typename Predicate, typename DevCtx>
struct AnyDTypeVisitor {
  Predicate predicate_;
  const Tensor& tensor_;
  const DevCtx& ctx_;
  Tensor* out_;

  AnyDTypeVisitor(Predicate predicate, const Tensor& tensor, const DevCtx& ctx,
                  Tensor* out)
      : predicate_(predicate), tensor_(tensor), ctx_(ctx), out_(out) {}

  template <typename T>
  void apply() const {
    auto t = EigenVector<T>::Flatten(tensor_);
    auto o = EigenScalar<bool>::From(*out_);
    // return any of predicate_(t) is true.
    o.device(*ctx_.eigen_device()) = predicate_(t).any();
  }
};

template <typename Predicate, typename DevCtx>
inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor,
                    const DevCtx& ctx, framework::Tensor* out) {
  VisitDataType(ToDataType(tensor.type()), AnyDTypeVisitor<Predicate, DevCtx>(
                                               predicate, tensor, ctx, out));
}

template <typename Predicate>
struct AnyVisitor : public boost::static_visitor<bool> {
  const framework::Tensor& tensor_;
  Predicate predicate_;
  AnyVisitor(const framework::Tensor& tensor, Predicate predicate)
      : tensor_(tensor), predicate_(std::move(predicate)) {}

  template <typename Place>
  bool operator()(const Place& place) const {
    framework::Tensor out;
    out.Resize({1});
    out.mutable_data<bool>(place);
    auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place);
    AnyImpl(predicate_, tensor_, *ctx, &out);
    return this->GetResult(out, place);
  }

  bool GetResult(const framework::Tensor& out,
                 const platform::CUDAPlace& gpu) const {
    platform::CPUPlace cpu;
    framework::Tensor tmp;
    tmp.Resize({1});
    tmp.mutable_data<bool>(cpu);
    auto gpuctx = platform::DeviceContextPool::Instance().Get(gpu);
    gpuctx->Wait();
    TensorCopy(out, cpu, *gpuctx, &tmp);
    gpuctx->Wait();
    return GetResult(tmp, cpu);
  }

  bool GetResult(const framework::Tensor& out,
                 const platform::CPUPlace& cpu) const {
    return *out.data<bool>();
  }

  bool GetResult(const framework::Tensor& out,
                 const platform::CUDAPinnedPlace& cpu) const {
    return *out.data<bool>();
  }
};

template <typename Predicate>
inline bool Any(const framework::Tensor& tensor, Predicate predicate) {
  AnyVisitor<Predicate> visitor(tensor, predicate);
  auto place = tensor.place();
  return platform::VisitPlace(place, visitor);
}

struct ContainsNANPredicate {
  template <typename T>
  auto operator()(const T& eigen_vec) const
      -> decltype(std::declval<T>().isnan()) {
    // Cast eigen_vector to vector of bool. true if is inf.
    return eigen_vec.isnan();
  }
};

bool TensorContainsNAN(const framework::Tensor& tensor) {
  ContainsNANPredicate predicate;
  return Any(tensor, predicate);
}

struct ContainsInfPredicate {
  template <typename T>
  auto operator()(const T& eigen_vec) const
      -> decltype(std::declval<T>().isinf()) {
    // Cast eigen_vector to vector of bool. true if is inf.
    return eigen_vec.isinf();
  }
};

bool TensorContainsInf(const framework::Tensor& tensor) {
  ContainsInfPredicate predicate;
  return Any(tensor, predicate);
}

void TensorToStream(std::ostream& os, const Tensor& tensor,
                    const platform::DeviceContext& dev_ctx) {
  {  // the 1st field, uint32_t version
    constexpr uint32_t version = 0;
    os.write(reinterpret_cast<const char*>(&version), sizeof(version));
  }
  {  // the 2nd field, tensor description
     // int32_t  size
     // void*    protobuf message
    proto::VarType::TensorDesc desc;
    desc.set_data_type(framework::ToDataType(tensor.type()));
    auto dims = framework::vectorize(tensor.dims());
    auto* pb_dims = desc.mutable_dims();
    pb_dims->Resize(static_cast<int>(dims.size()), 0);
    std::copy(dims.begin(), dims.end(), pb_dims->begin());
    int32_t size = desc.ByteSize();
    os.write(reinterpret_cast<const char*>(&size), sizeof(size));
    auto out = desc.SerializeAsString();
    os.write(out.data(), size);
  }
  {  // the 3rd field, tensor data
    uint64_t size = tensor.numel() * framework::SizeOfType(tensor.type());

    auto* data_ptr = tensor.data<void>();
    PADDLE_ENFORCE(size < std::numeric_limits<std::streamsize>::max(),
                   "Index overflow when writing tensor");
    if (platform::is_gpu_place(tensor.place())) {
#ifdef PADDLE_WITH_CUDA
      constexpr size_t kBufSize = 1024 * 1024 * 64;  // 64MB
      std::unique_ptr<char[]> buf(new char[kBufSize]);
      auto& gpu_dev_ctx =
          static_cast<const platform::CUDADeviceContext&>(dev_ctx);
      platform::CPUPlace cpu;
      uintptr_t data = reinterpret_cast<uintptr_t>(data_ptr);
      while (size != 0) {
        size_t size_to_write = std::min(kBufSize, static_cast<size_t>(size));
        memory::Copy(cpu, buf.get(),
                     boost::get<platform::CUDAPlace>(tensor.place()),
                     reinterpret_cast<const void*>(data), size_to_write,
                     gpu_dev_ctx.stream());
        gpu_dev_ctx.Wait();
        os.write(buf.get(), size_to_write);
        data += size_to_write;
        size -= size_to_write;
      }
#else
      PADDLE_THROW("Unexpected branch");
#endif
    } else {
      os.write(static_cast<const char*>(data_ptr),
               static_cast<std::streamsize>(size));
    }
  }
}

struct DeserializedDataFunctor {
  DeserializedDataFunctor(void** buf, Tensor* tensor,
                          const platform::Place& place)
      : buf_(buf), tensor_(tensor), place_(place) {}

  template <typename T>
  void apply() {
    *buf_ = tensor_->mutable_data<T>(place_);
  }

  void** buf_;
  Tensor* tensor_;
  platform::Place place_;
};

void TensorFromStream(std::istream& is, Tensor* tensor,
                      const platform::DeviceContext& dev_ctx) {
  uint32_t version;
  is.read(reinterpret_cast<char*>(&version), sizeof(version));
  PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
  proto::VarType::TensorDesc desc;
  {  // int32_t size
     // proto buffer
    int32_t size;
    is.read(reinterpret_cast<char*>(&size), sizeof(size));
    std::unique_ptr<char[]> buf(new char[size]);
    is.read(reinterpret_cast<char*>(buf.get()), size);
    PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size),
                   "Cannot parse tensor desc");
  }
  {  // read tensor
    std::vector<int64_t> dims;
    dims.reserve(static_cast<size_t>(desc.dims().size()));
    std::copy(desc.dims().begin(), desc.dims().end(),
              std::back_inserter(dims));
    tensor->Resize(framework::make_ddim(dims));
    void* buf;
    auto ctx = platform::CPUDeviceContext();
    size_t size =
        tensor->numel() *
        framework::SizeOfType(framework::ToTypeIndex(desc.data_type()));
    if (platform::is_gpu_place(dev_ctx.GetPlace())) {
#ifdef PADDLE_WITH_CUDA
      Tensor cpu_tensor;
      cpu_tensor.Resize(framework::make_ddim(dims));
      framework::VisitDataType(
          desc.data_type(),
          DeserializedDataFunctor(&buf, &cpu_tensor, ctx.GetPlace()));
      is.read(static_cast<char*>(buf), size);
      auto dst_place = dev_ctx.GetPlace();
      framework::TensorCopy(cpu_tensor, dst_place, dev_ctx, tensor);
#else
      PADDLE_THROW("Unexpected branch");
#endif
    } else {
      framework::VisitDataType(
          desc.data_type(),
          DeserializedDataFunctor(&buf, tensor, ctx.GetPlace()));
      is.read(static_cast<char*>(buf), size);
    }
  }
}

}  // namespace framework
}  // namespace paddle
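Note (not part of the diff): a minimal sketch of exercising the copy and NaN-check routines above on CPU tensors; the shapes and values are illustrative assumptions.

// Illustrative only: TensorCopySync + TensorContainsNAN on CPU data.
#include "paddle/fluid/framework/tensor_util.h"

void CopyAndCheckExample() {
  paddle::framework::Tensor src, dst;
  src.Resize({2, 3});
  float* p = src.mutable_data<float>(paddle::platform::CPUPlace());
  for (int i = 0; i < 6; ++i) p[i] = static_cast<float>(i);

  // Synchronous copy; internally this dispatches on the src/dst places.
  paddle::framework::TensorCopySync(src, paddle::platform::CPUPlace(), &dst);

  bool has_nan = paddle::framework::TensorContainsNAN(dst);  // false here
  (void)has_nan;
}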
paddle/fluid/inference/api/demo_ci/inference_icnet.cc (+157, -246) — the file is rewritten.

Removed: the previous demo program (namespace paddle::demo), which printed a usage/banner message (message_err), configured NativeConfig from MODELDIR ("./LB_icnet_model") and USE_GPU, read one record from DATA ("./1.png.txt") with ProcessALine(), ran the native predictor in Main(bool use_gpu), summarized the output with SummaryTensor()/ToString(), compared it against REFER ("./icnet_label.txt") in CheckOutput(), and ended main() with system("pause").

Added: the new benchmark-style program below.

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cassert>
#include <chrono>
#include <iostream>
#include <fstream>
#include <algorithm>
#include <vector>
#include <string>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

namespace paddle {

std::string DIRNAME = "./Release/infer_model";
std::string DATA = "./test-image.txt";
const int C = 3;    // image channel
const int H = 449;  // image height
const int W = 581;  // image width

// Data format:
// "<space splitted floats as data>\t<space splitted ints as shape>"
// 1. Stored as float32.
// 2. The mean must be subtracted; the CHW per-channel means are
//    mean = 112.15, 109.41, 185.42.
struct Record {
  std::vector<float> data;
  std::vector<int32_t> shape;
};

NativeConfig GetConfig() {
  NativeConfig config;
  config.prog_file = DIRNAME + "/__model__";
  config.param_file = DIRNAME + "/__params__";
  config.fraction_of_gpu_memory = 0.0;
  config.use_gpu = true;
  config.device = 0;
  return config;
}

using Time = decltype(std::chrono::high_resolution_clock::now());
Time time() { return std::chrono::high_resolution_clock::now(); };

double time_diff(Time t1, Time t2) {
  typedef std::chrono::microseconds ms;
  auto diff = t2 - t1;
  ms counter = std::chrono::duration_cast<ms>(diff);
  return counter.count() / 1000.0;
}

static void split(const std::string& str, char sep,
                  std::vector<std::string>* pieces) {
  pieces->clear();
  if (str.empty()) {
    return;
  }
  size_t pos = 0;
  size_t next = str.find(sep, pos);
  while (next != std::string::npos) {
    pieces->push_back(str.substr(pos, next - pos));
    pos = next + 1;
    next = str.find(sep, pos);
  }
  if (!str.substr(pos).empty()) {
    pieces->push_back(str.substr(pos));
  }
}

Record ProcessALine(const std::string& line) {
  std::vector<std::string> columns;
  split(line, '\t', &columns);

  Record record;
  std::vector<std::string> data_strs;
  split(columns[0], ' ', &data_strs);
  for (auto& d : data_strs) {
    record.data.push_back(std::stof(d));
  }

  std::vector<std::string> shape_strs;
  split(columns[1], ' ', &shape_strs);
  for (auto& s : shape_strs) {
    record.shape.push_back(std::stoi(s));
  }
  return record;
}

void test_naive(int batch_size) {
  NativeConfig config = GetConfig();
  auto predictor = CreatePaddlePredictor<NativeConfig>(config);
  int height = H;
  int width = W;
  int channel = C;
  int num_sum = height * width * channel * batch_size;

  // 1. use fake data
  std::vector<float> data;
  for (int i = 0; i < num_sum; i++) {
    data.push_back(0.0);
  }

  PaddleTensor tensor;
  tensor.shape = std::vector<int>({batch_size, channel, height, width});
  tensor.data.Resize(sizeof(float) * batch_size * channel * height * width);
  std::copy(data.begin(), data.end(), static_cast<float*>(tensor.data.data()));
  tensor.dtype = PaddleDType::FLOAT32;

  // 2. read data from file
  // std::string line;
  // std::ifstream file(DATA);
  // std::getline(file, line);
  // auto record = ProcessALine(line);
  // file.close();
  // PaddleTensor tensor;
  // tensor.shape = record.shape;
  // tensor.data =
  //     PaddleBuf(record.data.data(), record.data.size() * sizeof(float));

  std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
  PaddleTensor tensor_out;
  std::vector<PaddleTensor> outputs(1, tensor_out);

  predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
  auto time1 = time();
  for (size_t i = 0; i < 2; i++) {
    std::cout << "Pass " << i << "predict";
    predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
  }
  auto time2 = time();

  std::ofstream ofresult("naive_test_result.txt", std::ios::app);
  std::cout << "batch: " << batch_size
            << " predict cost: " << time_diff(time1, time2) / 100.0 << "ms"
            << std::endl;
  std::cout << outputs.size() << std::endl;
}
}  // namespace paddle

int main(int argc, char** argv) {
  paddle::test_naive(1 << 0);
  return 0;
}
(no newline at end of file)
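Note (not part of the diff): a minimal, self-contained sketch of producing one input line in the "<space splitted floats as data>\t<space splitted ints as shape>" format that ProcessALine parses. The file name, values, and shape below are made up for illustration; real data would be mean-subtracted CHW float32 pixels as the comment in the new file describes.

// write_record.cc — illustrative only.
#include <fstream>
#include <vector>

int main() {
  std::vector<float> data = {0.5f, -1.25f, 3.0f, 2.0f};  // assumed mean-subtracted values
  std::vector<int> shape = {1, 1, 2, 2};                 // assumed NCHW shape
  std::ofstream out("test-image.txt");
  for (size_t i = 0; i < data.size(); ++i) out << (i ? " " : "") << data[i];
  out << '\t';  // tab separates the data column from the shape column
  for (size_t i = 0; i < shape.size(); ++i) out << (i ? " " : "") << shape[i];
  out << '\n';
  return 0;
}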
paddle/fluid/inference/api/demo_ci/naive_model_test.cc (deleted, 100644 → 0; -97)

Removed content:

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <chrono>
#include <iostream>
#include <fstream>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

namespace paddle {

std::string DIRNAME = "./LB_icnet_model";
// std::string DIRNAME = "./infer_models";
NativeConfig GetConfig() {
  NativeConfig config;
  config.prog_file = DIRNAME + "/__model__";
  config.param_file = DIRNAME + "/__params__";
  config.fraction_of_gpu_memory = 0.8;
  config.use_gpu = true;
  config.device = 0;
  return config;
}

using Time = decltype(std::chrono::high_resolution_clock::now());
Time time() { return std::chrono::high_resolution_clock::now(); };
double time_diff(Time t1, Time t2) {
  typedef std::chrono::microseconds ms;
  auto diff = t2 - t1;
  ms counter = std::chrono::duration_cast<ms>(diff);
  return counter.count() / 1000.0;
}

void test_naive(int batch_size) {
  NativeConfig config = GetConfig();
  // config.model_dir = model_path;
  auto predictor = CreatePaddlePredictor<NativeConfig>(config);
  int height = 449;
  int width = 581;
  // int height = 3;
  // int width = 3;
  int num_sum = height * width * 3 * batch_size;

  std::vector<float> data;
  for (int i = 0; i < num_sum; i++) {
    data.push_back(0.0);
  }

  PaddleTensor tensor;
  tensor.shape = std::vector<int>({batch_size, 3, height, width});
  tensor.data.Resize(sizeof(float) * batch_size * 3 * height * width);
  std::copy(data.begin(), data.end(), static_cast<float*>(tensor.data.data()));
  tensor.dtype = PaddleDType::FLOAT32;
  std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);

  PaddleTensor tensor_out;
  std::vector<PaddleTensor> outputs(1, tensor_out);

  predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
  std::cout << "start predict123:" << std::endl;
  auto time1 = time();

  for (size_t i = 0; i < 2; i++) {
    predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
    std::cout << "pass " << i;
  }

  auto time2 = time();
  std::ofstream ofresult("naive_test_result.txt", std::ios::app);
  std::cout << "batch: " << batch_size
            << " predict cost: " << time_diff(time1, time2) / 100.0 << "ms"
            << std::endl;
  std::cout << outputs.size() << std::endl;
  /*
  int64_t * data_o = static_cast<int64_t*>(outputs[0].data.data());
  for (size_t j = 0; j < outputs[0].data.length() / sizeof(int64_t); ++j) {
    ofresult << std::to_string(data_o[j]) << " ";
  }
  ofresult << std::endl;
  ofresult.close();
  */
}
}  // namespace paddle

int main(int argc, char** argv) {
  paddle::test_naive(1 << 0);
  return 0;
}
(no newline at end of file)
paddle/fluid/operators/conv_cudnn_op.cu.cc (+13, -6) — adds VLOG(3) trace points through the cuDNN convolution kernels and overrides workspace_size_in_bytes to 1024 before allocation.

@@ -43,6 +43,7 @@ template <typename T>
 class CUDNNConvOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
+    VLOG(3) << "inside cudnn";
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                    "It must use CUDAPlace.");
     auto* input = ctx.Input<Tensor>("Input");
@@ -59,7 +60,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     const T* input_data = input->data<T>();
     const T* filter_data = filter->data<T>();
     T* output_data = output->mutable_data<T>(ctx.GetPlace());
+    VLOG(3) << "get all inputs";
     // ------------------- cudnn descriptors ---------------------
     ScopedTensorDescriptor input_desc;
     ScopedTensorDescriptor output_desc;
@@ -72,7 +73,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     cudnnConvolutionDescriptor_t cudnn_conv_desc =
         conv_desc.descriptor<T>(paddings, strides, dilations);
+    VLOG(3) << "create tensor descriptor";
 #if CUDNN_VERSION_MIN(7, 0, 1)
     // cudnn 7 can support groups, no need to do it mannually
     // FIXME(typhoonzero): find a better way to disable groups
@@ -81,7 +82,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
         cudnn_conv_desc, groups));
     groups = 1;
 #endif
+    VLOG(3) << "before create tensor descriptor";
     cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
         layout, framework::vectorize2int(input->dims()), groups);
     cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
@@ -111,7 +112,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
       output_height = output->dims()[2];
       output_width = output->dims()[3];
     }
+    VLOG(3) << "after create tensor descriptor";
     int group_offset_in =
         input_channels / groups * input_height * input_width * input_depth;
     int group_offset_out =
@@ -129,6 +130,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
     auto handle = dev_ctx.cudnn_handle();
+    VLOG(3) << "set cudnn algorithm";
     CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
         handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
         cudnn_output_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
@@ -149,7 +151,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
           cudnn_conv_desc, CUDNN_DEFAULT_MATH));
     }
 #endif
+    VLOG(3) << "before get workspace";
     // get workspace size able to allocate
     CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardWorkspaceSize(
         handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
@@ -158,10 +160,12 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     // the limit because the algo is overrided to use tensor core.
     PADDLE_ENFORCE_LE(workspace_size_in_bytes, workspace_size_limit,
                       "workspace_size to be allocated exceeds the limit");
+    VLOG(3) << "after get workspace";
     // Allocate on GPU memory
     platform::CUDAPlace gpu = boost::get<platform::CUDAPlace>(ctx.GetPlace());
+    workspace_size_in_bytes = 1024;
     cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
+    VLOG(3) << "allocate memory";
     // ------------------- cudnn conv forward ---------------------
     ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
     for (int i = 0; i < groups; i++) {
@@ -171,8 +175,10 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
           cudnn_conv_desc, algo, cudnn_workspace, workspace_size_in_bytes,
           &beta, cudnn_output_desc, output_data + i * group_offset_out));
     }
+    VLOG(3) << "cudnn forward";
     // Release the cudnn workspace
     paddle::memory::Free(gpu, cudnn_workspace);
+    VLOG(3) << "cudnn pass";
   }
 };
@@ -318,6 +324,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
     // Already on GPU
     void* cudnn_workspace = nullptr;
     platform::CUDAPlace gpu = boost::get<platform::CUDAPlace>(ctx.GetPlace());
+    workspace_size_in_bytes = 1024;
     cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
     // ------------------- cudnn conv backward data ---------------------
     ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
paddle/fluid/operators/load_combine_op.cc (+9, -7) — opens the parameter file in binary mode, relaxes the stream checks to !fin.bad(), and adds VLOG(3) tracing.

@@ -33,8 +33,8 @@ class LoadCombineOp : public framework::OperatorBase {
     auto filename = Attr<std::string>("file_path");
     auto load_as_fp16 = Attr<bool>("load_as_fp16");

-    std::ifstream fin(filename);
-    PADDLE_ENFORCE(static_cast<bool>(fin),
+    std::ifstream fin(filename, std::ios_base::in | std::ios_base::binary);
+    PADDLE_ENFORCE(!fin.bad(),
                    "Cannot open file %s for load_combine op", filename);

     auto out_var_names = Outputs("Out");
@@ -46,20 +46,21 @@ class LoadCombineOp : public framework::OperatorBase {
     auto& dev_ctx = *pool.Get(place);

     for (size_t i = 0; i < out_var_names.size(); i++) {
+      VLOG(3) << "load " << out_var_names[i];
       auto* out_var = scope.FindVar(out_var_names[i]);
       PADDLE_ENFORCE(out_var != nullptr, "Output variable %s cannot be found",
                      out_var_names[i]);

       auto* tensor = out_var->GetMutable<framework::LoDTensor>();
+      VLOG(3) << "Get Tensor";
       // Error checking
-      PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot read more from file %s",
+      PADDLE_ENFORCE(!fin.bad(), "Cannot read more from file %s",
                      filename);
+      VLOG(3) << "before deserialization";
       // Get data from fin to tensor
       DeserializeFromStream(fin, tensor, dev_ctx);
+      VLOG(3) << "after deserialization";
       auto in_dtype = framework::ToDataType(tensor->type());
       auto out_dtype =
           load_as_fp16 ? framework::proto::VarType::FP16 : in_dtype;
@@ -80,6 +81,7 @@ class LoadCombineOp : public framework::OperatorBase {
         tensor->set_lod(fp16_tensor.lod());
         tensor->ShareDataWith(fp16_tensor);
       }
+      VLOG(3) << "load " << out_var_names[i] << " finished";
     }
   }
 };
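Note (not part of the diff): a standalone sketch of why the binary open mode above matters. On Windows, a stream opened in text mode translates "\r\n" sequences and treats 0x1A as end-of-file, which corrupts serialized tensor bytes; opening in binary mode avoids both. The helper name below is made up for illustration.

// open_param_file.cc — illustrative only, not Paddle code.
#include <fstream>
#include <string>

std::ifstream OpenParamFile(const std::string& filename) {
  // Binary mode: no newline translation, no implicit EOF on 0x1A.
  return std::ifstream(filename, std::ios_base::in | std::ios_base::binary);
}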
paddle/fluid/platform/cudnn_helper.h (+4, -0) — turns CUDNN_ENFORCE into a no-op on Windows.

@@ -59,6 +59,7 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) {
 #define CUDNN_VERSION_MIN(major, minor, patch) \
   (CUDNN_VERSION >= ((major)*1000 + (minor)*100 + (patch)))

+#if !defined(_WIN32)
 #define CUDNN_ENFORCE(condition)   \
   do {                             \
     cudnnStatus_t status = condition; \
@@ -66,6 +67,9 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) {
       PADDLE_THROW(::paddle::platform::cudnnGetErrorString(status)); \
     }                                                                \
   } while (false)
+#else
+#define CUDNN_ENFORCE(condition)
+#endif

 enum class DataLayout {  // Not use
   kNHWC,