Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
bf2e4cb1
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
1 年多 前同步成功
通知
699
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
bf2e4cb1
编写于
10月 30, 2018
作者:
D
dzhwinter
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
cleard. staged
上级
ebfe5a02
变更
16
隐藏空白更改
内联
并排
Showing
16 changed file
with
64 addition
and
109 deletion
+64
-109
cmake/cuda.cmake
cmake/cuda.cmake
+2
-1
cmake/external/threadpool.cmake
cmake/external/threadpool.cmake
+1
-0
cmake/flags.cmake
cmake/flags.cmake
+4
-16
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+15
-64
paddle/fluid/framework/executor.h
paddle/fluid/framework/executor.h
+3
-1
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+2
-1
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+1
-0
paddle/fluid/inference/api/api.cc
paddle/fluid/inference/api/api.cc
+0
-1
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+2
-3
paddle/fluid/inference/api/api_impl.h
paddle/fluid/inference/api/api_impl.h
+1
-1
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+4
-5
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+4
-6
paddle/fluid/operators/detection/roi_perspective_transform_op.cu
...fluid/operators/detection/roi_perspective_transform_op.cu
+2
-2
paddle/fluid/operators/math/CMakeLists.txt
paddle/fluid/operators/math/CMakeLists.txt
+7
-7
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+2
-0
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+14
-1
未找到文件。
cmake/cuda.cmake
浏览文件 @
bf2e4cb1
...
@@ -173,6 +173,7 @@ set(CUDA_PROPAGATE_HOST_FLAGS OFF)
...
@@ -173,6 +173,7 @@ set(CUDA_PROPAGATE_HOST_FLAGS OFF)
if
(
NOT WIN32
)
# windows msvc2015 support c++11 natively.
if
(
NOT WIN32
)
# windows msvc2015 support c++11 natively.
# -std=c++11 -fPIC not recoginize by msvc
# -std=c++11 -fPIC not recoginize by msvc
list
(
APPEND CUDA_NVCC_FLAGS
"-std=c++11"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-std=c++11"
)
# in cuda9, suppress cuda warning on eigen with "-w"
list
(
APPEND CUDA_NVCC_FLAGS
"-w"
"-Xcompiler -fPIC"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-w"
"-Xcompiler -fPIC"
)
else
(
NOT WIN32
)
else
(
NOT WIN32
)
list
(
APPEND CUDA_NVCC_FLAGS
"-w"
"-Xcompiler -fPIC"
"-Xcompiler /w"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-w"
"-Xcompiler -fPIC"
"-Xcompiler /w"
)
...
@@ -181,7 +182,7 @@ endif(NOT WIN32)
...
@@ -181,7 +182,7 @@ endif(NOT WIN32)
if
(
WITH_FAST_MATH
)
if
(
WITH_FAST_MATH
)
# Make use of fast math library. https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html
# Make use of fast math library. https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html
list
(
APPEND CUDA_NVCC_FLAGS
"--use_fast_math"
)
list
(
APPEND CUDA_NVCC_FLAGS
"--use_fast_math"
)
# in cuda9, suppress cuda warning on eigen
endif
(
WITH_FAST_MATH
)
# Set :expt-relaxed-constexpr to suppress Eigen warnings
# Set :expt-relaxed-constexpr to suppress Eigen warnings
list
(
APPEND CUDA_NVCC_FLAGS
"--expt-relaxed-constexpr"
)
list
(
APPEND CUDA_NVCC_FLAGS
"--expt-relaxed-constexpr"
)
...
...
cmake/external/threadpool.cmake
浏览文件 @
bf2e4cb1
...
@@ -3,6 +3,7 @@ INCLUDE(ExternalProject)
...
@@ -3,6 +3,7 @@ INCLUDE(ExternalProject)
SET
(
THREADPOOL_SOURCE_DIR
${
THIRD_PARTY_PATH
}
/threadpool
)
SET
(
THREADPOOL_SOURCE_DIR
${
THIRD_PARTY_PATH
}
/threadpool
)
SET
(
THREADPOOL_INCLUDE_DIR
${
THREADPOOL_SOURCE_DIR
}
/src/extern_threadpool
)
SET
(
THREADPOOL_INCLUDE_DIR
${
THREADPOOL_SOURCE_DIR
}
/src/extern_threadpool
)
INCLUDE_DIRECTORIES
(
${
THREADPOOL_INCLUDE_DIR
}
)
INCLUDE_DIRECTORIES
(
${
THREADPOOL_INCLUDE_DIR
}
)
message
(
"Debug"
${
THREADPOOL_INCLUDE_DIR
}
)
ExternalProject_Add
(
ExternalProject_Add
(
extern_threadpool
extern_threadpool
...
...
cmake/flags.cmake
浏览文件 @
bf2e4cb1
...
@@ -143,26 +143,14 @@ set(GPU_COMMON_FLAGS
...
@@ -143,26 +143,14 @@ set(GPU_COMMON_FLAGS
-Wno-error=unused-function
# Warnings in Numpy Header.
-Wno-error=unused-function
# Warnings in Numpy Header.
-Wno-error=array-bounds
# Warnings in Eigen::array
-Wno-error=array-bounds
# Warnings in Eigen::array
)
)
set
(
COMMON_FLAGS
-fPIC
-fno-omit-frame-pointer
)
set
(
GPU_COMMON_FLAGS
-fPIC
-fno-omit-frame-pointer
)
else
(
NOT WIN32
)
set
(
COMMON_FLAGS
"/w"
)
#disable all warnings.
set
(
GPU_COMMON_FLAGS
"/w"
)
#disable all warnings
endif
(
NOT WIN32
)
else
(
NOT WIN32
)
else
(
NOT WIN32
)
set
(
COMMON_FLAGS
set
(
COMMON_FLAGS
-fPIC
-fno-omit-frame-pointer
"/w"
)
#disable all warnings.
"/w"
)
#disable all warnings.
set
(
GPU_COMMON_FLAGS
set
(
GPU_COMMON_FLAGS
-fPIC
-fno-omit-frame-pointer
"/w"
)
#disable all warnings
"/w"
)
#disable all warnings
endif
(
NOT WIN32
)
endif
(
NOT WIN32
)
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
bf2e4cb1
...
@@ -48,6 +48,7 @@ ExecutorPrepareContext::~ExecutorPrepareContext() {
...
@@ -48,6 +48,7 @@ ExecutorPrepareContext::~ExecutorPrepareContext() {
VLOG
(
5
)
<<
"destroy ExecutorPrepareContext"
;
VLOG
(
5
)
<<
"destroy ExecutorPrepareContext"
;
}
}
#ifndef _WIN32
template
<
typename
RefCntMap
>
template
<
typename
RefCntMap
>
static
void
DeleteUnusedTensors
(
const
Scope
&
scope
,
const
OperatorBase
*
op
,
static
void
DeleteUnusedTensors
(
const
Scope
&
scope
,
const
OperatorBase
*
op
,
GarbageCollector
<
Tensor
>*
gc
,
GarbageCollector
<
Tensor
>*
gc
,
...
@@ -82,6 +83,7 @@ static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,
...
@@ -82,6 +83,7 @@ static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,
gc
->
Add
(
erase_tensors
);
gc
->
Add
(
erase_tensors
);
}
}
}
}
#endif
Executor
::
Executor
(
const
platform
::
Place
&
place
)
:
place_
(
place
)
{}
Executor
::
Executor
(
const
platform
::
Place
&
place
)
:
place_
(
place
)
{}
...
@@ -331,97 +333,35 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
...
@@ -331,97 +333,35 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
std
::
unique_ptr
<
ExecutorPrepareContext
>
Executor
::
Prepare
(
std
::
unique_ptr
<
ExecutorPrepareContext
>
Executor
::
Prepare
(
const
ProgramDesc
&
program
,
int
block_id
)
{
const
ProgramDesc
&
program
,
int
block_id
)
{
VLOG
(
3
)
<<
"before create prepare"
<<
block_id
<<
" "
<<
program
.
Size
();
std
::
unique_ptr
<
ExecutorPrepareContext
>
ctx
(
std
::
unique_ptr
<
ExecutorPrepareContext
>
ctx
(
new
ExecutorPrepareContext
(
program
,
block_id
));
new
ExecutorPrepareContext
(
program
,
block_id
));
VLOG
(
3
)
<<
"after create prepare"
;
PADDLE_ENFORCE_LT
(
static_cast
<
size_t
>
(
block_id
),
program
.
Size
());
// PADDLE_ENFORCE_LT(static_cast<size_t>(block_id), program.Size());
VLOG
(
3
)
<<
"before create op_desc"
;
auto
&
block
=
program
.
Block
(
block_id
);
auto
&
block
=
program
.
Block
(
block_id
);
VLOG
(
3
)
<<
"create before"
<<
ctx
->
ops_
.
size
()
<<
" "
<<
block
.
AllOps
().
size
();
int
counter
=
0
;
int
counter
=
0
;
for
(
auto
&
op_desc
:
block
.
AllOps
())
{
for
(
auto
&
op_desc
:
block
.
AllOps
())
{
ctx
->
ops_
.
push_back
(
OpRegistry
::
CreateOp
(
*
op_desc
));
ctx
->
ops_
.
push_back
(
OpRegistry
::
CreateOp
(
*
op_desc
));
VLOG
(
3
)
<<
"create op "
<<
"index "
<<
++
counter
<<
" type "
<<
op_desc
->
Type
();
}
}
VLOG
(
3
)
<<
"create finished"
<<
ctx
->
ops_
.
size
()
<<
" "
<<
block
.
AllOps
().
size
();
return
ctx
;
return
ctx
;
}
}
std
::
vector
<
std
::
shared_ptr
<
ExecutorPrepareContext
>>
Executor
::
Prepare
(
std
::
vector
<
std
::
shared_ptr
<
ExecutorPrepareContext
>>
Executor
::
Prepare
(
const
ProgramDesc
&
program
,
const
std
::
vector
<
int
>&
block_ids
)
{
const
ProgramDesc
&
program
,
const
std
::
vector
<
int
>&
block_ids
)
{
VLOG
(
3
)
<<
"inside prepare"
;
std
::
vector
<
std
::
shared_ptr
<
ExecutorPrepareContext
>>
result
;
std
::
vector
<
std
::
shared_ptr
<
ExecutorPrepareContext
>>
result
;
VLOG
(
3
)
<<
"before go through block_ids"
;
for
(
auto
&
bid
:
block_ids
)
{
for
(
auto
&
bid
:
block_ids
)
{
VLOG
(
3
)
<<
"block id"
<<
bid
;
auto
*
ctx
=
new
ExecutorPrepareContext
(
program
,
bid
);
auto
*
ctx
=
new
ExecutorPrepareContext
(
program
,
bid
);
//
PADDLE_ENFORCE_LT(static_cast<size_t>(bid), program.Size());
PADDLE_ENFORCE_LT
(
static_cast
<
size_t
>
(
bid
),
program
.
Size
());
auto
&
block
=
program
.
Block
(
bid
);
auto
&
block
=
program
.
Block
(
bid
);
int
counter
=
0
;
VLOG
(
3
)
<<
"create before"
<<
ctx
->
ops_
.
size
()
<<
" "
<<
block
.
AllOps
().
size
();
for
(
auto
&
op_desc
:
block
.
AllOps
())
{
for
(
auto
&
op_desc
:
block
.
AllOps
())
{
ctx
->
ops_
.
push_back
(
OpRegistry
::
CreateOp
(
*
op_desc
));
ctx
->
ops_
.
push_back
(
OpRegistry
::
CreateOp
(
*
op_desc
));
VLOG
(
3
)
<<
"create op "
<<
"index "
<<
++
counter
<<
" type "
<<
op_desc
->
Type
();
}
}
VLOG
(
3
)
<<
"create finished"
<<
ctx
->
ops_
.
size
()
<<
" "
<<
block
.
AllOps
().
size
();
result
.
push_back
(
std
::
shared_ptr
<
ExecutorPrepareContext
>
(
ctx
));
result
.
push_back
(
std
::
shared_ptr
<
ExecutorPrepareContext
>
(
ctx
));
}
}
return
result
;
return
result
;
}
}
// void CheckResult(const std::string op_type, ExecutorPrepareContext* ctx,
// Scope* local_scope) {
// VLOG(3) << "before checking result";
// auto& dev_ctx = *platform::DeviceContextPool::Instance().Get(place_);
// std::vector<std::string> outputs;
// auto& block = ctx->prog_.Block(0);
// bool found = false;
// framework::OpDesc* myop = nullptr;
// for(auto& op : block.AllOps()) {
// if(op->Type() == "load_combine" || op->Type() == "fetch" || op->Type() ==
// "feed") return;
// if (op->Type() == op_type) {
// found = true;
// myop = op;
// break;
// }
// }
// }
// if(!found) {
// VLOG(3) << "not found op!";
// return;
// }
// auto* op = myop;
// VLOG(3) << "start op output" << op->Type();
// for(auto var_name: op->OutputArgumentNames()) {
// auto* var = local_scope->Var(var_name);
// auto* var_desc = block.FindVar(var_name);
// if (var_desc->Persistable()) continue;
// auto* tensor = var->GetMutable<framework::LoDTensor>();
// framework::Tensor check;
// VLOG(3) << "before tensor copy";
// framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
// VLOG(3) << "after tensor copy";
// float sum = .0;
// for(size_t i=0; i < check.numel(); ++i) {
// sum += check.data<float>()[i];
// }
// VLOG(3) << "op " << op->Type() << " output var " << var_name << " sum "
// << sum;
// VLOG(3) << "after checking result";
// }
void
Executor
::
RunPreparedContext
(
ExecutorPrepareContext
*
ctx
,
Scope
*
scope
,
void
Executor
::
RunPreparedContext
(
ExecutorPrepareContext
*
ctx
,
Scope
*
scope
,
bool
create_local_scope
,
bool
create_vars
,
bool
create_local_scope
,
bool
create_vars
,
bool
keep_kids
)
{
bool
keep_kids
)
{
VLOG
(
3
)
<<
"RunPreparedContext inside"
;
Scope
*
local_scope
=
scope
;
Scope
*
local_scope
=
scope
;
if
(
create_vars
)
{
if
(
create_vars
)
{
if
(
create_local_scope
)
{
if
(
create_local_scope
)
{
...
@@ -430,6 +370,7 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
...
@@ -430,6 +370,7 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
CreateVariables
(
ctx
->
prog_
,
local_scope
,
ctx
->
block_id_
);
CreateVariables
(
ctx
->
prog_
,
local_scope
,
ctx
->
block_id_
);
}
}
#ifndef _WIN32
int64_t
max_memory_size
=
GetEagerDeletionThreshold
();
int64_t
max_memory_size
=
GetEagerDeletionThreshold
();
std
::
unique_ptr
<
GarbageCollector
<
Tensor
>>
gc
;
std
::
unique_ptr
<
GarbageCollector
<
Tensor
>>
gc
;
// WhileOp would set keep_kids to false
// WhileOp would set keep_kids to false
...
@@ -471,6 +412,16 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
...
@@ -471,6 +412,16 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
}
else
{
}
else
{
platform
::
DeviceContextPool
::
Instance
().
Get
(
place_
)
->
Wait
();
platform
::
DeviceContextPool
::
Instance
().
Get
(
place_
)
->
Wait
();
}
}
#else // WIN32
for
(
auto
&
op
:
ctx
->
ops_
)
{
op
->
Run
(
*
local_scope
,
place_
);
if
(
FLAGS_benchmark
)
{
VLOG
(
2
)
<<
"Memory used after operator "
+
op
->
Type
()
+
" running: "
<<
memory
::
memory_usage
(
place_
);
}
}
platform
::
DeviceContextPool
::
Instance
().
Get
(
place_
)
->
Wait
();
#endif // NOT WIN32
if
(
local_scope
!=
scope
)
{
if
(
local_scope
!=
scope
)
{
scope
->
DeleteScope
(
local_scope
);
scope
->
DeleteScope
(
local_scope
);
...
...
paddle/fluid/framework/executor.h
浏览文件 @
bf2e4cb1
...
@@ -17,12 +17,14 @@ limitations under the License. */
...
@@ -17,12 +17,14 @@ limitations under the License. */
#include <map>
#include <map>
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#ifndef _WIN32
#include "paddle/fluid/framework/garbage_collector.h"
#endif
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
...
...
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
bf2e4cb1
...
@@ -35,9 +35,10 @@ endif()
...
@@ -35,9 +35,10 @@ endif()
# Create static library
# Create static library
if
(
WIN32
)
if
(
WIN32
)
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
${
fluid_third
pa
}
paddle_fluid_api paddle_inference_api
)
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
${
fluid_third
_partys
}
paddle_fluid_api paddle_inference_api
)
else
(
WIND32
)
else
(
WIND32
)
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
${
STATIC_INFERENCE_APIS
}
zero_copy_tensor reset_tensor_array
)
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
${
STATIC_INFERENCE_APIS
}
zero_copy_tensor reset_tensor_array
)
endif
(
WIN32
)
if
(
NOT APPLE
)
if
(
NOT APPLE
)
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
...
...
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
bf2e4cb1
...
@@ -51,6 +51,7 @@ function(inference_api_test TARGET_NAME)
...
@@ -51,6 +51,7 @@ function(inference_api_test TARGET_NAME)
endfunction
(
inference_api_test
)
endfunction
(
inference_api_test
)
cc_library
(
reset_tensor_array SRCS details/reset_tensor_array.cc DEPS lod_tensor scope
)
cc_library
(
reset_tensor_array SRCS details/reset_tensor_array.cc DEPS lod_tensor scope
)
cc_library
(
helper SRCS helper.cc DEPS reset_tensor_array lod_tensor scope
)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS reset_tensor_array lod_tensor scope
)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS reset_tensor_array lod_tensor scope
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor
)
cc_library
(
zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS paddle_inference_api
)
cc_library
(
zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS paddle_inference_api
)
...
...
paddle/fluid/inference/api/api.cc
浏览文件 @
bf2e4cb1
...
@@ -16,7 +16,6 @@
...
@@ -16,7 +16,6 @@
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle_inference_api.h"
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
bf2e4cb1
...
@@ -260,9 +260,8 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
...
@@ -260,9 +260,8 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
if
(
config
.
use_gpu
)
{
if
(
config
.
use_gpu
)
{
// 1. GPU memeroy
// 1. GPU memeroy
PADDLE_ENFORCE_GT
(
PADDLE_ENFORCE_GT
(
config
.
fraction_of_gpu_memory
,
0.
f
,
config
.
fraction_of_gpu_memory
,
0.
f
,
"fraction_of_gpu_memory in the config should be set to range (0.,
"fraction_of_gpu_memory in the config should be set to range (0.,1.]"
);
1.]"
);
PADDLE_ENFORCE_GE
(
config
.
device
,
0
,
"Invalid device id %d"
,
config
.
device
);
PADDLE_ENFORCE_GE
(
config
.
device
,
0
,
"Invalid device id %d"
,
config
.
device
);
std
::
vector
<
std
::
string
>
flags
;
std
::
vector
<
std
::
string
>
flags
;
if
(
config
.
fraction_of_gpu_memory
>=
0.0
f
||
if
(
config
.
fraction_of_gpu_memory
>=
0.0
f
||
...
...
paddle/fluid/inference/api/api_impl.h
浏览文件 @
bf2e4cb1
...
@@ -31,10 +31,10 @@ limitations under the License. */
...
@@ -31,10 +31,10 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle_inference_api.h" // NOLINT
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/inference/api/helper.h
浏览文件 @
bf2e4cb1
...
@@ -14,8 +14,9 @@
...
@@ -14,8 +14,9 @@
#pragma once
#pragma once
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include <glog/logging.h>
#include <glog/logging.h>
#include <algorithm>
#include <algorithm>
#include <chrono> // NOLINT
#include <chrono> // NOLINT
#include <iterator>
#include <iterator>
...
@@ -23,9 +24,7 @@
...
@@ -23,9 +24,7 @@
#include <sstream>
#include <sstream>
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/string/printf.h"
#include "paddle_inference_api.h" //NOLINT
#include "paddle_inference_api.h"
#include "timer.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
...
@@ -97,7 +96,7 @@ static void TensorAssignData(PaddleTensor *tensor,
...
@@ -97,7 +96,7 @@ static void TensorAssignData(PaddleTensor *tensor,
}
}
template
<
typename
T
>
template
<
typename
T
>
static
int
ZeroCopyTensorAssignData
(
ZeroCopyTensor
*
tensor
,
static
int
ZeroCopyTensorAssignData
(
paddle
::
ZeroCopyTensor
*
tensor
,
const
std
::
vector
<
std
::
vector
<
T
>>
&
data
)
{
const
std
::
vector
<
std
::
vector
<
T
>>
&
data
)
{
int
size
{
0
};
int
size
{
0
};
auto
*
ptr
=
tensor
->
mutable_data
<
T
>
(
PaddlePlace
::
kCPU
);
auto
*
ptr
=
tensor
->
mutable_data
<
T
>
(
PaddlePlace
::
kCPU
);
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
bf2e4cb1
...
@@ -284,12 +284,10 @@ op_library(array_to_lod_tensor_op DEPS lod_rank_table_op)
...
@@ -284,12 +284,10 @@ op_library(array_to_lod_tensor_op DEPS lod_rank_table_op)
op_library
(
max_sequence_len_op DEPS lod_rank_table
)
op_library
(
max_sequence_len_op DEPS lod_rank_table
)
op_library
(
sequence_conv_op DEPS context_project
)
op_library
(
sequence_conv_op DEPS context_project
)
op_library
(
sequence_pool_op DEPS sequence_pooling
)
op_library
(
sequence_pool_op DEPS sequence_pooling
)
if
(
NOT WIN32
)
op_library
(
lstm_op DEPS sequence2batch lstm_compute
)
op_library
(
lstm_op DEPS sequence2batch lstm_compute
)
op_library
(
hierarchical_sigmoid_op DEPS matrix_bit_code
)
op_library
(
hierarchical_sigmoid_op DEPS matrix_bit_code
)
op_library
(
lstmp_op DEPS sequence2batch lstm_compute
)
op_library
(
lstmp_op DEPS sequence2batch lstm_compute
)
op_library
(
gru_op DEPS sequence2batch gru_compute
)
op_library
(
gru_op DEPS sequence2batch gru_compute
)
endif
(
NOT WIN32
)
op_library
(
recurrent_op DEPS executor
)
op_library
(
recurrent_op DEPS executor
)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale
)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale
)
op_library
(
cos_sim_op DEPS cos_sim_functor
)
op_library
(
cos_sim_op DEPS cos_sim_functor
)
...
...
paddle/fluid/operators/detection/roi_perspective_transform_op.cu
浏览文件 @
bf2e4cb1
...
@@ -31,12 +31,12 @@ namespace operators {
...
@@ -31,12 +31,12 @@ namespace operators {
template
<
typename
T
>
template
<
typename
T
>
__device__
bool
GT_E
(
T
a
,
T
b
)
{
__device__
bool
GT_E
(
T
a
,
T
b
)
{
return
(
a
>
b
)
||
fabs
(
a
-
b
)
<
1e-4
;
return
(
a
>
b
)
||
fabs
f
(
static_cast
<
float
>
(
a
-
b
)
)
<
1e-4
;
}
}
template
<
typename
T
>
template
<
typename
T
>
__device__
bool
LT_E
(
T
a
,
T
b
)
{
__device__
bool
LT_E
(
T
a
,
T
b
)
{
return
(
a
<
b
)
||
fabs
(
a
-
b
)
<
1e-4
;
return
(
a
<
b
)
||
fabs
f
(
static_cast
<
float
>
(
a
-
b
)
)
<
1e-4
;
}
}
template
<
typename
T
>
template
<
typename
T
>
...
...
paddle/fluid/operators/math/CMakeLists.txt
浏览文件 @
bf2e4cb1
...
@@ -57,9 +57,6 @@ math_library(sequence_padding)
...
@@ -57,9 +57,6 @@ math_library(sequence_padding)
math_library
(
sequence_pooling DEPS math_function
)
math_library
(
sequence_pooling DEPS math_function
)
math_library
(
sequence_scale
)
math_library
(
sequence_scale
)
math_library
(
softmax DEPS math_function
)
math_library
(
softmax DEPS math_function
)
if
(
NOT WIN32
)
math_library
(
matrix_bit_code
)
endif
(
NOT WIN32
)
math_library
(
unpooling
)
math_library
(
unpooling
)
math_library
(
vol2col
)
math_library
(
vol2col
)
...
@@ -75,7 +72,10 @@ if(WITH_GPU)
...
@@ -75,7 +72,10 @@ if(WITH_GPU)
endif
()
endif
()
cc_test
(
concat_test SRCS concat_test.cc DEPS concat_and_split
)
cc_test
(
concat_test SRCS concat_test.cc DEPS concat_and_split
)
cc_test
(
cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info
)
cc_test
(
cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info
)
cc_library
(
jit_kernel
if
(
NOT WIN32
)
SRCS jit_kernel.cc jit_kernel_blas.cc jit_kernel_exp.cc jit_kernel_rnn.cc jit_kernel_crf_decode.cc
math_library
(
matrix_bit_code
)
DEPS cpu_info cblas
)
cc_library
(
jit_kernel
cc_test
(
jit_kernel_test SRCS jit_kernel_test.cc DEPS jit_kernel
)
SRCS jit_kernel.cc jit_kernel_blas.cc jit_kernel_exp.cc jit_kernel_rnn.cc jit_kernel_crf_decode.cc
DEPS cpu_info cblas
)
cc_test
(
jit_kernel_test SRCS jit_kernel_test.cc DEPS jit_kernel
)
endif
(
NOT WIN32
)
paddle/fluid/platform/device_context.cc
浏览文件 @
bf2e4cb1
...
@@ -235,7 +235,9 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
...
@@ -235,7 +235,9 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
<<
", Runtime Version: "
<<
runtime_version_
/
1000
<<
"."
<<
", Runtime Version: "
<<
runtime_version_
/
1000
<<
"."
<<
(
runtime_version_
%
100
)
/
10
;
<<
(
runtime_version_
%
100
)
/
10
;
#ifndef _WIN32
callback_manager_
.
reset
(
new
StreamCallbackManager
(
stream_
));
callback_manager_
.
reset
(
new
StreamCallbackManager
(
stream_
));
#endif // NOT WIN32
}
}
CUDADeviceContext
::~
CUDADeviceContext
()
{
CUDADeviceContext
::~
CUDADeviceContext
()
{
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
bf2e4cb1
...
@@ -31,7 +31,7 @@ limitations under the License. */
...
@@ -31,7 +31,7 @@ limitations under the License. */
#include "glog/logging.h"
#include "glog/logging.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/place.h"
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/stream_callback_manager.h"
#include "paddle/fluid/platform/stream_callback_manager.h"
#endif
#endif
#include "unsupported/Eigen/CXX11/Tensor"
#include "unsupported/Eigen/CXX11/Tensor"
...
@@ -115,6 +115,7 @@ class CUDADeviceContext : public DeviceContext {
...
@@ -115,6 +115,7 @@ class CUDADeviceContext : public DeviceContext {
PADDLE_ENFORCE
(
cudaEventRecord
(
ev
,
stream_
));
PADDLE_ENFORCE
(
cudaEventRecord
(
ev
,
stream_
));
}
}
#ifndef _WIN32
template
<
typename
Callback
>
template
<
typename
Callback
>
void
AddStreamCallback
(
Callback
&&
callback
)
const
{
void
AddStreamCallback
(
Callback
&&
callback
)
const
{
std
::
lock_guard
<
std
::
mutex
>
guard
(
callback_mtx_
);
std
::
lock_guard
<
std
::
mutex
>
guard
(
callback_mtx_
);
...
@@ -125,6 +126,16 @@ class CUDADeviceContext : public DeviceContext {
...
@@ -125,6 +126,16 @@ class CUDADeviceContext : public DeviceContext {
std
::
lock_guard
<
std
::
mutex
>
guard
(
callback_mtx_
);
std
::
lock_guard
<
std
::
mutex
>
guard
(
callback_mtx_
);
callback_manager_
->
Wait
();
callback_manager_
->
Wait
();
}
}
#else
template
<
typename
Callback
>
void
AddStreamCallback
(
Callback
&&
callback
)
const
{
// ugly empty functor.
}
void
WaitStreamCallback
()
const
{
// ugly empty functor.
}
#endif
private:
private:
CUDAPlace
place_
;
CUDAPlace
place_
;
...
@@ -143,10 +154,12 @@ class CUDADeviceContext : public DeviceContext {
...
@@ -143,10 +154,12 @@ class CUDADeviceContext : public DeviceContext {
mutable
std
::
mutex
mtx_
;
mutable
std
::
mutex
mtx_
;
#ifndef _WIN32
// This lock is only used by callback
// This lock is only used by callback
// If we use mtx_ for StreamCallbackManager, deadlock may occur sometimes
// If we use mtx_ for StreamCallbackManager, deadlock may occur sometimes
mutable
std
::
mutex
callback_mtx_
;
mutable
std
::
mutex
callback_mtx_
;
std
::
unique_ptr
<
StreamCallbackManager
>
callback_manager_
;
std
::
unique_ptr
<
StreamCallbackManager
>
callback_manager_
;
#endif
};
};
template
<
>
template
<
>
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录