Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
6cc78705
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6cc78705
编写于
8月 27, 2018
作者:
D
dzhwinter
提交者:
sneaxiy
8月 27, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix concat synchronization bug
上级
d292ad85
变更
8
显示空白变更内容
内联
并排
Showing
8 changed file
with
77 addition
and
10 deletion
+77
-10
paddle/fluid/operators/math/concat.cu
paddle/fluid/operators/math/concat.cu
+6
-0
paddle/fluid/platform/CMakeLists.txt
paddle/fluid/platform/CMakeLists.txt
+5
-0
paddle/fluid/platform/cpu_info.cc
paddle/fluid/platform/cpu_info.cc
+17
-4
paddle/fluid/platform/device_tracer.h
paddle/fluid/platform/device_tracer.h
+9
-1
paddle/fluid/platform/dynload/CMakeLists.txt
paddle/fluid/platform/dynload/CMakeLists.txt
+2
-0
paddle/fluid/platform/dynload/dynamic_loader.cc
paddle/fluid/platform/dynload/dynamic_loader.cc
+1
-2
paddle/fluid/platform/enforce.h
paddle/fluid/platform/enforce.h
+27
-3
paddle/fluid/platform/profiler.h
paddle/fluid/platform/profiler.h
+10
-0
未找到文件。
paddle/fluid/operators/math/concat.cu
浏览文件 @
6cc78705
...
@@ -177,6 +177,9 @@ class ConcatFunctor<platform::CUDADeviceContext, T> {
...
@@ -177,6 +177,9 @@ class ConcatFunctor<platform::CUDADeviceContext, T> {
dev_ins_data
,
dev_ins_col_data
,
static_cast
<
int
>
(
inputs_col
.
size
()),
dev_ins_data
,
dev_ins_col_data
,
static_cast
<
int
>
(
inputs_col
.
size
()),
out_row
,
out_col
,
output
->
data
<
T
>
());
out_row
,
out_col
,
output
->
data
<
T
>
());
}
}
// Wait() must be called because `inputs_data` may be destructed before
// kernel ends
context
.
Wait
();
}
}
};
};
...
@@ -252,6 +255,9 @@ class ConcatGradFunctor<platform::CUDADeviceContext, T> {
...
@@ -252,6 +255,9 @@ class ConcatGradFunctor<platform::CUDADeviceContext, T> {
input
.
data
<
T
>
(),
in_row
,
in_col
,
dev_outs_col_data
,
input
.
data
<
T
>
(),
in_row
,
in_col
,
dev_outs_col_data
,
static_cast
<
int
>
(
outputs_cols
.
size
()),
dev_out_gpu_data
);
static_cast
<
int
>
(
outputs_cols
.
size
()),
dev_out_gpu_data
);
}
}
// Wait() must be called because `outputs_data` may be destructed before
// kernel ends
context
.
Wait
();
}
}
};
};
...
...
paddle/fluid/platform/CMakeLists.txt
浏览文件 @
6cc78705
if
(
NOT WIN32
)
proto_library
(
profiler_proto SRCS profiler.proto DEPS framework_proto
)
proto_library
(
profiler_proto SRCS profiler.proto DEPS framework_proto
)
py_proto_compile
(
profiler_py_proto SRCS profiler.proto
)
py_proto_compile
(
profiler_py_proto SRCS profiler.proto
)
...
@@ -10,6 +11,7 @@ add_custom_command(TARGET profiler_py_proto POST_BUILD
...
@@ -10,6 +11,7 @@ add_custom_command(TARGET profiler_py_proto POST_BUILD
COMMAND cp *.py
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/proto/profiler
COMMAND cp *.py
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/proto/profiler
COMMENT
"Copy generated python proto into directory paddle/fluid/proto/profiler."
COMMENT
"Copy generated python proto into directory paddle/fluid/proto/profiler."
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
endif
(
NOT WIN32
)
if
(
WITH_GPU
)
if
(
WITH_GPU
)
nv_library
(
enforce SRCS enforce.cc
)
nv_library
(
enforce SRCS enforce.cc
)
...
@@ -58,9 +60,12 @@ cc_test(init_test SRCS init_test.cc DEPS device_context)
...
@@ -58,9 +60,12 @@ cc_test(init_test SRCS init_test.cc DEPS device_context)
nv_test
(
cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda
)
nv_test
(
cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda
)
nv_test
(
transform_test SRCS transform_test.cu DEPS memory place device_context
)
nv_test
(
transform_test SRCS transform_test.cu DEPS memory place device_context
)
if
(
NOT WIN32
)
cc_library
(
device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto
${
GPU_CTX_DEPS
}
)
cc_library
(
device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto
${
GPU_CTX_DEPS
}
)
cc_library
(
profiler SRCS profiler.cc DEPS device_context device_tracer
)
cc_library
(
profiler SRCS profiler.cc DEPS device_context device_tracer
)
cc_test
(
profiler_test SRCS profiler_test.cc DEPS profiler
)
cc_test
(
profiler_test SRCS profiler_test.cc DEPS profiler
)
endif
(
NOT WIN32
)
nv_test
(
float16_gpu_test SRCS float16_test.cu DEPS lod_tensor
)
nv_test
(
float16_gpu_test SRCS float16_test.cu DEPS lod_tensor
)
cc_test
(
float16_test SRCS float16_test.cc DEPS lod_tensor
)
cc_test
(
float16_test SRCS float16_test.cc DEPS lod_tensor
)
...
...
paddle/fluid/platform/cpu_info.cc
浏览文件 @
6cc78705
...
@@ -22,9 +22,13 @@ limitations under the License. */
...
@@ -22,9 +22,13 @@ limitations under the License. */
#ifdef __APPLE__
#ifdef __APPLE__
#include <sys/sysctl.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#include <sys/types.h>
#elif defined(_WIN32)
#define NOMINMAX // msvc max/min macro conflict with std::min/max
#include <windows.h>
#else
#else
#include <unistd.h>
#include <unistd.h>
#endif
#endif
// _WIN32
#include <algorithm>
#include <algorithm>
#include "gflags/gflags.h"
#include "gflags/gflags.h"
...
@@ -32,16 +36,20 @@ limitations under the License. */
...
@@ -32,16 +36,20 @@ limitations under the License. */
DEFINE_double
(
fraction_of_cpu_memory_to_use
,
1
,
DEFINE_double
(
fraction_of_cpu_memory_to_use
,
1
,
"Default use 100% of CPU memory for PaddlePaddle,"
"Default use 100% of CPU memory for PaddlePaddle,"
"reserve the rest for page tables, etc"
);
"reserve the rest for page tables, etc"
);
#if !defined(_WIN32)
DEFINE_uint64
(
initial_cpu_memory_in_mb
,
DEFINE_uint64
(
initial_cpu_memory_in_mb
,
#ifdef PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
/* Aligned with mozga-intel, MKLDNN need at least 5000 MB
/* Aligned with mozga-intel, MKLDNN need at least 5000 MB
* to obtain the best performance*/
* to obtain the best performance*/
5000
,
5000
ul
,
#else
#else
500
,
500
ul
,
#endif
#endif
"Initial CPU memory for PaddlePaddle, in MD unit."
);
"Initial CPU memory for PaddlePaddle, in MD unit."
);
#else
DEFINE_uint64
(
initial_cpu_memory_in_mb
,
500ul
,
"Initial CPU memory for PaddlePaddle, in MD unit."
);
#endif // !defined(_WIN32)
DEFINE_double
(
DEFINE_double
(
fraction_of_cuda_pinned_memory_to_use
,
0.5
,
fraction_of_cuda_pinned_memory_to_use
,
0.5
,
...
@@ -60,6 +68,11 @@ inline size_t CpuTotalPhysicalMemory() {
...
@@ -60,6 +68,11 @@ inline size_t CpuTotalPhysicalMemory() {
size_t
len
=
sizeof
(
size
);
size_t
len
=
sizeof
(
size
);
if
(
sysctl
(
mib
,
2
,
&
size
,
&
len
,
NULL
,
0
)
==
0
)
return
(
size_t
)
size
;
if
(
sysctl
(
mib
,
2
,
&
size
,
&
len
,
NULL
,
0
)
==
0
)
return
(
size_t
)
size
;
return
0L
;
return
0L
;
#elif defined(_WIN32)
MEMORYSTATUSEX
sMeminfo
;
sMeminfo
.
dwLength
=
sizeof
(
sMeminfo
);
GlobalMemoryStatusEx
(
&
sMeminfo
);
return
sMeminfo
.
ullTotalPhys
;
#else
#else
int64_t
pages
=
sysconf
(
_SC_PHYS_PAGES
);
int64_t
pages
=
sysconf
(
_SC_PHYS_PAGES
);
int64_t
page_size
=
sysconf
(
_SC_PAGE_SIZE
);
int64_t
page_size
=
sysconf
(
_SC_PAGE_SIZE
);
...
...
paddle/fluid/platform/device_tracer.h
浏览文件 @
6cc78705
...
@@ -13,7 +13,12 @@ See the License for the specific language governing permissions and
...
@@ -13,7 +13,12 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#if !defined(_WIN32)
#include <sys/time.h>
#include <sys/time.h>
#else
#include <windows.h>
#endif // !_WIN32
#include <time.h>
#include <time.h>
#include <chrono> // NOLINT
#include <chrono> // NOLINT
#include <string>
#include <string>
...
@@ -27,12 +32,15 @@ namespace platform {
...
@@ -27,12 +32,15 @@ namespace platform {
///////////////////////
///////////////////////
// WARN: Under Development. Don't depend on it yet.
// WARN: Under Development. Don't depend on it yet.
//////////////////////
//////////////////////
#if !defined(_WIN32)
inline
uint64_t
PosixInNsec
()
{
inline
uint64_t
PosixInNsec
()
{
struct
timeval
tv
;
struct
timeval
tv
;
gettimeofday
(
&
tv
,
nullptr
);
gettimeofday
(
&
tv
,
nullptr
);
return
1000
*
(
static_cast
<
uint64_t
>
(
tv
.
tv_sec
)
*
1000000
+
tv
.
tv_usec
);
return
1000
*
(
static_cast
<
uint64_t
>
(
tv
.
tv_sec
)
*
1000000
+
tv
.
tv_usec
);
}
}
#else
inline
uint64_t
PosixInNsec
()
{
return
static_cast
<
uint64_t
>
(
0
);
}
#endif // !_WIN32
// DeviceTracer performs the following tasks:
// DeviceTracer performs the following tasks:
// 1. Register cuda callbacks for various events: kernel, memcpy, etc.
// 1. Register cuda callbacks for various events: kernel, memcpy, etc.
...
...
paddle/fluid/platform/dynload/CMakeLists.txt
浏览文件 @
6cc78705
...
@@ -16,7 +16,9 @@ if (CUPTI_FOUND)
...
@@ -16,7 +16,9 @@ if (CUPTI_FOUND)
list
(
APPEND CUDA_SRCS cupti.cc
)
list
(
APPEND CUDA_SRCS cupti.cc
)
endif
(
CUPTI_FOUND
)
endif
(
CUPTI_FOUND
)
nv_library
(
dynload_cuda SRCS
${
CUDA_SRCS
}
DEPS dynamic_loader
)
nv_library
(
dynload_cuda SRCS
${
CUDA_SRCS
}
DEPS dynamic_loader
)
if
(
NOT WIN32
)
cc_library
(
dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc
)
cc_library
(
dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc
)
endif
(
NOT WIN32
)
if
(
WITH_MKLML
)
if
(
WITH_MKLML
)
cc_library
(
dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml
)
cc_library
(
dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml
)
endif
()
endif
()
...
...
paddle/fluid/platform/dynload/dynamic_loader.cc
浏览文件 @
6cc78705
...
@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
...
@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include <dlfcn.h>
#include <memory>
#include <memory>
#include <mutex> // NOLINT
#include <mutex> // NOLINT
#include <string>
#include <string>
...
@@ -23,6 +21,7 @@ limitations under the License. */
...
@@ -23,6 +21,7 @@ limitations under the License. */
#include "glog/logging.h"
#include "glog/logging.h"
#include "paddle/fluid/platform/dynload/cupti_lib_path.h"
#include "paddle/fluid/platform/dynload/cupti_lib_path.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/port.h"
DEFINE_string
(
cudnn_dir
,
""
,
DEFINE_string
(
cudnn_dir
,
""
,
"Specify path for loading libcudnn.so. For instance, "
"Specify path for loading libcudnn.so. For instance, "
...
...
paddle/fluid/platform/enforce.h
浏览文件 @
6cc78705
...
@@ -18,6 +18,11 @@ limitations under the License. */
...
@@ -18,6 +18,11 @@ limitations under the License. */
#include <cxxabi.h> // for __cxa_demangle
#include <cxxabi.h> // for __cxa_demangle
#endif // __GNUC__
#endif // __GNUC__
#if defined(_WIN32)
#define NOMINMAX // msvc max/min macro conflict with std::min/max
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#endif
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
#include <cublas_v2.h>
#include <cublas_v2.h>
#include <cudnn.h>
#include <cudnn.h>
...
@@ -117,7 +122,12 @@ struct EOFException : public std::exception {
...
@@ -117,7 +122,12 @@ struct EOFException : public std::exception {
// always forces branch prediction of true.
// always forces branch prediction of true.
// This generates faster binary code. __builtin_expect is since C++11.
// This generates faster binary code. __builtin_expect is since C++11.
// For more details, please check https://stackoverflow.com/a/43870188/724872.
// For more details, please check https://stackoverflow.com/a/43870188/724872.
#if !defined(_WIN32)
#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
#else
// there is no equivalent intrinsics in msvc.
#define UNLIKELY(condition) (condition == 0)
#endif
template
<
typename
...
Args
>
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
...
@@ -230,6 +240,7 @@ inline void throw_on_error(T e) {
...
@@ -230,6 +240,7 @@ inline void throw_on_error(T e) {
throw_on_error
(
e
,
""
);
throw_on_error
(
e
,
""
);
}
}
#if !defined(_WIN32)
#define PADDLE_THROW(...) \
#define PADDLE_THROW(...) \
do { \
do { \
throw ::paddle::platform::EnforceNotMet( \
throw ::paddle::platform::EnforceNotMet( \
...
@@ -248,15 +259,28 @@ inline void throw_on_error(T e) {
...
@@ -248,15 +259,28 @@ inline void throw_on_error(T e) {
__FILE__, __LINE__); \
__FILE__, __LINE__); \
} \
} \
} while (false)
} while (false)
#else
#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__);
#endif
#define PADDLE_THROW_EOF() \
#define PADDLE_THROW_EOF() \
do { \
do { \
throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
__LINE__); \
__LINE__); \
} while (false)
} while (false)
#else
#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__)
#endif // REPLACE_ENFORCE_GLOG
#else // !_WIN32
// disable enforce, caused by the varardic macro exception error
#define PADDLE_THROW(x) \
do { \
throw std::make_exception_ptr( \
std::runtime_error("Windows disable the enforce.")); \
} while (false)
#define PADDLE_ENFORCE(x, ...) x
#endif // !_WIN32
/*
/*
* Some enforce helpers here, usage:
* Some enforce helpers here, usage:
* int a = 1;
* int a = 1;
...
...
paddle/fluid/platform/profiler.h
浏览文件 @
6cc78705
...
@@ -69,6 +69,7 @@ void PushEvent(const std::string& name, const DeviceContext* dev_ctx);
...
@@ -69,6 +69,7 @@ void PushEvent(const std::string& name, const DeviceContext* dev_ctx);
void
PopEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
);
void
PopEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
);
#if !defined(_WIN32)
struct
RecordEvent
{
struct
RecordEvent
{
RecordEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
);
RecordEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
);
...
@@ -94,6 +95,15 @@ struct RecordBlock {
...
@@ -94,6 +95,15 @@ struct RecordBlock {
std
::
string
name_
;
std
::
string
name_
;
uint64_t
start_ns_
;
uint64_t
start_ns_
;
};
};
#else
// windows do not support profiler temporarily.
struct
RecordEvent
{
RecordEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{}
};
struct
RecordBlock
{
explicit
RecordBlock
(
int
block_id
)
{}
};
#endif
// Return the event list of all threads. Assumed the returned value calls
// Return the event list of all threads. Assumed the returned value calls
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录