Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Oneflow-Inc
oneflow
提交
c5fb5df0
O
oneflow
项目概览
Oneflow-Inc
/
oneflow
上一次同步 2 年多
通知
13
Star
2733
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
oneflow
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
c5fb5df0
编写于
7月 01, 2017
作者:
W
willzhang4a58
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
CpuStream
上级
330ef814
变更
10
隐藏空白更改
内联
并排
Showing
10 changed files
with
58 additions
and
27 deletions
+58
-27
oneflow/core/device/cpu_device_context.h
oneflow/core/device/cpu_device_context.h
+2
-2
oneflow/core/device/cpu_stream.h
oneflow/core/device/cpu_stream.h
+33
-0
oneflow/core/device/device_context.h
oneflow/core/device/device_context.h
+4
-7
oneflow/core/kernel/clone_kernel_test.cpp
oneflow/core/kernel/clone_kernel_test.cpp
+2
-2
oneflow/core/kernel/innerproduct_kernel_test.cpp
oneflow/core/kernel/innerproduct_kernel_test.cpp
+2
-2
oneflow/core/kernel/kernel_util.cpp
oneflow/core/kernel/kernel_util.cpp
+9
-9
oneflow/core/kernel/model_save_kernel.cpp
oneflow/core/kernel/model_save_kernel.cpp
+1
-1
oneflow/core/thread/cpu_thread.cpp
oneflow/core/thread/cpu_thread.cpp
+1
-1
oneflow/core/thread/cpu_thread.h
oneflow/core/thread/cpu_thread.h
+2
-1
oneflow/core/thread/thread_context.h
oneflow/core/thread/thread_context.h
+2
-2
未找到文件。
oneflow/core/device/cpu_device_context.h
浏览文件 @
c5fb5df0
...
...
@@ -11,10 +11,10 @@ class CpuDeviceCtx final : public DeviceCtx {
CpuDeviceCtx
()
=
delete
;
~
CpuDeviceCtx
()
=
default
;
CpuDeviceCtx
(
C
hannel
<
std
::
function
<
void
()
>>*
chan
)
{
set_cpu_stream
(
chan
);
}
CpuDeviceCtx
(
C
puStream
*
val
)
{
set_cpu_stream
(
val
);
}
void
AddCallBack
(
std
::
function
<
void
()
>
callback
)
const
override
{
cpu_stream
()
->
Send
(
callback
);
cpu_stream
()
->
Send
Work
(
callback
);
}
private:
...
...
oneflow/core/device/cpu_stream.h
0 → 100644
浏览文件 @
c5fb5df0
#ifndef ONEFLOW_CORE_DEVICE_CPU_STREAM_H_
#define ONEFLOW_CORE_DEVICE_CPU_STREAM_H_

#include <functional>  // std::function

#include "oneflow/core/common/channel.h"
// NOTE(review): OF_DISALLOW_COPY_AND_MOVE and CHECK_EQ are used below but were
// not brought in by this header; include their home explicitly (IWYU) instead
// of relying on a transitive include through channel.h.
#include "oneflow/core/common/util.h"

namespace oneflow {

// A CPU-side work queue: producers enqueue std::function<void()> tasks via
// SendWork(), and a worker thread drains them with ReceiveWork() until the
// channel is closed. Thin wrapper over Channel<std::function<void()>>.
class CpuStream final {
 public:
  OF_DISALLOW_COPY_AND_MOVE(CpuStream);
  CpuStream() = default;
  ~CpuStream() = default;

  // Enqueue one unit of work. CHECK-fails if the send end has been closed
  // (Channel::Send returns non-zero after CloseSendEnd()).
  void SendWork(std::function<void()> work) {
    CHECK_EQ(work_channel_.Send(work), 0);
  }

  // Blocking receive of the next work item into *work.
  //  0: success
  // -1: fail (channel closed and drained)
  int ReceiveWork(std::function<void()>* work) {
    return work_channel_.Receive(work);
  }

  // Forwarders for shutting down the two ends of the underlying channel.
  void CloseSendEnd() { work_channel_.CloseSendEnd(); }
  void CloseReceiveEnd() { work_channel_.CloseReceiveEnd(); }

 private:
  Channel<std::function<void()>> work_channel_;
};

}  // namespace oneflow

#endif  // ONEFLOW_CORE_DEVICE_CPU_STREAM_H_
oneflow/core/device/device_context.h
浏览文件 @
c5fb5df0
#ifndef ONEFLOW_CORE_DEVICE_DEVICE_CONTEXT_H_
#define ONEFLOW_CORE_DEVICE_DEVICE_CONTEXT_H_
#include "oneflow/core/common/channel.h"
#include "oneflow/core/common/util.h"
#include "oneflow/core/device/cpu_stream.h"
namespace
oneflow
{
...
...
@@ -11,7 +10,7 @@ class DeviceCtx {
// OF_DISALLOW_COPY_AND_MOVE(DeviceCtx);
virtual
~
DeviceCtx
()
=
default
;
C
hannel
<
std
::
function
<
void
()
>>
*
cpu_stream
()
const
{
return
cpu_stream_
;
}
C
puStream
*
cpu_stream
()
const
{
return
cpu_stream_
;
}
const
cudaStream_t
&
cuda_stream
()
const
{
return
*
cuda_stream_
;
}
const
cublasHandle_t
&
cublas_handle
()
const
{
return
*
cublas_handle_
;
}
const
cudnnHandle_t
&
cudnn_handle
()
const
{
return
*
cudnn_handle_
;
}
...
...
@@ -25,15 +24,13 @@ class DeviceCtx {
cublas_handle_
(
nullptr
),
cudnn_handle_
(
nullptr
)
{}
void
set_cpu_stream
(
Channel
<
std
::
function
<
void
()
>>*
val
)
{
cpu_stream_
=
val
;
}
void
set_cpu_stream
(
CpuStream
*
val
)
{
cpu_stream_
=
val
;
}
void
set_cuda_stream
(
const
cudaStream_t
*
val
)
{
cuda_stream_
=
val
;
}
void
set_cublas_handle
(
const
cublasHandle_t
*
val
)
{
cublas_handle_
=
val
;
}
void
set_cudnn_handle
(
const
cudnnHandle_t
*
val
)
{
cudnn_handle_
=
val
;
}
private:
C
hannel
<
std
::
function
<
void
()
>>
*
cpu_stream_
;
C
puStream
*
cpu_stream_
;
const
cudaStream_t
*
cuda_stream_
;
const
cublasHandle_t
*
cublas_handle_
;
const
cudnnHandle_t
*
cudnn_handle_
;
...
...
oneflow/core/kernel/clone_kernel_test.cpp
浏览文件 @
c5fb5df0
...
...
@@ -125,7 +125,7 @@ void InitBn2BlobPtr(HashMap<std::string, Blob*>& bn2blob_ptr,
void
CPUStreamExec
(
int
out_num
,
std
::
function
<
Blob
*
(
const
std
::
string
&
)
>
fp
)
{
KernelCtx
ctx
;
ctx
.
device_ctx
=
new
CpuDeviceCtx
(
new
C
hannel
<
std
::
function
<
void
()
>>
);
ctx
.
device_ctx
=
new
CpuDeviceCtx
(
new
C
puStream
);
auto
clone_kernel
=
ConstructCloneKernel
<
DeviceType
::
kCPU
>
(
out_num
,
"clone_kernel_test"
);
...
...
@@ -136,7 +136,7 @@ void CPUStreamExec(int out_num, std::function<Blob*(const std::string&)> fp) {
std
::
function
<
void
()
>
work
;
// Both Forward and Backward receive out_num times
for
(
int
i
=
0
;
i
<
out_num
*
2
;
++
i
)
{
if
(
ctx
.
device_ctx
->
cpu_stream
()
->
Receive
(
&
work
)
==
0
)
{
work
();
}
if
(
ctx
.
device_ctx
->
cpu_stream
()
->
Receive
Work
(
&
work
)
==
0
)
{
work
();
}
}
});
cpu_thread
.
join
();
...
...
oneflow/core/kernel/innerproduct_kernel_test.cpp
浏览文件 @
c5fb5df0
...
...
@@ -107,7 +107,7 @@ void BuildKernelCtx(KernelCtx* ctx);
template
<
>
void
BuildKernelCtx
<
DeviceType
::
kCPU
>
(
KernelCtx
*
ctx
)
{
auto
cpu_stream
=
new
C
hannel
<
std
::
function
<
void
()
>>
;
auto
cpu_stream
=
new
C
puStream
;
ctx
->
device_ctx
=
new
CpuDeviceCtx
(
cpu_stream
);
}
...
...
@@ -153,7 +153,7 @@ void SyncStream<DeviceType::kCPU>(KernelCtx* ctx) {
auto
cpu_thread
=
std
::
thread
([
&
]
{
std
::
function
<
void
()
>
work
;
while
(
ctx
->
device_ctx
->
cpu_stream
()
->
Receive
(
&
work
)
==
0
)
{
work
();
}
while
(
ctx
->
device_ctx
->
cpu_stream
()
->
Receive
Work
(
&
work
)
==
0
)
{
work
();
}
});
cpu_thread
.
join
();
}
...
...
oneflow/core/kernel/kernel_util.cpp
浏览文件 @
c5fb5df0
...
...
@@ -11,13 +11,13 @@ class KernelUtil<DeviceType::kCPU, FloatingPointType> final {
static
void
Memcpy
(
const
KernelCtx
&
ctx
,
void
*
dst
,
const
void
*
src
,
size_t
sz
,
cudaMemcpyKind
kind
=
cudaMemcpyKind
::
cudaMemcpyHostToHost
)
{
ctx
.
device_ctx
->
cpu_stream
()
->
Send
(
ctx
.
device_ctx
->
cpu_stream
()
->
Send
Work
(
[
dst
,
src
,
sz
]()
{
memcpy
(
dst
,
src
,
sz
);
});
}
static
void
Memset
(
const
KernelCtx
&
ctx
,
void
*
dst
,
const
char
value
,
size_t
sz
)
{
ctx
.
device_ctx
->
cpu_stream
()
->
Send
(
ctx
.
device_ctx
->
cpu_stream
()
->
Send
Work
(
[
dst
,
value
,
sz
]()
{
memset
(
dst
,
value
,
sz
);
});
}
...
...
@@ -25,7 +25,7 @@ class KernelUtil<DeviceType::kCPU, FloatingPointType> final {
const
FloatingPointType
alpha
,
const
FloatingPointType
*
x
,
const
int
incx
,
FloatingPointType
*
y
,
const
int
incy
)
{
ctx
.
device_ctx
->
cpu_stream
()
->
Send
([
n
,
alpha
,
x
,
incx
,
y
,
incy
]()
{
ctx
.
device_ctx
->
cpu_stream
()
->
Send
Work
([
n
,
alpha
,
x
,
incx
,
y
,
incy
]()
{
cblas_axpy
(
n
,
alpha
,
x
,
incx
,
y
,
incy
);
});
}
...
...
@@ -33,7 +33,7 @@ class KernelUtil<DeviceType::kCPU, FloatingPointType> final {
static
void
BlasScal
(
const
KernelCtx
&
ctx
,
const
int
n
,
const
FloatingPointType
alpha
,
FloatingPointType
*
x
,
const
int
incx
)
{
ctx
.
device_ctx
->
cpu_stream
()
->
Send
(
ctx
.
device_ctx
->
cpu_stream
()
->
Send
Work
(
[
n
,
alpha
,
x
,
incx
]()
{
cblas_scal
(
n
,
alpha
,
x
,
incx
);
});
}
...
...
@@ -43,7 +43,7 @@ class KernelUtil<DeviceType::kCPU, FloatingPointType> final {
const
FloatingPointType
*
x
,
const
int
incx
,
const
FloatingPointType
beta
,
FloatingPointType
*
y
,
const
int
incy
)
{
ctx
.
device_ctx
->
cpu_stream
()
->
Send
([
=
]()
{
ctx
.
device_ctx
->
cpu_stream
()
->
Send
Work
([
=
]()
{
// Set col major to keep it as the same with cublas
cblas_gemv
(
CBLAS_ORDER
::
CblasColMajor
,
trans
,
m
,
n
,
alpha
,
a
,
lda
,
x
,
incx
,
beta
,
y
,
incy
);
...
...
@@ -58,7 +58,7 @@ class KernelUtil<DeviceType::kCPU, FloatingPointType> final {
const
FloatingPointType
*
b
,
const
int
ldb
,
const
FloatingPointType
beta
,
FloatingPointType
*
c
,
const
int
ldc
)
{
ctx
.
device_ctx
->
cpu_stream
()
->
Send
([
=
]()
{
ctx
.
device_ctx
->
cpu_stream
()
->
Send
Work
([
=
]()
{
cblas_gemm
(
order
,
trans_a
,
trans_b
,
m
,
n
,
k
,
alpha
,
a
,
lda
,
b
,
ldb
,
beta
,
c
,
ldc
);
});
...
...
@@ -68,20 +68,20 @@ class KernelUtil<DeviceType::kCPU, FloatingPointType> final {
const
FloatingPointType
*
x
,
const
int
incx
,
const
FloatingPointType
*
y
,
const
int
incy
,
FloatingPointType
*
result
)
{
ctx
.
device_ctx
->
cpu_stream
()
->
Send
(
ctx
.
device_ctx
->
cpu_stream
()
->
Send
Work
(
[
=
]()
{
*
result
=
cblas_dot
(
n
,
x
,
incx
,
y
,
incy
);
});
}
static
void
BlasSwap
(
const
KernelCtx
&
ctx
,
const
int
n
,
FloatingPointType
*
x
,
const
int
incx
,
FloatingPointType
*
y
,
const
int
incy
)
{
ctx
.
device_ctx
->
cpu_stream
()
->
Send
(
ctx
.
device_ctx
->
cpu_stream
()
->
Send
Work
(
[
=
]()
{
cblas_swap
(
n
,
x
,
incx
,
y
,
incy
);
});
}
static
void
BlasCopy
(
const
KernelCtx
&
ctx
,
const
int
n
,
const
FloatingPointType
*
x
,
const
int
incx
,
FloatingPointType
*
y
,
const
int
incy
)
{
ctx
.
device_ctx
->
cpu_stream
()
->
Send
(
ctx
.
device_ctx
->
cpu_stream
()
->
Send
Work
(
[
=
]()
{
cblas_copy
(
n
,
x
,
incx
,
y
,
incy
);
});
}
};
...
...
oneflow/core/kernel/model_save_kernel.cpp
浏览文件 @
c5fb5df0
...
...
@@ -13,7 +13,7 @@ void ModelSaveKernel<DeviceType::kCPU, FloatingPointType>::Forward(
for
(
const
std
::
string
&
ibn
:
op
()
->
input_bns
())
{
const
std
::
string
&
lbn
=
op
()
->
Lbn4BnInOp
(
ibn
);
Blob
*
blob_ptr
=
BnInOp2BlobPtr
(
ibn
);
kernel_ctx
.
device_ctx
->
cpu_stream
()
->
Send
([
=
]()
{
kernel_ctx
.
device_ctx
->
cpu_stream
()
->
Send
Work
([
=
]()
{
std
::
unique_ptr
<
PersistentOutStream
>
out_stream
=
snapshot
->
GetOutStream
(
lbn
,
parallel_id
);
out_stream
->
Write
(
...
...
oneflow/core/thread/cpu_thread.cpp
浏览文件 @
c5fb5df0
...
...
@@ -5,7 +5,7 @@ namespace oneflow {
CpuThread
::
CpuThread
()
{
cpu_device_
=
std
::
thread
([
this
]()
{
std
::
function
<
void
()
>
work
;
while
(
cpu_stream_
.
Receive
(
&
work
)
==
0
)
{
work
();
}
while
(
cpu_stream_
.
Receive
Work
(
&
work
)
==
0
)
{
work
();
}
});
mut_actor_thread
()
=
std
::
thread
([
this
]()
{
ThreadCtx
ctx
;
...
...
oneflow/core/thread/cpu_thread.h
浏览文件 @
c5fb5df0
#ifndef ONEFLOW_CORE_THREAD_CPU_THREAD_H_
#define ONEFLOW_CORE_THREAD_CPU_THREAD_H_
#include "oneflow/core/device/cpu_stream.h"
#include "oneflow/core/thread/thread.h"
namespace
oneflow
{
...
...
@@ -13,7 +14,7 @@ class CpuThread final : public Thread {
private:
std
::
thread
cpu_device_
;
C
hannel
<
std
::
function
<
void
()
>>
cpu_stream_
;
C
puStream
cpu_stream_
;
};
}
// namespace oneflow
...
...
oneflow/core/thread/thread_context.h
浏览文件 @
c5fb5df0
#ifndef ONEFLOW_CORE_THREAD_THREAD_CONTEXT_H_
#define ONEFLOW_CORE_THREAD_THREAD_CONTEXT_H_
#include "oneflow/core/
common/channel
.h"
#include "oneflow/core/
device/cpu_stream
.h"
namespace
oneflow
{
struct
ThreadCtx
{
ThreadCtx
()
:
cpu_stream
(
nullptr
),
copy_hd_cuda_stream
(
nullptr
)
{}
C
hannel
<
std
::
function
<
void
()
>>
*
cpu_stream
;
C
puStream
*
cpu_stream
;
const
cudaStream_t
*
copy_hd_cuda_stream
;
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录