Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
06fa79aa
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
06fa79aa
编写于
3月 23, 2018
作者:
U
Unknown
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix conflicts
上级
fb8d09a8
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
0 additions
and
178 deletions
+0
-178
mace/kernels/opencl/cl/space_to_depth.cl
mace/kernels/opencl/cl/space_to_depth.cl
+0
-25
mace/kernels/opencl/space_to_depth_opencl.cc
mace/kernels/opencl/space_to_depth_opencl.cc
+0
-77
mace/kernels/space_to_depth.h
mace/kernels/space_to_depth.h
+0
-76
未找到文件。
mace/kernels/opencl/cl/space_to_depth.cl
已删除
100644 → 0
浏览文件 @
fb8d09a8
#
include
<common.h>
// Moves one texel from `input` to its space-to-depth position in `output`.
//
// Work-item layout, as used by the index math below:
//   global id 0 -> depth index, global id 1 -> width index,
//   global id 2 -> row index (used directly as the image y coordinate).
// The input width is taken from get_global_size(1).
//
// `input_depth` is the stride added per in-block position when computing the
// output depth index; it is expressed in the same unit as global id 0.
// NOTE(review): DATA_TYPE4, READ_IMAGET, WRITE_IMAGET and SAMPLER are not
// defined in this file; presumably they come from <common.h> - confirm.
__kernel void space_to_depth(__read_only image2d_t input,
                             __private const int block_size,
                             __private const int input_depth,
                             __write_only image2d_t output) {
  const int depth_idx = get_global_id(0);
  const int width_idx = get_global_id(1);
  const int row_idx = get_global_id(2);

  const int in_width = get_global_size(1);
  const int out_width = in_width / block_size;

  // Position of this element inside its block_size x block_size block.
  const int block_row = row_idx % block_size;
  const int block_col = width_idx % block_size;

  // Each in-block position is mapped to its own slice of the output depth.
  const int out_depth_idx =
      depth_idx + (block_row * block_size + block_col) * input_depth;

  const int2 src_coord =
      (int2)(mad24(depth_idx, in_width, width_idx), row_idx);
  const int2 dst_coord =
      (int2)(mad24(out_depth_idx, out_width, width_idx / block_size),
             row_idx / block_size);

  DATA_TYPE4 texel = READ_IMAGET(input, SAMPLER, src_coord);
  WRITE_IMAGET(output, dst_coord, texel);
}
mace/kernels/opencl/space_to_depth_opencl.cc
已删除
100644 → 0
浏览文件 @
fb8d09a8
//
// Copyright (c) 2018 XiaoMi All rights reserved.
//
#include "mace/kernels/space_to_depth.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h"
#include "mace/utils/tuner.h"
namespace
mace
{
namespace
kernels
{
// OpenCL implementation of the SpaceToDepth functor call operator.
//
// Steps, in order:
//   1. Derive the output shape from the input dims and block_size_, and
//      resize the output image accordingly.
//   2. Build the "space_to_depth" kernel the first time through (kernel_ is
//      still empty).
//   3. Re-bind the kernel arguments whenever the input shape differs from the
//      cached input_shape_.
//   4. Launch through TuningOrRun3DKernel with a tuning key derived from the
//      input dims.
template <typename T>
void SpaceToDepthOpFunctor<DeviceType::OPENCL, T>::operator()(
    const Tensor *input, Tensor *output, StatsFuture *future) {
  const index_t batch = input->dim(0);
  const index_t in_height = input->dim(1);
  const index_t in_width = input->dim(2);
  const index_t in_depth = input->dim(3);

  const index_t out_height = in_height / block_size_;
  const index_t out_width = in_width / block_size_;
  const index_t out_depth = in_depth * block_size_ * block_size_;

  std::vector<index_t> output_shape = {batch, out_height, out_width,
                                       out_depth};
  std::vector<size_t> image_shape;
  CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &image_shape);
  output->ResizeImage(output_shape, image_shape);

  // The kernel is dispatched per group of four channels along depth.
  const int depth_blocks = RoundUpDiv4(in_depth);

  if (kernel_.get() == nullptr) {
    auto runtime = OpenCLRuntime::Global();
    std::set<std::string> built_options;
    std::string kernel_name = MACE_OBFUSCATE_SYMBOL("space_to_depth");
    built_options.emplace("-Dspace_to_depth=" + kernel_name);
    auto dt = DataTypeToEnum<T>::value;
    built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
    built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
    kernel_ =
        runtime->BuildKernel("space_to_depth", kernel_name, built_options);
  }

  // Arguments only need to be bound again when the input shape has changed.
  if (!IsVecEqual(input_shape_, input->shape())) {
    uint32_t arg_index = 0;
    kernel_.setArg(arg_index++, *(input->opencl_image()));
    kernel_.setArg(arg_index++, block_size_);
    kernel_.setArg(arg_index++, depth_blocks);
    kernel_.setArg(arg_index++, *(output->opencl_image()));

    input_shape_ = input->shape();
  }

  const uint32_t gws[3] = {static_cast<uint32_t>(depth_blocks),
                           static_cast<uint32_t>(in_width),
                           static_cast<uint32_t>(in_height * batch)};
  const std::vector<uint32_t> lws = {8, 16, 8, 1};

  std::stringstream tuning_key;
  tuning_key << "space_to_depth_opencl_kernel_" << batch << "_" << in_height
             << "_" << in_width << "_" << in_depth;

  TuningOrRun3DKernel(kernel_, tuning_key.str(), gws, lws, future);
}
// Explicit instantiations of the OpenCL functor for the float and half
// element types.
template
struct
SpaceToDepthOpFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
SpaceToDepthOpFunctor
<
DeviceType
::
OPENCL
,
half
>;
}
// namespace kernels
}
// namespace mace
mace/kernels/space_to_depth.h
已删除
100644 → 0
浏览文件 @
fb8d09a8
//
// Created by liutuo on 18-3-20.
//
#ifndef MACE_KERNELS_SPACE_TO_DEPTH_H
#define MACE_KERNELS_SPACE_TO_DEPTH_H
#include "mace/core/future.h"
#include "mace/core/tensor.h"
namespace
mace
{
namespace
kernels
{
// Generic (host memory) SpaceToDepth functor.
//
// The input dims are read as (batch, height, width, depth). Every
// block_size x block_size spatial block is folded into the depth dimension,
// so the output is resized to
//   (batch, height / block_size, width / block_size,
//    depth * block_size * block_size).
//
// The divisions truncate. Rows and columns that do not fill a complete block
// have no destination in the output and are skipped, which keeps every write
// inside the resized output buffer.
//
// Precondition: block_size must be positive (it is used as a divisor).
template <DeviceType D, typename T>
struct SpaceToDepthOpFunctor {
  explicit SpaceToDepthOpFunctor(const int block_size)
      : block_size_(block_size) {}

  // Copies `input` into `output` in space-to-depth order.
  // `future` is not used by this implementation.
  void operator()(const Tensor *input, Tensor *output, StatsFuture *future) {
    const index_t batch_size = input->dim(0);
    const index_t input_height = input->dim(1);
    const index_t input_width = input->dim(2);
    const index_t input_depth = input->dim(3);

    const index_t output_depth = input_depth * block_size_ * block_size_;
    const index_t output_width = input_width / block_size_;
    const index_t output_height = input_height / block_size_;

    std::vector<index_t> output_shape = {batch_size, output_height,
                                         output_width, output_depth};
    output->Resize(output_shape);

    Tensor::MappingGuard input_guard(input);
    Tensor::MappingGuard output_guard(output);
    const T *input_ptr = input->data<T>();
    T *output_ptr = output->mutable_data<T>();

    // Only the region covered by whole blocks maps into the output. Iterating
    // past it would produce out_h == output_height or out_w == output_width
    // and write outside the intended output row / buffer.
    const index_t covered_height = output_height * block_size_;
    const index_t covered_width = output_width * block_size_;

#pragma omp parallel for
    for (index_t b = 0; b < batch_size; ++b) {
      for (index_t h = 0; h < covered_height; ++h) {
        const index_t out_h = h / block_size_;
        const index_t offset_h = h % block_size_;
        for (index_t w = 0; w < covered_width; ++w) {
          const index_t out_w = w / block_size_;
          const index_t offset_w = w % block_size_;
          // Each position inside a block owns a contiguous run of
          // input_depth channels in the output depth dimension.
          const index_t offset_d =
              (offset_h * block_size_ + offset_w) * input_depth;

          // Flat offsets are kept in index_t so large tensors do not
          // overflow a 32-bit int.
          const index_t o_base =
              ((b * output_height + out_h) * output_width + out_w) *
                  output_depth +
              offset_d;
          const index_t i_base =
              ((b * input_height + h) * input_width + w) * input_depth;

          for (index_t d = 0; d < input_depth; ++d) {
            output_ptr[o_base + d] = input_ptr[i_base + d];
          }
        }
      }
    }
  }

  const int block_size_;
};
// OpenCL specialization of the SpaceToDepth functor.
//
// Only the call operator's declaration lives here; its definition is provided
// out of line. The struct keeps the kernel handle and the last input shape it
// was invoked with as members.
template <typename T>
struct SpaceToDepthOpFunctor<DeviceType::OPENCL, T> {
  // `explicit` matches the primary template's constructor and prevents an
  // accidental implicit conversion from int to a functor.
  explicit SpaceToDepthOpFunctor(const int block_size)
      : block_size_(block_size) {}

  void operator()(const Tensor *input, Tensor *output, StatsFuture *future);

  cl::Kernel kernel_;
  const int block_size_;
  std::vector<index_t> input_shape_;
};
}
// namespace kernels
}
// namespace mace
#endif //MACE_KERNELS_SPACE_TO_DEPTH_H
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录