未验证 提交 f669bd6b 编写于 作者: Jiaying Zhao 提交者: GitHub

Merge pull request #1203 from smilejames/opencl

update conv3x3 and fix CLImageConverterNWBlock
......@@ -248,8 +248,8 @@ void CLImageConverterNWBlock::NCHWToImage(float *tensor, half_t *image,
for (int c = 0; c < C; c++) {
for (int h = 0; h < H; ++h) {
for (int w = 0; w < W; ++w) {
- int index = 4 * c * (width * H) + 4 * (n / 4) * H * W + h * W * 4 +
- w * 4 + (n % 4);
+ int index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
+ w * 4 + n % 4;
if (n < N) {
image[index] = Float2Half(*p);
p++;
......@@ -283,8 +283,8 @@ void CLImageConverterNWBlock::ImageToNCHW(half_t *image, float *tensor,
for (int c = 0; c < C; c++) {
for (int h = 0; h < H; ++h) {
for (int w = 0; w < W; ++w) {
- int index = 4 * c * (width * H) + 4 * (n / 4) * H * W + h * W * 4 +
- w * 4 + (n % 4);
+ int index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
+ w * 4 + n % 4;
*p = Half2Float(image[index]);
p++;
if (index >= (width * height * 4)) {
......
......@@ -150,8 +150,8 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
} else if (param->Filter()->dims()[2] == 3 &&
param->Filter()->dims()[3] == 3) {
- param->Filter()->InitCLImage(cl_helper_.CLContext(),
- cl_helper_.CLCommandQueue());
+ param->Filter()->InitNImage(cl_helper_.CLContext(),
+ cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
DLOG << " conv add bn relu conv_3x3";
......
......@@ -45,7 +45,7 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
} else if (param->Filter()->dims()[2] == 3 &&
param->Filter()->dims()[3] == 3) {
- param->Filter()->InitCLImage(cl_helper_.CLContext(),
+ param->Filter()->InitNImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", "conv_add_kernel.cl");
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册