From 13ab061f13d0f7aaa55c8eb4511278ab9ad3aaef Mon Sep 17 00:00:00 2001 From: liuruilong Date: Tue, 16 Oct 2018 22:24:23 +0800 Subject: [PATCH] update conv kernel --- CMakeLists.txt | 1 - src/framework/cl/cl_half.cpp | 6 ++ src/framework/cl/cl_half.h | 6 ++ src/framework/cl/cl_image.cpp | 23 ++++--- src/framework/cl/cl_image.h | 56 +++++++++------- src/framework/executor.cpp | 12 ++-- src/framework/operator.cpp | 4 +- src/operators/kernel/cl/batchnorm_kernel.cpp | 6 +- .../kernel/cl/cl_kernel/conv_kernel.cl | 1 + .../kernel/cl/conv_add_bn_relu_kernel.cpp | 12 ++-- src/operators/kernel/cl/conv_add_kernel.cpp | 6 +- src/operators/kernel/cl/conv_kernel.cpp | 14 +++- .../kernel/cl/depthwise_conv_kernel.cpp | 3 +- src/operators/kernel/cl/feed_kernel.cpp | 2 +- src/operators/kernel/cl/fetch_kernel.cpp | 67 ++++++++++--------- src/operators/kernel/cl/reshape_kernel.cpp | 4 +- tools/pre-commit.hooks/clang-format.hook | 2 +- 17 files changed, 133 insertions(+), 92 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 54f724df79..020a9179d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,7 +16,6 @@ file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm) file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h) include_directories(src/) - if(IS_IOS) set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc -std=gnu++11 -stdlib=libc++ -O3 -s -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}") else() diff --git a/src/framework/cl/cl_half.cpp b/src/framework/cl/cl_half.cpp index 40f94c9d4d..2877289325 100644 --- a/src/framework/cl/cl_half.cpp +++ b/src/framework/cl/cl_half.cpp @@ -16,6 +16,9 @@ limitations under the License. 
*/ #include "framework/cl/cl_half.h" +namespace paddle_mobile { +namespace framework { + static const uint32_t mantissatable[2048] = { 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34a00000, 0x34c00000, 0x34e00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, @@ -510,3 +513,6 @@ void HalfArray2FloatArray(half_t *h_array, float *f_array, int count) { f_array[i] = Half2Float(h_array[i]); } } + +} // namespace framework +} // namespace paddle_mobile diff --git a/src/framework/cl/cl_half.h b/src/framework/cl/cl_half.h index fc864912b0..9b05740f1e 100644 --- a/src/framework/cl/cl_half.h +++ b/src/framework/cl/cl_half.h @@ -15,6 +15,9 @@ limitations under the License. */ #pragma once #include +namespace paddle_mobile { +namespace framework { + typedef uint16_t half_t; half_t Float2Half(float f); @@ -24,3 +27,6 @@ float Half2Float(half_t h); void FloatArray2HalfArray(float *f_array, half_t *h_array, int count); void HalfArray2FloatArray(half_t *h_array, float *f_array, int count); + +} // namespace framework +} // namespace paddle_mobile diff --git a/src/framework/cl/cl_image.cpp b/src/framework/cl/cl_image.cpp index a0cbdb4309..8b0316af4f 100644 --- a/src/framework/cl/cl_image.cpp +++ b/src/framework/cl/cl_image.cpp @@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "cl_image.h" +#include "framework/cl/cl_image.h" + namespace paddle_mobile { namespace framework { void CLImageToTensor(CLImage *cl_image, Tensor *tensor, @@ -63,7 +64,7 @@ void CLImageToTensor(CLImage *cl_image, Tensor *tensor, } if (err != CL_SUCCESS) { - // TODO: error handling + CL_CHECK_ERRORS(err); } } void TensorToCLImage(const Tensor *tensor, CLImage *cl_image, @@ -97,7 +98,7 @@ void TensorToCLImage(const Tensor *tensor, CLImage *cl_image, err = clEnqueueReadImage(commandQueue, image, CL_TRUE, origin, region, 0, 0, imageData, 0, NULL, NULL); if (err != CL_SUCCESS) { - // TODO: error handling + CL_CHECK_ERRORS(err); } size_t i0 = 0; for (int n = 0; n < N; n++) { @@ -117,7 +118,7 @@ void TensorToCLImage(const Tensor *tensor, CLImage *cl_image, } } #ifdef PADDLE_MOBILE_DEBUG -Print &operator<<(Print &printer, const CLImage &cl_image){ +Print &operator<<(Print &printer, const CLImage &cl_image) { printer << " dims: " << cl_image.dims() << "\n"; int stride = cl_image.numel() / 20; stride = stride > 0 ? 
stride : 1; @@ -148,8 +149,8 @@ Print &operator<<(Print &printer, const CLImage &cl_image){ cl_mem image = cl_image.GetCLImage(); size_t origin[3] = {0, 0, 0}; size_t region[3] = {width, height, 1}; - err = clEnqueueReadImage(cl_image.CommandQueue(), image, CL_TRUE, origin, region, 0, 0, - imageData, 0, NULL, NULL); + err = clEnqueueReadImage(cl_image.CommandQueue(), image, CL_TRUE, origin, + region, 0, 0, imageData, 0, NULL, NULL); size_t i0 = 0; for (int n = 0; n < N; n++) { for (int c = 0; c < C; c++) { @@ -168,13 +169,13 @@ Print &operator<<(Print &printer, const CLImage &cl_image){ } if (err != CL_SUCCESS) { - // TODO: error handling + CL_CHECK_ERRORS(err); + } + for (int i = 0; i < cl_image.numel(); i += stride) { + printer << data[i] << " "; } - for (int i = 0; i < cl_image.numel(); i += stride) { - printer << data[i] << " "; - } return printer; - } +} #endif } // namespace framework } // namespace paddle_mobile diff --git a/src/framework/cl/cl_image.h b/src/framework/cl/cl_image.h index e478408341..cac203ccc7 100644 --- a/src/framework/cl/cl_image.h +++ b/src/framework/cl/cl_image.h @@ -46,27 +46,28 @@ class CLImage { /* * need call SetTensorData first * */ - void InitCLImage(cl_context context,cl_command_queue command_queue) { + void InitCLImage(cl_context context, cl_command_queue command_queue) { if (tensor_data_ == nullptr) { PADDLE_MOBILE_THROW_EXCEPTION(" need call SetTensorData first"); } if (tensor_dims_.size() <= 2) { - InitCLImage2C(context, command_queue,tensor_data_, tensor_dims_); + InitCLImage2C(context, command_queue, tensor_data_, tensor_dims_); } else { - InitCLImage(context, command_queue,tensor_data_, tensor_dims_); + InitCLImage(context, command_queue, tensor_data_, tensor_dims_); } delete[](tensor_data_); tensor_data_ = nullptr; initialized_ = true; } - void InitEmptyImage(cl_context context, cl_command_queue command_queue,const DDim &dim) { + void InitEmptyImage(cl_context context, cl_command_queue command_queue, + const DDim &dim) { 
if (tensor_data_ != nullptr) { PADDLE_MOBILE_THROW_EXCEPTION( " empty image tensor data shouldn't have value"); } DLOG << " init empty image "; - InitCLImage(context, command_queue,nullptr, dim); + InitCLImage(context, command_queue, nullptr, dim); initialized_ = true; } @@ -93,7 +94,7 @@ class CLImage { * */ inline size_t HeightOfOneBlock() const { return height_of_one_block_; } - inline cl_command_queue CommandQueue() const{ return command_queue_;} + inline cl_command_queue CommandQueue() const { return command_queue_; } /* * resize original tensor dim @@ -124,7 +125,8 @@ class CLImage { const DDim &dims() const { return tensor_dims_; } private: - void InitCLImage2C(cl_context context, cl_command_queue command_queue,float *tensor_data, const DDim &dim) { + void InitCLImage2C(cl_context context, cl_command_queue command_queue, + float *tensor_data, const DDim &dim) { command_queue_ = command_queue; assert(dim.size() <= 2); int tdim[2] = {1, 1}; @@ -141,43 +143,44 @@ class CLImage { imageData.reset(new half_t[width * height * 4]); for (int h = 0; h < tdim[0]; h++) { for (int w = 0; w < tdim[1]; w++) { - imageData[(h * width + w / 4) * 4 + (w % 4)] = Float2Half(tensor_data[h * tdim[1] + w]); + imageData[(h * width + w / 4) * 4 + (w % 4)] = + Float2Half(tensor_data[h * tdim[1] + w]); } } } InitCLImage(context, width, height, imageData.get()); } - void InitCLImage(cl_context context,int width, int height, void *data) { + void InitCLImage(cl_context context, int width, int height, void *data) { cl_image_format cf = {.image_channel_order = CL_RGBA, .image_channel_data_type = CL_HALF_FLOAT}; cl_image_desc cid = { - .image_type = CL_MEM_OBJECT_IMAGE2D, - .image_width = width, - .image_height = height, - .image_depth = 1, - .image_array_size = 1, - .image_row_pitch = 0, - .image_slice_pitch = 0, - .num_mip_levels = 0, - .num_samples = 0, - // .buffer = nullptr + .image_type = CL_MEM_OBJECT_IMAGE2D, + .image_width = width, + .image_height = height, + .image_depth = 1, + 
.image_array_size = 1, + .image_row_pitch = 0, + .image_slice_pitch = 0, + .num_mip_levels = 0, + .num_samples = 0, + // .buffer = nullptr }; cid.buffer = nullptr; cl_int err; cl_image_ = clCreateImage( - context, CL_MEM_READ_WRITE | (data ? CL_MEM_COPY_HOST_PTR : 0), - &cf, // const cl_image_format *image_format - &cid, // const cl_image_desc *image_desc - data, // void *host_ptr - &err - ); + context, CL_MEM_READ_WRITE | (data ? CL_MEM_COPY_HOST_PTR : 0), + &cf, // const cl_image_format *image_format + &cid, // const cl_image_desc *image_desc + data, // void *host_ptr + &err); if (err != CL_SUCCESS) { CL_CHECK_ERRORS(err); PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error "); } } - void InitCLImage(cl_context context, cl_command_queue command_queue,float *tensor_data, const DDim &dim) { + void InitCLImage(cl_context context, cl_command_queue command_queue, + float *tensor_data, const DDim &dim) { DLOG << " tensor dim: " << dim; // NCHW -> [W * (C+3)/4, H * N] tensor_dims_ = dim; @@ -207,6 +210,7 @@ class CLImage { image_width_ = width; image_height_ = height; image_dims_ = make_ddim({image_width_, image_height_}); + c_block_ = W / width; std::unique_ptr imageData{}; int count = 0; diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp index 43725f6b2c..7980a2d1f6 100644 --- a/src/framework/executor.cpp +++ b/src/framework/executor.cpp @@ -37,7 +37,7 @@ limitations under the License. 
*/ #include "framework/cl/cl_image.h" #endif -int debug_to = 115; +int debug_to = 3; namespace paddle_mobile { namespace framework { @@ -953,13 +953,14 @@ void Executor::InitMemory() { if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) { auto cl_image = var->template GetMutable(); cl_context context = program_.scope->GetCLScpoe()->Context(); - cl_command_queue command_queue = program_.scope->GetCLScpoe()->CommandQueue(); + cl_command_queue command_queue = + program_.scope->GetCLScpoe()->CommandQueue(); const framework::TensorDesc &desc = var_desc->Tensor_desc(); // framework::DDim ddim = framework::make_ddim(desc.Dims()); framework::DDim ddim = cl_image->dims(); DLOG << var_desc->Name(); - cl_image->InitEmptyImage(context,command_queue, ddim); + cl_image->InitEmptyImage(context, command_queue, ddim); } } } @@ -1011,11 +1012,12 @@ void Executor::InitCombineMemory() { } else { auto cl_image = var->template GetMutable(); cl_context context = program_.scope->GetCLScpoe()->Context(); - cl_command_queue command_queue = program_.scope->GetCLScpoe()->CommandQueue(); + cl_command_queue command_queue = + program_.scope->GetCLScpoe()->CommandQueue(); const framework::TensorDesc &desc = var_desc->Tensor_desc(); framework::DDim ddim = cl_image->dims(); // framework::DDim ddim = framework::make_ddim(desc.Dims()); - cl_image->InitEmptyImage(context, command_queue,ddim); + cl_image->InitEmptyImage(context, command_queue, ddim); } } } diff --git a/src/framework/operator.cpp b/src/framework/operator.cpp index 75674f3cf0..e9596debe7 100644 --- a/src/framework/operator.cpp +++ b/src/framework/operator.cpp @@ -73,7 +73,7 @@ void OperatorBase::Run() { } else { CLImage *cl_image = vari->template GetMutable(); if (cl_image) { - DLOG << type_ << " input- " << key << "="<<*cl_image; + DLOG << type_ << " input- " << key << "=" << *cl_image; } } @@ -98,7 +98,7 @@ void OperatorBase::Run() { } else { CLImage *cl_image = vari->template GetMutable(); if (cl_image) { - DLOG << type_ << " 
output- " << key << "="<<*cl_image ; + DLOG << type_ << " output- " << key << "=" << *cl_image; } } diff --git a/src/operators/kernel/cl/batchnorm_kernel.cpp b/src/operators/kernel/cl/batchnorm_kernel.cpp index 136546a615..b3ef202782 100644 --- a/src/operators/kernel/cl/batchnorm_kernel.cpp +++ b/src/operators/kernel/cl/batchnorm_kernel.cpp @@ -49,11 +49,13 @@ bool BatchNormKernel::Init(BatchNormParam *param) { framework::CLImage *new_scale = new framework::CLImage(); new_scale->SetTensorData(new_scale_ptr, variance->dims()); - new_scale->InitCLImage(this->cl_helper_.CLContext(),this->cl_helper_.CLCommandQueue()); + new_scale->InitCLImage(this->cl_helper_.CLContext(), + this->cl_helper_.CLCommandQueue()); framework::CLImage *new_bias = new framework::CLImage(); new_bias->SetTensorData(new_bias_ptr, variance->dims()); - new_bias->InitCLImage(this->cl_helper_.CLContext(),this->cl_helper_.CLCommandQueue()); + new_bias->InitCLImage(this->cl_helper_.CLContext(), + this->cl_helper_.CLCommandQueue()); param->SetNewScale(new_scale); param->SetNewBias(new_bias); diff --git a/src/operators/kernel/cl/cl_kernel/conv_kernel.cl b/src/operators/kernel/cl/cl_kernel/conv_kernel.cl index b45e9738f8..2b9936e94f 100644 --- a/src/operators/kernel/cl/cl_kernel/conv_kernel.cl +++ b/src/operators/kernel/cl/cl_kernel/conv_kernel.cl @@ -19,6 +19,7 @@ __kernel void conv_3x3(__private const int global_size_dim0, __private const int global_size_dim2, __read_only image2d_t input_image, __read_only image2d_t filter, + #ifdef BIASE __read_only image2d_t bias, #endif diff --git a/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp index bc61e86d4b..610ea50efd 100644 --- a/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp +++ b/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp @@ -29,8 +29,10 @@ bool ConvAddBNReluKernel::Init( param->Paddings()[0] == param->Paddings()[1], "need equal"); - 
param->Filter()->InitCLImage(cl_helper_.CLContext(),cl_helper_.CLCommandQueue()); - param->Bias()->InitCLImage(cl_helper_.CLContext(),cl_helper_.CLCommandQueue()); + param->Filter()->InitCLImage(cl_helper_.CLContext(), + cl_helper_.CLCommandQueue()); + param->Bias()->InitCLImage(cl_helper_.CLContext(), + cl_helper_.CLCommandQueue()); // const CL *mean = param->InputMean(); const framework::CLImage *mean = param->InputMean(); @@ -62,12 +64,14 @@ bool ConvAddBNReluKernel::Init( framework::CLImage *new_scale = new framework::CLImage(); new_scale->SetTensorData(new_scale_ptr, variance->dims()); - new_scale->InitCLImage(this->cl_helper_.CLContext(),cl_helper_.CLCommandQueue()); + new_scale->InitCLImage(this->cl_helper_.CLContext(), + cl_helper_.CLCommandQueue()); framework::CLImage *new_bias = new framework::CLImage(); new_bias->SetTensorData(new_bias_ptr, variance->dims()); - new_bias->InitCLImage(this->cl_helper_.CLContext(),cl_helper_.CLCommandQueue()); + new_bias->InitCLImage(this->cl_helper_.CLContext(), + cl_helper_.CLCommandQueue()); param->SetNewScale(new_scale); param->SetNewBias(new_bias); diff --git a/src/operators/kernel/cl/conv_add_kernel.cpp b/src/operators/kernel/cl/conv_add_kernel.cpp index 16e1a889df..f2e1e7a5d7 100644 --- a/src/operators/kernel/cl/conv_add_kernel.cpp +++ b/src/operators/kernel/cl/conv_add_kernel.cpp @@ -25,8 +25,10 @@ bool ConvAddKernel::Init(FusionConvAddParam *param) { param->Filter()->dims()[2] == param->Filter()->dims()[3] && param->Paddings()[0] == param->Paddings()[1], "need equal"); - param->Filter()->InitCLImage(cl_helper_.CLContext(),this->cl_helper_.CLCommandQueue()); - param->Bias()->InitCLImage(cl_helper_.CLContext(),this->cl_helper_.CLCommandQueue()); + param->Filter()->InitCLImage(cl_helper_.CLContext(), + this->cl_helper_.CLCommandQueue()); + param->Bias()->InitCLImage(cl_helper_.CLContext(), + this->cl_helper_.CLCommandQueue()); int offset = static_cast(param->Filter()->dims()[2]) / 2 - 
static_cast(param->Paddings()[1]); diff --git a/src/operators/kernel/cl/conv_kernel.cpp b/src/operators/kernel/cl/conv_kernel.cpp index 0251db1f83..27ebe18baf 100644 --- a/src/operators/kernel/cl/conv_kernel.cpp +++ b/src/operators/kernel/cl/conv_kernel.cpp @@ -26,7 +26,8 @@ bool ConvKernel::Init(ConvParam *param) { param->Paddings()[0] == param->Paddings()[1], "need equal"); - param->Filter()->InitCLImage(cl_helper_.CLContext(),this->cl_helper_.CLCommandQueue()); + param->Filter()->InitCLImage(cl_helper_.CLContext(), + this->cl_helper_.CLCommandQueue()); int offset = static_cast(param->Filter()->dims()[2]) / 2 - static_cast(param->Paddings()[1]); @@ -95,6 +96,17 @@ void ConvKernel::Compute(const ConvParam &param) { cl_int status; DLOG << " begin set kernel arg "; + DLOG << " c block " << c_block; + DLOG << " w " << w; + DLOG << " nh " << nh; + DLOG << " stride " << stride; + DLOG << " offset " << offset; + DLOG << " input_c " << input_c; + DLOG << " dilation " << dilation; + DLOG << " input width " << input_width; + DLOG << " input height " << input_height; + DLOG << " output width " << output_width; + DLOG << " output height " << output_height; status = clSetKernelArg(kernel, 0, sizeof(int), &c_block); CL_CHECK_ERRORS(status); diff --git a/src/operators/kernel/cl/depthwise_conv_kernel.cpp b/src/operators/kernel/cl/depthwise_conv_kernel.cpp index 65c31dca3d..ad0c9958f8 100644 --- a/src/operators/kernel/cl/depthwise_conv_kernel.cpp +++ b/src/operators/kernel/cl/depthwise_conv_kernel.cpp @@ -27,7 +27,8 @@ bool DepthwiseConvKernel::Init(ConvParam *param) { param->Filter()->dims()[2] == param->Filter()->dims()[3] && param->Paddings()[0] == param->Paddings()[1], "need equal"); - param->Filter()->InitCLImage(cl_helper_.CLContext(),this->cl_helper_.CLCommandQueue()); + param->Filter()->InitCLImage(cl_helper_.CLContext(), + this->cl_helper_.CLCommandQueue()); int offset = static_cast(param->Filter()->dims()[2]) / 2 - static_cast(param->Paddings()[1]); 
param->SetOffset(offset); diff --git a/src/operators/kernel/cl/feed_kernel.cpp b/src/operators/kernel/cl/feed_kernel.cpp index ba90e75ca8..f0587d69df 100644 --- a/src/operators/kernel/cl/feed_kernel.cpp +++ b/src/operators/kernel/cl/feed_kernel.cpp @@ -30,7 +30,7 @@ void FeedKernel::Compute(const FeedParam &param) { cl_int status; auto output = param.Out(); const Tensor *input = param.InputX(); - DLOG<<*input; + DLOG << *input; const float *input_data = input->data(); int numel = input->numel(); cl_mem cl_image = output->GetCLImage(); diff --git a/src/operators/kernel/cl/fetch_kernel.cpp b/src/operators/kernel/cl/fetch_kernel.cpp index 49f94ad895..ceaf2f365a 100644 --- a/src/operators/kernel/cl/fetch_kernel.cpp +++ b/src/operators/kernel/cl/fetch_kernel.cpp @@ -19,44 +19,45 @@ namespace operators { template <> bool FetchKernel::Init(FetchParam *param) { -// this->cl_helper_.AddKernel("fetch", "fetch_kernel.cl"); + // this->cl_helper_.AddKernel("fetch", "fetch_kernel.cl"); return true; } template <> void FetchKernel::Compute(const FetchParam &param) { -// auto kernel = this->cl_helper_.KernelAt(0); -// auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.InputX()); -// -// auto input = param.InputX()->GetCLImage(); -// auto *out = param.Out(); -// -// const auto &dims = param.InputX()->dims(); -// const int N = dims[0]; -// const int C = dims[1]; -// const int in_height = dims[2]; -// const int in_width = dims[3]; -// -// int size_ch = in_height * in_width; -// int size_block = size_ch * 4; -// int size_batch = size_ch * C; -// -// // need create outputBuffer -// cl_image_format imageFormat; -// imageFormat.image_channel_order = CL_RGBA; -// imageFormat.image_channel_data_type = CL_FLOAT; -// cl_mem outputBuffer; -// -// clSetKernelArg(kernel, 0, sizeof(int), &in_height); -// clSetKernelArg(kernel, 1, sizeof(int), &in_width); -// clSetKernelArg(kernel, 2, sizeof(int), &size_ch); -// clSetKernelArg(kernel, 3, sizeof(int), &size_block); -// clSetKernelArg(kernel, 
4, sizeof(int), &size_batch); -// clSetKernelArg(kernel, 5, sizeof(cl_mem), &input); -// clSetKernelArg(kernel, 6, sizeof(cl_mem), &outputBuffer); -// -// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, -// default_work_size.data(), NULL, 0, NULL, NULL); + // auto kernel = this->cl_helper_.KernelAt(0); + // auto default_work_size = + // this->cl_helper_.DefaultWorkSize(*param.InputX()); + // + // auto input = param.InputX()->GetCLImage(); + // auto *out = param.Out(); + // + // const auto &dims = param.InputX()->dims(); + // const int N = dims[0]; + // const int C = dims[1]; + // const int in_height = dims[2]; + // const int in_width = dims[3]; + // + // int size_ch = in_height * in_width; + // int size_block = size_ch * 4; + // int size_batch = size_ch * C; + // + // // need create outputBuffer + // cl_image_format imageFormat; + // imageFormat.image_channel_order = CL_RGBA; + // imageFormat.image_channel_data_type = CL_FLOAT; + // cl_mem outputBuffer; + // + // clSetKernelArg(kernel, 0, sizeof(int), &in_height); + // clSetKernelArg(kernel, 1, sizeof(int), &in_width); + // clSetKernelArg(kernel, 2, sizeof(int), &size_ch); + // clSetKernelArg(kernel, 3, sizeof(int), &size_block); + // clSetKernelArg(kernel, 4, sizeof(int), &size_batch); + // clSetKernelArg(kernel, 5, sizeof(cl_mem), &input); + // clSetKernelArg(kernel, 6, sizeof(cl_mem), &outputBuffer); + // + // clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, + // default_work_size.data(), NULL, 0, NULL, NULL); } template class FetchKernel; diff --git a/src/operators/kernel/cl/reshape_kernel.cpp b/src/operators/kernel/cl/reshape_kernel.cpp index 1726eca9e6..b0d1537da2 100644 --- a/src/operators/kernel/cl/reshape_kernel.cpp +++ b/src/operators/kernel/cl/reshape_kernel.cpp @@ -37,10 +37,10 @@ void ReshapeKernel::Compute(const ReshapeParam &param) { int dims[4] = {1, 1, 1, 1}; int odims[4] = {1, 1, 1, 1}; for (int i = 0; i < inputDim.size(); i++) { - 
dims[4-inputDim.size()+i] = inputDim[i]; + dims[4 - inputDim.size() + i] = inputDim[i]; } for (int i = 0; i < outputDim.size(); i++) { - odims[4-outputDim.size()+i] = outputDim[i]; + odims[4 - outputDim.size() + i] = outputDim[i]; } clSetKernelArg(kernel, 2, sizeof(cl_int), &dims); clSetKernelArg(kernel, 3, sizeof(cl_int), &dims[1]); diff --git a/tools/pre-commit.hooks/clang-format.hook b/tools/pre-commit.hooks/clang-format.hook index ece9ebc598..92377d2dd6 100644 --- a/tools/pre-commit.hooks/clang-format.hook +++ b/tools/pre-commit.hooks/clang-format.hook @@ -17,7 +17,7 @@ shift perl -i -pe 's|^\s+#pragma\s+omp|// #pragma omp|' "$@" ( # remove clang format ios_io folder -flist=$(echo "$@" | perl -pe 's|src/ios_io/[^ ]*||') +flist=$(echo "$@" | perl -pe 's|src/io/ios_io/[^ ]*||') clang-format -i $flist ) perl -i -pe 's|// ||' "$@" -- GitLab