Unverified commit cf70102b authored by Jiaying Zhao, committed by GitHub

optimize GPU conv performance and structure (#1595)

* optimize GPU conv performance and structure

* add CL macro for test_conv_gpu

* fix build failure
Parent 619acd23
......@@ -103,9 +103,9 @@ class CLEngine {
return std::move(event_ptr);
}
bool BuildProgram(cl_program program) {
bool BuildProgram(cl_program program, const std::string &options = "") {
cl_int status;
std::string path = "-cl-fast-relaxed-math -I " +
std::string path = options + " -cl-fast-relaxed-math -I " +
CLEngine::Instance()->GetCLPath() + "/cl_kernel";
status = clBuildProgram(program, 0, 0, path.c_str(), 0, 0);
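The options argument is prepended to the existing -cl-fast-relaxed-math flags, so callers can inject -D macros when compiling a kernel file. A minimal sketch of the intended use, assuming a cl_context and only the CLEngine calls visible in this diff:
// Sketch: compile the shared conv_kernel.cl with fusion macros enabled.
// The option string ends up in clBuildProgram's options, ahead of
// "-cl-fast-relaxed-math -I <cl_path>/cl_kernel".
auto program = CLEngine::Instance()->CreateProgramWith(
    context, CLEngine::Instance()->GetCLPath() + "/cl_kernel/conv_kernel.cl");
CLEngine::Instance()->BuildProgram(program.get(), "-DBIASE -DRELU");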
......@@ -149,7 +149,7 @@ class CLEngine {
cl_int status_;
std::string cl_path_;
std::string cl_path_ = "/data/local/tmp/bin";
std::unique_ptr<_cl_program, CLProgramDeleter> program_;
std::unique_ptr<_cl_context, CLContextDeleter> context_ = nullptr;
......
......@@ -32,9 +32,10 @@ class CLHelper {
explicit CLHelper(CLScope *scope) : scope_(scope) {}
void AddKernel(const std::string &kernel_name, const std::string &file_name) {
void AddKernel(const std::string &kernel_name, const std::string &file_name,
const std::string &options = "") {
DLOG << " begin add kernel ";
auto kernel = scope_->GetKernel(kernel_name, file_name);
auto kernel = scope_->GetKernel(kernel_name, file_name, options);
DLOG << " add kernel ing ";
kernels.emplace_back(std::move(kernel));
}
......
......@@ -146,6 +146,20 @@ class CLImage {
DLOG << " end init cl image";
}
void InitEmpty(cl_context context, cl_command_queue command_queue,
const DDim &image_dims) {
DLOG << " to get image dims ";
image_dims_ = image_dims;
DLOG << " end get image dims " << image_dims_;
InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);
command_queue_ = command_queue;
cl_event_ = CLEngine::Instance()->CreateEvent(context);
initialized_ = true;
DLOG << " end init cl image";
}
cl_mem GetCLImage() const { return cl_image_.get(); }
const DDim &ImageDims() const { return image_dims_; }
......
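InitEmpty above sizes a CL image from image_dims alone and passes nullptr for the host data, so an output image can be allocated before the op runs. A hedged usage sketch (the dims are illustrative; the test at the end of this diff does the equivalent via InferShape):
// Sketch: allocate an op's output image without copying any host tensor.
framework::CLImage output;
output.InitEmpty(context, command_queue,
                 framework::make_ddim({256, 64}));  // illustrative image dims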
......@@ -426,5 +426,25 @@ void CLImageConverterNormal::ImageToNCHW(half_t *image, float *tensor,
default_converter.ImageToNCHW(image, tensor, image_dim, tensor_dim);
}
const DDim &CLImageConverterWinoTransWeight::InitImageDimInfoWith(
const DDim &tensor_dim) {
PADDLE_MOBILE_ENFORCE(tensor_dim.size() == 4, " tensor dim is not 4");
size_t N, C, H, W;
N = tensor_dim[0];
C = tensor_dim[1];
H = tensor_dim[2];
W = tensor_dim[3];
size_t width = (C + 3) / 4;
size_t height = N * 16; // N * (wino_blk_size + 2) * (wino_blk_size + 2)
return make_ddim({width, height});
}
void CLImageConverterWinoTransWeight::NCHWToImage(float *tensor, half_t *image,
const DDim &tensor_dim) {}
void CLImageConverterWinoTransWeight::ImageToNCHW(half_t *image, float *tensor,
const DDim &image_dim,
const DDim &tensor_dim) {}
} // namespace framework
} // namespace paddle_mobile
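The converter above lays out the transformed weights as one 4x4 Winograd transform per filter: height = N * (wino_blk_size + 2)^2 with wino_blk_size = 2, and width = (C + 3) / 4, presumably because each RGBA texel packs four input channels. A worked example with illustrative filter dims:
// Sketch: image dims for a 24x12x3x3 filter (N = 24, C = 12, 3x3 kernel).
size_t N = 24, C = 12;
size_t width = (C + 3) / 4;  // = 3, four channels packed per texel
size_t height = N * 16;      // = 384, N * (2 + 2) * (2 + 2)
// InitImageDimInfoWith(make_ddim({24, 12, 3, 3})) therefore yields {3, 384}.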
......@@ -101,5 +101,13 @@ class CLImageConverterDWBlock : public CLImageConverterBase {
const DDim &tensor_dim);
};
class CLImageConverterWinoTransWeight : public CLImageConverterBase {
public:
const DDim &InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
};
} // namespace framework
} // namespace paddle_mobile
......@@ -37,9 +37,10 @@ class CLScope {
cl_command_queue CommandQueue() { return command_queue_; }
std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(
const std::string &kernel_name, const std::string &file_name) {
const std::string &kernel_name, const std::string &file_name,
const std::string &options) {
DLOG << " to get program " << file_name;
auto program = Program(file_name);
auto program = Program(file_name, options);
DLOG << " end get program ~ ";
DLOG << " to create kernel: " << kernel_name;
std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel(
......@@ -51,8 +52,12 @@ class CLScope {
cl_context Context() { return context_; }
cl_program Program(const std::string &file_name) {
auto it = programs_.find(file_name);
cl_program Program(const std::string &file_name, const std::string &options) {
std::string program_key = file_name;
if (!options.empty()) {
program_key += options;
}
auto it = programs_.find(program_key);
if (it != programs_.end()) {
return it->second.get();
}
......@@ -61,13 +66,13 @@ class CLScope {
context_,
CLEngine::Instance()->GetCLPath() + "/cl_kernel/" + file_name);
DLOG << " --- begin build program -> " << file_name << " --- ";
CLEngine::Instance()->BuildProgram(program.get());
DLOG << " --- end build program -> " << file_name << " --- ";
DLOG << " --- begin build program -> " << program_key << " --- ";
CLEngine::Instance()->BuildProgram(program.get(), options);
DLOG << " --- end build program -> " << program_key << " --- ";
programs_[file_name] = std::move(program);
programs_[program_key] = std::move(program);
return programs_[file_name].get();
return programs_[program_key].get();
}
private:
......
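Keying the program cache on file name plus options means the same .cl file built with different macros produces distinct cached programs, while a repeated request with identical options hits the cache. A hedged sketch of the resulting behavior, assuming a CLScope *scope:
// Sketch: two option sets -> two separate builds; the third call reuses
// the entry cached under "conv_kernel.cl-DBIASE".
cl_program p1 = scope->Program("conv_kernel.cl", "-DBIASE");
cl_program p2 = scope->Program("conv_kernel.cl", "-DBIASE -DRELU");
cl_program p3 = scope->Program("conv_kernel.cl", "-DBIASE");  // p3 == p1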
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/cl/cl-kernel-func/conv_func.h"
#include <vector>
#include "framework/cl/cl_image_converter.h"
#include "framework/cl/cl_tensor.h"
......@@ -20,19 +21,23 @@ namespace paddle_mobile {
namespace operators {
template <>
void winograd_transform_weight<4, 3>(framework::CLHelper &cl_helper,
framework::CLImage &weight){};
void winograd_transform_weight<4, 3>(framework::CLHelper *cl_helper,
framework::CLImage *weight) {}
template <>
void WinogradConv3x3<4, 3>(framework::CLHelper &cl_helper,
const ConvParam<GPU_CL> &param) {}
void WinogradConv3x3<4, 3>(framework::CLHelper *cl_helper,
const ConvParam<GPU_CL> &param, bool ifRelu,
const framework::CLImage *biase,
const framework::CLImage *new_scale,
const framework::CLImage *new_bias) {}
void ConvAddBnRelu(framework::CLHelper &cl_helper,
void ConvAddBnRelu(framework::CLHelper *cl_helper,
const ConvParam<GPU_CL> &param, bool ifRelu,
const CLImage *biase, const CLImage *new_scale,
const CLImage *new_bias) {
auto kernel = cl_helper.KernelAt(0);
auto default_work_size = cl_helper.DefaultWorkSize(*param.Output());
const framework::CLImage *biase,
const framework::CLImage *new_scale,
const framework::CLImage *new_bias) {
auto kernel = cl_helper->KernelAt(0);
auto default_work_size = cl_helper->DefaultWorkSize(*param.Output());
int c_block = default_work_size[0];
int w = default_work_size[1];
int nh = default_work_size[2];
......@@ -137,7 +142,7 @@ void ConvAddBnRelu(framework::CLHelper &cl_helper,
static_cast<const uint32_t>(maped_w),
static_cast<const uint32_t>(default_work_size.data()[2])};
status = clEnqueueNDRangeKernel(cl_helper.CLCommandQueue(), kernel,
status = clEnqueueNDRangeKernel(cl_helper->CLCommandQueue(), kernel,
default_work_size.size(), NULL, work_size,
NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
......@@ -201,7 +206,7 @@ void ConvAddBnRelu(framework::CLHelper &cl_helper,
CL_CHECK_ERRORS(status);
status = clEnqueueNDRangeKernel(
cl_helper.CLCommandQueue(), kernel, default_work_size.size(), NULL,
cl_helper->CLCommandQueue(), kernel, default_work_size.size(), NULL,
default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
}
......
......@@ -22,23 +22,24 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
using namespace framework;
inline int maptofactor(int i, int factor) { return (i + factor - 1) / factor; }
template <int tile, int kernel>
void winograd_transform_weight(framework::CLHelper &cl_helper,
framework::CLImage &weight);
void winograd_transform_weight(framework::CLHelper *cl_helper,
framework::CLImage *weight);
template <int tile, int kernel>
void WinogradConv3x3(framework::CLHelper &cl_helper,
const ConvParam<GPU_CL> &param);
void WinogradConv3x3(framework::CLHelper *cl_helper,
const ConvParam<GPU_CL> &param, bool ifRelu = false,
const framework::CLImage *biase = nullptr,
const framework::CLImage *new_scale = nullptr,
const framework::CLImage *new_bias = nullptr);
void ConvAddBnRelu(framework::CLHelper &cl_helper,
void ConvAddBnRelu(framework::CLHelper *cl_helper,
const ConvParam<GPU_CL> &param, bool ifRelu = false,
const CLImage *biase = nullptr,
const CLImage *new_scale = nullptr,
const CLImage *new_bias = nullptr);
const framework::CLImage *biase = nullptr,
const framework::CLImage *new_scale = nullptr,
const framework::CLImage *new_bias = nullptr);
} // namespace operators
} // namespace paddle_mobile
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define BIASE
#define BATCH_NORM
#define RELU
#include "conv_kernel.inc.cl"
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define BIASE
#include "conv_kernel.inc.cl"
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define BIASE
#define RELU
#include "conv_kernel.inc.cl"
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define BATCH_NORM
#define BIASE
#define RELU
#include "conv_kernel.inc.cl"
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define BATCH_NORM
#define RELU
#include "conv_kernel.inc.cl"
......@@ -126,6 +126,10 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
param->SetOffset(offset);
const std::string conv_kernel_file = "conv_kernel.cl";
const std::string wino_kernel_file = "winograd_transform.cl";
const std::string build_options = "-DBIASE -DBATCH_NORM -DRELU";
/*
if (param->Filter()->dims()[2] == 1 &&
param->Filter()->dims()[3] == 1 &&
......@@ -137,26 +141,44 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
}
*/
if (param->Filter()->dims()[2] == 1 && param->Filter()->dims()[3] == 1) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT;
param->Filter()->InitNImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_1x1_spl", "conv_add_bn_relu_kernel.cl");
DLOG << " conv add bn relu conv 1x1";
this->cl_helper_.AddKernel("conv_1x1_spl", conv_kernel_file, build_options);
} else if (param->Filter()->dims()[1] == 1 &&
param->Input()->dims()[1] == param->Output()->dims()[1] &&
param->Filter()->dims()[2] == 3) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT;
param->Filter()->InitDWImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl");
DLOG << " conv add bn relu depth_conv_3x3";
this->cl_helper_.AddKernel("depth_conv_3x3", conv_kernel_file,
build_options);
} else if (param->Filter()->dims()[2] == 3 &&
param->Filter()->dims()[3] == 3) {
// if (param->Strides()[0] == param->Strides()[1] &&
// param->Strides()[0] == 1 && param->Input()->dims()[2] >= 32) {
// param->ExecMode() = ConvParam<GPU_CL>::EXEC_WINOGRAD3X3_FLOAT;
// this->cl_helper_.AddKernel("winograd_filter_transform_2x2",
// wino_kernel_file, build_options);
// this->cl_helper_.AddKernel("winograd_input_transform_2x2",
// wino_kernel_file, build_options);
// this->cl_helper_.AddKernel("matmul", "matmul.cl");
// this->cl_helper_.AddKernel("winograd_output_transform_2x2",
// wino_kernel_file, build_options);
//
// winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter());
//
// } else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
DLOG << " conv add bn relu conv_3x3";
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options);
// }
} else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
}
......@@ -167,8 +189,21 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
template <>
void ConvAddBNReluKernel<GPU_CL, float>::Compute(
const FusionConvAddBNReluParam<GPU_CL> &param) {
ConvAddBnRelu(this->cl_helper_, param, true, param.Bias(), param.NewScale(),
param.NewBias());
switch (param.ExecMode()) {
case ConvParam<GPU_CL>::EXEC_WINOGRAD3X3_FLOAT:
WinogradConv3x3<4, 3>(&this->cl_helper_, param, true, param.Bias(),
param.NewScale(), param.NewBias());
break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
ConvAddBnRelu(&this->cl_helper_, param, true, param.Bias(),
param.NewScale(), param.NewBias());
break;
default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode());
}
}
template class ConvAddBNReluKernel<GPU_CL, float>;
......
......@@ -33,34 +33,65 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
static_cast<int>(param->Paddings()[1]);
param->SetOffset(offset);
const std::string conv_kernel_file = "conv_kernel.cl";
const std::string wino_kernel_file = "winograd_transform.cl";
const std::string build_options = "-DBIASE";
if (param->Filter()->dims()[2] == 1 && param->Filter()->dims()[3] == 1) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT;
param->Filter()->InitNImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_1x1_spl", "conv_add_kernel.cl");
this->cl_helper_.AddKernel("conv_1x1_spl", conv_kernel_file, build_options);
} else if (param->Filter()->dims()[1] == 1 &&
param->Input()->dims()[1] == param->Output()->dims()[1] &&
param->Filter()->dims()[2] == 3) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT;
param->Filter()->InitDWImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_kernel.cl");
this->cl_helper_.AddKernel("depth_conv_3x3", conv_kernel_file,
build_options);
} else if (param->Filter()->dims()[2] == 3 &&
param->Filter()->dims()[3] == 3) {
// if (param->Strides()[0] == param->Strides()[1] &&
// param->Strides()[0] == 1 && param->Input()->dims()[2] >= 32) {
// param->ExecMode() = ConvParam<GPU_CL>::EXEC_WINOGRAD3X3_FLOAT;
// this->cl_helper_.AddKernel("winograd_filter_transform_2x2",
// wino_kernel_file, build_options);
// this->cl_helper_.AddKernel("winograd_input_transform_2x2",
// wino_kernel_file, build_options);
// this->cl_helper_.AddKernel("matmul", "matmul.cl");
// this->cl_helper_.AddKernel("winograd_output_transform_2x2",
// wino_kernel_file, build_options);
//
// winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter());
//
// } else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", "conv_add_kernel.cl");
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options);
// }
} else if (param->Filter()->dims()[2] == 7 &&
param->Filter()->dims()[3] == 7) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW7x7_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_7x7", "conv_add_kernel.cl");
this->cl_helper_.AddKernel("conv_7x7", conv_kernel_file, build_options);
} else if (param->Filter()->dims()[2] == 5 &&
param->Filter()->dims()[3] == 5) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW5x5_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_5x5", "conv_add_kernel.cl");
this->cl_helper_.AddKernel("conv_5x5", conv_kernel_file, build_options);
}
return true;
......@@ -69,7 +100,21 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
template <>
void ConvAddKernel<GPU_CL, float>::Compute(
const FusionConvAddParam<GPU_CL> &param) {
ConvAddBnRelu(this->cl_helper_, param, false, param.Bias());
switch (param.ExecMode()) {
case ConvParam<GPU_CL>::EXEC_WINOGRAD3X3_FLOAT:
WinogradConv3x3<4, 3>(&this->cl_helper_, param, false, param.Bias());
break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW5x5_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW7x7_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
ConvAddBnRelu(&this->cl_helper_, param, false, param.Bias());
break;
default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode());
}
}
template class ConvAddKernel<GPU_CL, float>;
......
......@@ -34,35 +34,66 @@ bool ConvAddReluKernel<GPU_CL, float>::Init(
static_cast<int>(param->Paddings()[1]);
param->SetOffset(offset);
const std::string conv_kernel_file = "conv_kernel.cl";
const std::string wino_kernel_file = "winograd_transform.cl";
const std::string build_options = "-DBIASE -DRELU";
if (param->Filter()->dims()[2] == 1 && param->Filter()->dims()[3] == 1) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT;
param->Filter()->InitNImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_1x1_spl", "conv_add_relu_kernel.cl");
this->cl_helper_.AddKernel("conv_1x1_spl", conv_kernel_file, build_options);
} else if (param->Filter()->dims()[1] == 1 &&
param->Input()->dims()[1] == param->Output()->dims()[1] &&
param->Filter()->dims()[2] == 3) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT;
param->Filter()->InitDWImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_relu_kernel.cl");
this->cl_helper_.AddKernel("depth_conv_3x3", conv_kernel_file,
build_options);
} else if (param->Filter()->dims()[2] == 3 &&
param->Filter()->dims()[3] == 3) {
// if (param->Strides()[0] == param->Strides()[1] &&
// param->Strides()[0] == 1 && param->Input()->dims()[2] >= 32) {
// param->ExecMode() = ConvParam<GPU_CL>::EXEC_WINOGRAD3X3_FLOAT;
// this->cl_helper_.AddKernel("winograd_filter_transform_2x2",
// wino_kernel_file, build_options);
// this->cl_helper_.AddKernel("winograd_input_transform_2x2",
// wino_kernel_file, build_options);
// this->cl_helper_.AddKernel("matmul", "matmul.cl");
// this->cl_helper_.AddKernel("winograd_output_transform_2x2",
// wino_kernel_file, build_options);
//
// winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter());
//
// } else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", "conv_add_relu_kernel.cl");
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options);
// }
} else if (param->Filter()->dims()[2] == 7 &&
param->Filter()->dims()[3] == 7) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW7x7_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_7x7", "conv_add_relu_kernel.cl");
this->cl_helper_.AddKernel("conv_7x7", conv_kernel_file, build_options);
} else if (param->Filter()->dims()[2] == 5 &&
param->Filter()->dims()[3] == 5) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW5x5_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_5x5", "conv_add_relu_kernel.cl");
this->cl_helper_.AddKernel("conv_5x5", conv_kernel_file, build_options);
} else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
}
......@@ -73,7 +104,21 @@ bool ConvAddReluKernel<GPU_CL, float>::Init(
template <>
void ConvAddReluKernel<GPU_CL, float>::Compute(
const FusionConvAddReluParam<GPU_CL> &param) {
ConvAddBnRelu(this->cl_helper_, param, true, param.Bias());
switch (param.ExecMode()) {
case ConvParam<GPU_CL>::EXEC_WINOGRAD3X3_FLOAT:
WinogradConv3x3<4, 3>(&this->cl_helper_, param, true, param.Bias());
break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW5x5_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW7x7_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
ConvAddBnRelu(&this->cl_helper_, param, true, param.Bias());
break;
default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode());
}
}
template class ConvAddReluKernel<GPU_CL, float>;
......
......@@ -100,28 +100,51 @@ bool ConvBNAddReluKernel<GPU_CL, float>::Init(
param->SetOffset(offset);
const std::string conv_kernel_file = "conv_kernel.cl";
const std::string wino_kernel_file = "winograd_transform.cl";
const std::string build_options = "-DBIASE -DBATCH_NORM -DRELU";
if (param->Filter()->dims()[2] == 1 && param->Filter()->dims()[3] == 1) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT;
param->Filter()->InitNImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("convBNAdd_1x1_spl",
"conv_bn_add_relu_kernel.cl");
DLOG << " conv bn add relu conv 1x1";
this->cl_helper_.AddKernel("convBNAdd_1x1_spl", conv_kernel_file,
build_options);
} else if (param->Filter()->dims()[1] == 1 &&
param->Input()->dims()[1] == param->Output()->dims()[1] &&
param->Filter()->dims()[2] == 3) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT;
param->Filter()->InitDWImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("depth_convBNAdd_3x3",
"conv_bn_add_relu_kernel.cl");
DLOG << " conv bn add relu depth_conv_3x3";
this->cl_helper_.AddKernel("depth_convBNAdd_3x3", conv_kernel_file,
build_options);
} else if (param->Filter()->dims()[2] == 3 &&
param->Filter()->dims()[3] == 3) {
// if (param->Strides()[0] == param->Strides()[1] &&
// param->Strides()[0] == 1 && param->Input()->dims()[2] >= 32) {
// param->ExecMode() = ConvParam<GPU_CL>::EXEC_WINOGRAD3X3_FLOAT;
// this->cl_helper_.AddKernel("winograd_filter_transform_2x2",
// wino_kernel_file, build_options);
// this->cl_helper_.AddKernel("winograd_input_transform_2x2",
// wino_kernel_file, build_options);
// this->cl_helper_.AddKernel("matmul", "matmul.cl");
// this->cl_helper_.AddKernel("winograd_output_transform_2x2_bn_add",
// wino_kernel_file, build_options);
//
// winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter());
//
// } else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("convBNAdd_3x3", "conv_bn_add_relu_kernel.cl");
DLOG << " conv bn add relu conv_3x3";
this->cl_helper_.AddKernel("convBNAdd_3x3", conv_kernel_file,
build_options);
// }
} else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
}
......@@ -132,8 +155,21 @@ bool ConvBNAddReluKernel<GPU_CL, float>::Init(
template <>
void ConvBNAddReluKernel<GPU_CL, float>::Compute(
const FusionConvBNAddReluParam<GPU_CL> &param) {
ConvAddBnRelu(this->cl_helper_, param, true, param.Bias(), param.NewScale(),
param.NewBias());
switch (param.ExecMode()) {
case ConvParam<GPU_CL>::EXEC_WINOGRAD3X3_FLOAT:
WinogradConv3x3<4, 3>(&this->cl_helper_, param, true, param.Bias(),
param.NewScale(), param.NewBias());
break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
ConvAddBnRelu(&this->cl_helper_, param, true, param.Bias(),
param.NewScale(), param.NewBias());
break;
default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode());
}
}
template class ConvBNAddReluKernel<GPU_CL, float>;
......
......@@ -98,26 +98,49 @@ bool ConvBNReluKernel<GPU_CL, float>::Init(
param->SetOffset(offset);
const std::string conv_kernel_file = "conv_kernel.cl";
const std::string wino_kernel_file = "winograd_transform.cl";
const std::string build_options = "-DBATCH_NORM -DRELU";
if (param->Filter()->dims()[2] == 1 && param->Filter()->dims()[3] == 1) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT;
param->Filter()->InitNImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_1x1_spl", "conv_bn_relu_kernel.cl");
DLOG << " conv bn relu conv 1x1";
this->cl_helper_.AddKernel("conv_1x1_spl", conv_kernel_file, build_options);
} else if (param->Filter()->dims()[1] == 1 &&
param->Input()->dims()[1] == param->Output()->dims()[1] &&
param->Filter()->dims()[2] == 3) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT;
param->Filter()->InitDWImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("depth_conv_3x3", "conv_bn_relu_kernel.cl");
DLOG << " conv bn relu depth_conv_3x3";
this->cl_helper_.AddKernel("depth_conv_3x3", conv_kernel_file,
build_options);
} else if (param->Filter()->dims()[2] == 3 &&
param->Filter()->dims()[3] == 3) {
// if (param->Strides()[0] == param->Strides()[1] &&
// param->Strides()[0] == 1 && param->Input()->dims()[2] >= 32) {
// param->ExecMode() = ConvParam<GPU_CL>::EXEC_WINOGRAD3X3_FLOAT;
// this->cl_helper_.AddKernel("winograd_filter_transform_2x2",
// wino_kernel_file, build_options);
// this->cl_helper_.AddKernel("winograd_input_transform_2x2",
// wino_kernel_file, build_options);
// this->cl_helper_.AddKernel("matmul", "matmul.cl");
// this->cl_helper_.AddKernel("winograd_output_transform_2x2",
// wino_kernel_file, build_options);
//
// winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter());
//
// } else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", "conv_bn_relu_kernel.cl");
DLOG << " conv bn relu conv_3x3";
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options);
// }
} else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
}
......@@ -127,8 +150,21 @@ bool ConvBNReluKernel<GPU_CL, float>::Init(
template <>
void ConvBNReluKernel<GPU_CL, float>::Compute(
const FusionConvBNReluParam<GPU_CL> &param) {
ConvAddBnRelu(this->cl_helper_, param, true, nullptr, param.NewScale(),
param.NewBias());
switch (param.ExecMode()) {
case ConvParam<GPU_CL>::EXEC_WINOGRAD3X3_FLOAT:
WinogradConv3x3<4, 3>(&this->cl_helper_, param, true, nullptr,
param.NewScale(), param.NewBias());
break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
ConvAddBnRelu(&this->cl_helper_, param, true, nullptr, param.NewScale(),
param.NewBias());
break;
default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode());
}
}
template class ConvBNReluKernel<GPU_CL, float>;
......
......@@ -37,25 +37,49 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
DLOG << " height of one block: " << param->Filter()->dims()[2];
DLOG << " filter dims: " << param->Filter()->dims();
const std::string conv_kernel_file = "conv_kernel.cl";
const std::string wino_kernel_file = "winograd_transform.cl";
if (param->Filter()->dims()[2] == 1 && param->Filter()->dims()[3] == 1) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT;
param->Filter()->InitNImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_1x1_spl", "conv_kernel.cl");
this->cl_helper_.AddKernel("conv_1x1_spl", conv_kernel_file);
DLOG << "conv 1x1";
} else if (param->Filter()->dims()[1] == 1 &&
param->Input()->dims()[1] == param->Output()->dims()[1] &&
param->Filter()->dims()[2] == 3) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT;
param->Filter()->InitDWImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("depth_conv_3x3", "depthwise_conv_kernel.cl");
this->cl_helper_.AddKernel("depth_conv_3x3", conv_kernel_file);
DLOG << "depth_conv 3x3";
} else if (param->Filter()->dims()[2] == 3 &&
param->Filter()->dims()[3] == 3) {
// if (param->Strides()[0] == param->Strides()[1] &&
// param->Strides()[0] == 1 && param->Input()->dims()[2] >= 32) {
// param->ExecMode() = ConvParam<GPU_CL>::EXEC_WINOGRAD3X3_FLOAT;
// this->cl_helper_.AddKernel("winograd_filter_transform_2x2",
// wino_kernel_file);
// this->cl_helper_.AddKernel("winograd_input_transform_2x2",
// wino_kernel_file);
// this->cl_helper_.AddKernel("matmul", "matmul.cl");
// this->cl_helper_.AddKernel("winograd_output_transform_2x2",
// wino_kernel_file);
//
// winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter());
//
// } else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", "conv_kernel.cl");
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file);
// }
DLOG << "conv 3x3";
} else {
......@@ -67,7 +91,19 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
template <>
void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
ConvAddBnRelu(this->cl_helper_, param);
switch (param.ExecMode()) {
case ConvParam<GPU_CL>::EXEC_WINOGRAD3X3_FLOAT:
WinogradConv3x3<4, 3>(&this->cl_helper_, param);
break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
ConvAddBnRelu(&this->cl_helper_, param);
break;
default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode());
}
}
template class ConvKernel<GPU_CL, float>;
......
......@@ -479,6 +479,11 @@ class ConvParam : public OpParam {
EXEC_DEPTHWISE5x5_INT8,
EXEC_SLIDINGWINDOW3x3S1_FLOAT,
EXEC_SLIDINGWINDOW3x3S2_FLOAT,
EXEC_DEPTHWISE3x3_FLOAT,
EXEC_SLIDINGWINDOW1x1_FLOAT,
EXEC_SLIDINGWINDOW3x3_FLOAT,
EXEC_SLIDINGWINDOW5x5_FLOAT,
EXEC_SLIDINGWINDOW7x7_FLOAT,
};
ExecMode &ExecMode() const { return exec_mode_; }
......
......@@ -494,4 +494,7 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE(test-dwconv-bn-relu-op operators/test_dwconv_bn_relu_op.cpp test_helper.h test_include.h)
target_link_libraries(test-dwconv-bn-relu-op paddle-mobile)
ADD_EXECUTABLE(test-conv-gpu operators/test_conv_gpu.cpp test_helper.h test_include.h)
target_link_libraries(test-conv-gpu paddle-mobile)
endif ()
......@@ -21,7 +21,10 @@ int main() {
paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
#endif
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CL
paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
paddle_mobile.SetCLPath("/data/local/tmp/bin");
#else
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
#endif
paddle_mobile.SetThreadNum(4);
......@@ -38,13 +41,13 @@ int main() {
input_tensor.data<float>() + input_tensor.numel());
#ifndef PADDLE_MOBILE_FPGA
// warm up ten times
for (int i = 0; i < 10; ++i) {
paddle_mobile.Predict(input, dims);
}
// for (int i = 0; i < 10; ++i) {
// paddle_mobile.Predict(input, dims);
// }
auto time3 = time();
for (int i = 0; i < 10; ++i) {
paddle_mobile.Predict(input, dims);
}
// for (int i = 0; i < 10; ++i) {
paddle_mobile.Predict(input, dims);
// }
auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
<< std::endl;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_MOBILE_CL
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
#include "common/common.h"
#include "framework/cl/cl_helper.h"
#include "framework/cl/cl_image.h"
#include "operators/conv_op.h"
#include "operators/kernel/cl/cl-kernel-func/conv_func.h"
namespace paddle_mobile {
template <typename Itype, typename Otype, int Kernel, int Pad, int Stride>
int TestConvOp(int in_channels, int in_height, int in_width, int out_channels,
int groups) {
int kernel_h = Kernel;
int kernel_w = Kernel;
int pad_h = Pad;
int pad_w = Pad;
int stride_h = Stride;
int stride_w = Stride;
int dilation_h = 1;
int dilation_w = 1;
int batch_size = 1;
int input_c = in_channels;
int input_h = in_height;
int input_w = in_width;
int output_c = out_channels;
framework::DDim input_shape =
framework::make_ddim({batch_size, input_c, input_h, input_w});
framework::DDim filter_shape =
framework::make_ddim({output_c, input_c / groups, kernel_h, kernel_w});
// std::cerr << " init " << std::endl;
VariableNameMap inputs;
VariableNameMap outputs;
auto scope = std::make_shared<framework::Scope>();
inputs["Input"] = std::vector<std::string>({"input"});
inputs["Filter"] = std::vector<std::string>({"filter"});
outputs["Output"] = std::vector<std::string>({"output"});
cl_context context = scope->GetCLScpoe()->Context();
cl_command_queue command_queue = scope->GetCLScpoe()->CommandQueue();
// std::cerr << " input " << std::endl;
auto input_var = scope.get()->Var("input");
auto input = input_var->template GetMutable<framework::CLImage>();
const int in_numel = framework::product(input_shape);
float *in_data = new float[in_numel];
for (int i = 0; i < in_numel; ++i) {
in_data[i] = (i % 36 / 6) + 1;
}
input->SetTensorData(in_data, input_shape);
input->InitNormalCLImage(context, command_queue);
DLOG << "input image \n" << *input;
// std::cerr << " filter " << std::endl;
auto filter_var = scope.get()->Var("filter");
auto filter = filter_var->template GetMutable<framework::CLImage>();
const int filter_numel = product(filter_shape);
float *filter_data = new float[filter_numel];
for (int i = 0; i < filter_numel; ++i) {
filter_data[i] = i % 9;
}
filter->SetTensorData(filter_data, filter_shape);
// std::cerr << " attrs " << std::endl;
framework::AttributeMap attrs;
attrs["strides"].Set<vector<int>>(std::vector<int>({stride_h, stride_w}));
attrs["paddings"].Set<vector<int>>(std::vector<int>({pad_h, pad_w}));
attrs["dilations"].Set<vector<int>>(
std::vector<int>({dilation_h, dilation_w}));
attrs["groups"].Set<int>(groups);
std::cerr << " output " << std::endl;
auto output_var = scope.get()->Var("output");
auto output = output_var->template GetMutable<framework::CLImage>();
auto *op = new operators::ConvOp<GPU_CL, float>("conv2d", inputs, outputs,
attrs, scope.get());
op->InferShape();
framework::DDim ddim = output->dims();
DLOG << "output dims = " << ddim;
output->InitEmptyImage(context, command_queue, ddim);
// std::cerr << " op->init " << std::endl;
op->Init();
auto time1 = time();
op->Run();
auto time2 = time();
std::cerr << "time cost : " << time_diff(time1, time2) << std::endl;
delete op;
return 0;
}
} // namespace paddle_mobile
int TestAll(const int in_channels, const int in_height, const int in_width,
const int out_channels, const int groups) {
std::cerr << "in_channels=" << in_channels << ", in_height=" << in_height
<< ", in_width=" << in_width << ", out_channels=" << out_channels
<< ", groups=" << groups << std::endl;
std::cerr << "float, kernel=3, pad=1, stride=1" << std::endl;
paddle_mobile::TestConvOp<float, float, 3, 1, 1>(
in_channels, in_height, in_width, out_channels, groups);
return 0;
}
#endif
int main() {
// TestAll(4, 6, 6, 4, 1);
// TestAll(6, 32, 32, 24, 1);
// TestAll(12, 32, 32, 24, 1);
// TestAll(24, 32, 32, 24, 1);
// TestAll(36, 32, 32, 24, 1);
// TestAll(48, 32, 32, 24, 1);
// TestAll(60, 32, 32, 24, 1);
// TestAll(72, 32, 32, 24, 1);
// TestAll(116, 32, 32, 24, 1);
// TestAll(232, 32, 32, 24, 1);
// TestAll(464, 32, 32, 24, 1);
//
// TestAll(6, 64, 64, 24, 1);
// TestAll(12, 64, 64, 24, 1);
// TestAll(24, 64, 64, 24, 1);
// TestAll(36, 64, 64, 24, 1);
// TestAll(48, 64, 64, 24, 1);
// TestAll(60, 64, 64, 24, 1);
// TestAll(72, 64, 64, 24, 1);
// TestAll(116, 64, 64, 24, 1);
// TestAll(232, 64, 64, 24, 1);
// TestAll(464, 64, 64, 24, 1);
//
// TestAll(6, 128, 128, 24, 1);
// TestAll(12, 128, 128, 24, 1);
// TestAll(24, 128, 128, 24, 1);
// TestAll(36, 128, 128, 24, 1);
// TestAll(48, 128, 128, 24, 1);
// TestAll(60, 128, 128, 24, 1);
// TestAll(72, 128, 128, 24, 1);
// TestAll(116, 128, 128, 24, 1);
// TestAll(232, 128, 128, 24, 1);
// TestAll(464, 128, 128, 24, 1);
//
//
// TestAll(6, 32, 32, 6, 1);
// TestAll(12, 32, 32, 12, 1);
// TestAll(24, 32, 32, 24, 1);
// TestAll(36, 32, 32, 36, 1);
// TestAll(48, 32, 32, 48, 1);
// TestAll(60, 32, 32, 60, 1);
// TestAll(72, 32, 32, 72, 1);
// TestAll(116, 32, 32, 116, 1);
// TestAll(232, 32, 32, 232, 1);
// TestAll(464, 32, 32, 464, 1);
//
// TestAll(6, 64, 64, 6, 1);
// TestAll(12, 64, 64, 12, 1);
// TestAll(24, 64, 64, 24, 1);
// TestAll(36, 64, 64, 36, 1);
// TestAll(48, 64, 64, 48, 1);
// TestAll(60, 64, 64, 60, 1);
// TestAll(72, 64, 64, 72, 1);
// TestAll(116, 64, 64, 116, 1);
// TestAll(232, 64, 64, 232, 1);
// TestAll(464, 64, 64, 464, 1);
//
// TestAll(6, 128, 128, 6, 1);
// TestAll(12, 128, 128, 12, 1);
// TestAll(24, 128, 128, 24, 1);
// TestAll(36, 128, 128, 36, 1);
// TestAll(48, 128, 128, 48, 1);
// TestAll(60, 128, 128, 60, 1);
// TestAll(72, 128, 128, 72, 1);
// TestAll(116, 128, 128, 116, 1);
// TestAll(232, 128, 128, 232, 1);
// TestAll(464, 128, 128, 464, 1);
return 0;
}