提交 8d37f0df 编写于 作者: Y yangfei

Improve prior_box kernel and add super-resolution test file

上级 34964572
...@@ -26,40 +26,34 @@ __kernel void prior_box(__private const int global_size_dim0, ...@@ -26,40 +26,34 @@ __kernel void prior_box(__private const int global_size_dim0,
__private const int img_width, __private const int img_width,
__private const int img_height, __private const int img_height,
__private const int num_priors, __private const int num_priors,
__private const int C){ __private const int C,
__private const int clip){
const int out_c = get_global_id(0); const int out_c = get_global_id(0);
const int out_nh = get_global_id(1); const int out_nh = get_global_id(1);
const int out_n = out_nh/num_priors; const int out_n = out_nh/num_priors;
const int out_h = out_nh%num_priors; const int out_h = out_nh%num_priors;
if (out_c >= global_size_dim0 ||out_nh >= global_size_dim2) {
return;
}
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE |
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST;
int2 output_pos; int2 output_pos;
output_pos.x = out_c * 4; output_pos.x = out_c * 4;
output_pos.y = out_nh; output_pos.y = out_nh;
float center_x0 = (offset + out_c * 4) * step_width; float center_x0 = (offset + (float)(out_c * 4)) * step_width;
float center_x1 = (offset + out_c * 4 + 1) * step_width; float center_x1 = (offset + (float)(out_c * 4 + 1)) * step_width;
float center_x2 = (offset + out_c * 4 + 2) * step_width; float center_x2 = (offset + (float)(out_c * 4 + 2)) * step_width;
float center_x3 = (offset + out_c * 4 + 3) * step_width; float center_x3 = (offset + (float)(out_c * 4 + 3)) * step_width;
float center_y = (out_n + offset) * step_height; float center_y = ((float)out_n + offset) * step_height;
half4 output[4]; half4 output[4];
output[0].x = convert_half((center_x0 - box_width[out_h]) / img_width); output[0].x = convert_half((center_x0 - box_width[out_h]) / (float)img_width);
output[1].x = convert_half((center_y - box_height[out_h]) / img_height); output[1].x = convert_half((center_y - box_height[out_h]) / (float)img_height);
output[2].x = convert_half((center_x0 + box_width[out_h]) / img_width); output[2].x = convert_half((center_x0 + box_width[out_h]) / (float)img_width);
output[3].x = convert_half((center_y + box_height[out_h]) / img_height); output[3].x = convert_half((center_y + box_height[out_h]) / (float)img_height);
if(C - 4 * out_c>=2){ if(C - 4 * out_c>=2){
output[0].y = convert_half((center_x1 - box_width[out_h]) / img_width); output[0].y = convert_half((center_x1 - box_width[out_h]) / (float)img_width);
output[1].y = convert_half((center_y - box_height[out_h]) / img_height); output[1].y = convert_half((center_y - box_height[out_h]) / (float)img_height);
output[2].y = convert_half((center_x1 + box_width[out_h]) / img_width); output[2].y = convert_half((center_x1 + box_width[out_h]) / (float)img_width);
output[3].y = convert_half((center_y + box_height[out_h]) / img_height); output[3].y = convert_half((center_y + box_height[out_h]) / (float)img_height);
}else{ }else{
output[0].y = 0.0f; output[0].y = 0.0f;
output[1].y = 0.0f; output[1].y = 0.0f;
...@@ -67,10 +61,10 @@ __kernel void prior_box(__private const int global_size_dim0, ...@@ -67,10 +61,10 @@ __kernel void prior_box(__private const int global_size_dim0,
output[3].y = 0.0f; output[3].y = 0.0f;
} }
if(C - 4 * out_c>=3){ if(C - 4 * out_c>=3){
output[0].z = convert_half((center_x2 - box_width[out_h]) / img_width); output[0].z = convert_half((center_x2 - box_width[out_h]) / (float)img_width);
output[1].z = convert_half((center_y - box_height[out_h]) / img_height); output[1].z = convert_half((center_y - box_height[out_h]) / (float)img_height);
output[2].z = convert_half((center_x2 + box_width[out_h]) / img_width); output[2].z = convert_half((center_x2 + box_width[out_h]) / (float)img_width);
output[3].z = convert_half((center_y + box_height[out_h]) / img_height); output[3].z = convert_half((center_y + box_height[out_h]) / (float)img_height);
}else{ }else{
output[0].z = 0.0f; output[0].z = 0.0f;
output[1].z = 0.0f; output[1].z = 0.0f;
...@@ -78,23 +72,26 @@ __kernel void prior_box(__private const int global_size_dim0, ...@@ -78,23 +72,26 @@ __kernel void prior_box(__private const int global_size_dim0,
output[3].z = 0.0f; output[3].z = 0.0f;
} }
if(C - 4 * out_c>=4){ if(C - 4 * out_c>=4){
output[0].w = convert_half((center_x3 - box_width[out_h]) / img_width); output[0].w = convert_half((center_x3 - box_width[out_h]) / (float)img_width);
output[1].w = convert_half((center_y - box_height[out_h]) / img_height); output[1].w = convert_half((center_y - box_height[out_h]) / (float)img_height);
output[2].w = convert_half((center_x3 + box_width[out_h]) / img_width); output[2].w = convert_half((center_x3 + box_width[out_h]) / (float)img_width);
output[3].w = convert_half((center_y + box_height[out_h]) / img_height); output[3].w = convert_half((center_y + box_height[out_h]) / (float)img_height);
}else{ }else{
output[0].z = 0.0f; output[0].w = 0.0f;
output[1].z = 0.0f; output[1].w = 0.0f;
output[2].z = 0.0f; output[2].w = 0.0f;
output[3].z = 0.0f; output[3].w = 0.0f;
}
if(clip==1){
output[0] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[0]),(half4)(1.0f, 1.0f, 1.0f, 1.0f));
output[1] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[1]),(half4)(1.0f, 1.0f, 1.0f, 1.0f));
output[2] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[2]),(half4)(1.0f, 1.0f, 1.0f, 1.0f));
output[3] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[3]),(half4)(1.0f, 1.0f, 1.0f, 1.0f));
} }
output[0] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[0]),(half4)(1.0f, 1.0f, 1.0f, 1.0f));
output[1] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[1]),(half4)(1.0f, 1.0f, 1.0f, 1.0f)); write_imageh(output_image, (int2)(output_pos.x + 0, output_pos.y), output[0]);
output[2] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[2]),(half4)(1.0f, 1.0f, 1.0f, 1.0f)); write_imageh(output_image, (int2)(output_pos.x + 1, output_pos.y), output[1]);
output[3] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[3]),(half4)(1.0f, 1.0f, 1.0f, 1.0f)); write_imageh(output_image, (int2)(output_pos.x + 2, output_pos.y), output[2]);
write_imageh(output_image, (int2)(output_pos.x + 1, output_pos.y), output[0]); write_imageh(output_image, (int2)(output_pos.x + 3, output_pos.y), output[3]);
write_imageh(output_image, (int2)(output_pos.x + 2, output_pos.y), output[1]);
write_imageh(output_image, (int2)(output_pos.x + 3, output_pos.y), output[2]);
write_imageh(output_image, (int2)(output_pos.x + 4, output_pos.y), output[3]);
} }
\ No newline at end of file
...@@ -39,6 +39,10 @@ void PriorBoxKernel<GPU_CL, float>::Compute( ...@@ -39,6 +39,10 @@ void PriorBoxKernel<GPU_CL, float>::Compute(
const auto &input_aspect_ratio = param.AspectRatios(); const auto &input_aspect_ratio = param.AspectRatios();
const bool &flip = param.Flip(); const bool &flip = param.Flip();
const bool &clip = param.Clip(); const bool &clip = param.Clip();
int isclip =0;
if(clip){
isclip = 1;
}
const float &step_w = param.StepW(); const float &step_w = param.StepW();
const float &step_h = param.StepH(); const float &step_h = param.StepH();
const float &offset = param.Offset(); const float &offset = param.Offset();
...@@ -116,7 +120,7 @@ void PriorBoxKernel<GPU_CL, float>::Compute( ...@@ -116,7 +120,7 @@ void PriorBoxKernel<GPU_CL, float>::Compute(
int w = default_work_size[1]; int w = default_work_size[1];
int nh = default_work_size[2]; int nh = default_work_size[2];
std::vector<int64_t> box_shape({1, 1, 1, num_priors}); std::vector<int64_t> box_shape({num_priors});
framework::DDim ddim = framework::make_ddim(box_shape); framework::DDim ddim = framework::make_ddim(box_shape);
framework::CLTensor box_width_cl_tensor(this->cl_helper_.CLContext(), framework::CLTensor box_width_cl_tensor(this->cl_helper_.CLContext(),
...@@ -141,6 +145,13 @@ void PriorBoxKernel<GPU_CL, float>::Compute( ...@@ -141,6 +145,13 @@ void PriorBoxKernel<GPU_CL, float>::Compute(
DLOG << "img_height:" << img_height; DLOG << "img_height:" << img_height;
DLOG << "num_priors:" << num_priors; DLOG << "num_priors:" << num_priors;
DLOG << "C:" << C; DLOG << "C:" << C;
DLOG << "isclip:" << isclip;
for(int i=0;i<num_priors;i++){
DLOG<<box_width[i];
}
for(int i=0;i<num_priors;i++){
DLOG<<box_height[i];
}
status = clSetKernelArg(kernel, 0, sizeof(int), &c_block); status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(int), &w); status = clSetKernelArg(kernel, 1, sizeof(int), &w);
...@@ -167,6 +178,8 @@ void PriorBoxKernel<GPU_CL, float>::Compute( ...@@ -167,6 +178,8 @@ void PriorBoxKernel<GPU_CL, float>::Compute(
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 12, sizeof(int), &C); status = clSetKernelArg(kernel, 12, sizeof(int), &C);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 13, sizeof(int), &isclip);
CL_CHECK_ERRORS(status);
size_t global_work_size[2] = {c_block, nh}; size_t global_work_size[2] = {c_block, nh};
status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
NULL, global_work_size, NULL, 0, NULL, NULL); NULL, global_work_size, NULL, 0, NULL, NULL);
......
...@@ -364,5 +364,8 @@ if (NOT FOUND_MATCH) ...@@ -364,5 +364,8 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE(test-eng net/test_eng.cpp test_helper.h test_include.h) ADD_EXECUTABLE(test-eng net/test_eng.cpp test_helper.h test_include.h)
target_link_libraries(test-eng paddle-mobile) target_link_libraries(test-eng paddle-mobile)
# gen test
ADD_EXECUTABLE(test-super net/test_super.cpp test_helper.h test_include.h)
target_link_libraries(test-super paddle-mobile)
#add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp) #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
endif () endif ()
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include "../../src/common/types.h"
#include "../test_helper.h"
#include "../test_include.h"
// Test driver for the model stored under g_super on the GPU_CL backend.
//
// Loads "<g_super>/model" and "<g_super>/params", runs Predict a fixed number
// of times on one input, and prints the load time, the average predict time
// and the largest element of the last result together with its index.
//
// Always returns 0; a failed Load only skips the prediction section.
int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
  auto time1 = paddle_mobile::time();
#ifdef PADDLE_MOBILE_CL
  paddle_mobile.SetCLPath("/data/local/tmp/bin");
#endif

  // NOTE(review): the meaning of the trailing (true, false, 1, true)
  // arguments is defined by PaddleMobile::Load, which is not visible here.
  const std::string model_dir(g_super);
  auto isok = paddle_mobile.Load(model_dir + "/model", model_dir + "/params",
                                 true, false, 1, true);

  if (isok) {
    auto time2 = paddle_mobile::time();
    std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
              << std::endl;

    std::vector<float> input;
    std::vector<int64_t> dims{1, 1, 300, 300};
    GetInput<float>(g_yolo_img, &input, dims);

    std::vector<float> vec_result;

    // Number of Predict calls the reported cost is averaged over.
    const int run_count = 10;
    auto time3 = paddle_mobile::time();
    for (int i = 0; i < run_count; ++i) {
      vec_result = paddle_mobile.Predict(input, dims);
    }
    auto time4 = paddle_mobile::time();

    std::cout << "predict cost :"
              << paddle_mobile::time_diff(time3, time4) / run_count << "ms"
              << std::endl;

    // std::max_element returns end() for an empty range; dereferencing that
    // iterator is undefined behavior, so only report a maximum when the
    // prediction actually produced output.
    if (vec_result.empty()) {
      std::cout << "predict returned an empty result" << std::endl;
    } else {
      std::vector<float>::iterator biggest =
          std::max_element(std::begin(vec_result), std::end(vec_result));
      std::cout << " Max element is " << *biggest << " at position "
                << std::distance(std::begin(vec_result), biggest) << std::endl;
    }
  }

  std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
               "是否存在?"
            << std::endl;
  return 0;
}
...@@ -36,16 +36,19 @@ static const char *g_squeezenet = "../models/squeezenet"; ...@@ -36,16 +36,19 @@ static const char *g_squeezenet = "../models/squeezenet";
static const char *g_googlenet = "../models/googlenet"; static const char *g_googlenet = "../models/googlenet";
static const char *g_googlenet_quali = "../models/googlenet_combine_quali"; static const char *g_googlenet_quali = "../models/googlenet_combine_quali";
static const char *g_mobilenet = "../models/mobilenet"; static const char *g_mobilenet = "../models/mobilenet";
static const char *g_mobilenet_mul = "../models/mobilenet_mul"; static const char *g_mobilenet_mul = "../models/r";
static const char *g_alexnet = "../models/alexnet"; static const char *g_alexnet = "../models/alexnet";
static const char *g_inceptionv4 = "../models/inceptionv4"; static const char *g_inceptionv4 = "../models/inceptionv4";
static const char *g_inceptionv3 =
"../models/InceptionV3_Spatial_Attention_Model";
static const char *g_nlp = "../models/nlp"; static const char *g_nlp = "../models/nlp";
static const char *g_super = "../models/superresoltion";
static const char *g_resnet_50 = "../models/resnet_50"; static const char *g_resnet_50 = "../models/resnet_50";
static const char *g_resnet = "../models/resnet"; static const char *g_resnet = "../models/resnet";
static const char *g_googlenet_combine = "../models/googlenet_combine"; static const char *g_googlenet_combine = "../models/googlenet_combine";
static const char *g_yolo = "../models/yolo"; static const char *g_yolo = "../models/yolo";
static const char *g_yolo_combined = "../models/yolo_combined"; static const char *g_yolo_combined = "../models/yolo_combined";
static const char *g_yolo_mul = "../models/yolo_mul"; static const char *g_yolo_mul = "../models/d";
static const char *g_fluid_fssd_new = "../models/fluid_fssd_new"; static const char *g_fluid_fssd_new = "../models/fluid_fssd_new";
static const char *g_test_image_1x3x224x224 = static const char *g_test_image_1x3x224x224 =
"../images/test_image_1x3x224x224_float"; "../images/test_image_1x3x224x224_float";
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册