Improve prior_box kernel and add super-resolution test file

8d37f0df · yangfei · 34964572 · 8d37f0df · 8d37f0df · 8d37f0df
5 changed file
--- a/src/operators/kernel/cl/cl_kernel/prior_box_kernel.cl
+++ b/src/operators/kernel/cl/cl_kernel/prior_box_kernel.cl
@@ -26,40 +26,34 @@ __kernel void prior_box(__private const int global_size_dim0,
                        __private const int img_width,
                        __private const int img_height,
                        __private const int num_priors,
-                        __private const int C){
+                        __private const int C,
+                        __private const int clip){
                        const int out_c = get_global_id(0);
                        const int out_nh = get_global_id(1);
                        const int out_n = out_nh/num_priors;
                        const int out_h = out_nh%num_priors;
-                        if (out_c >= global_size_dim0 ||out_nh >= global_size_dim2) {
-                             return;
-                         }
-                        const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE |
-                                                  CLK_ADDRESS_CLAMP          |
-                                                  CLK_FILTER_NEAREST;
                        int2 output_pos;
                        output_pos.x = out_c * 4;
                        output_pos.y = out_nh;
-                        float center_x0 = (offset + out_c * 4) * step_width;
+                        float center_x0 = (offset + (float)(out_c * 4)) * step_width;
-                        float center_x1 = (offset + out_c * 4 + 1) * step_width;
+                        float center_x1 = (offset + (float)(out_c * 4 + 1)) * step_width;
-                        float center_x2 = (offset + out_c * 4 + 2) * step_width;
+                        float center_x2 = (offset + (float)(out_c * 4 + 2)) * step_width;
-                        float center_x3 = (offset + out_c * 4 + 3) * step_width;
+                        float center_x3 = (offset + (float)(out_c * 4 + 3)) * step_width;
-                        float center_y = (out_n + offset) * step_height;
+                        float center_y = ((float)out_n + offset) * step_height;
                        half4 output[4];
-                        output[0].x = convert_half((center_x0 - box_width[out_h]) / img_width);
+                        output[0].x = convert_half((center_x0 - box_width[out_h]) / (float)img_width);
-                        output[1].x = convert_half((center_y - box_height[out_h]) / img_height);
+                        output[1].x = convert_half((center_y - box_height[out_h]) / (float)img_height);
-                        output[2].x = convert_half((center_x0 + box_width[out_h]) / img_width);
+                        output[2].x = convert_half((center_x0 + box_width[out_h]) / (float)img_width);
-                        output[3].x = convert_half((center_y + box_height[out_h]) / img_height);
+                        output[3].x = convert_half((center_y + box_height[out_h]) / (float)img_height);
                        if(C - 4 * out_c>=2){
-                        output[0].y = convert_half((center_x1 - box_width[out_h]) / img_width);
+                        output[0].y = convert_half((center_x1 - box_width[out_h]) / (float)img_width);
-                        output[1].y = convert_half((center_y - box_height[out_h]) / img_height);
+                        output[1].y = convert_half((center_y - box_height[out_h]) / (float)img_height);
-                        output[2].y = convert_half((center_x1 + box_width[out_h]) / img_width);
+                        output[2].y = convert_half((center_x1 + box_width[out_h]) / (float)img_width);
-                        output[3].y = convert_half((center_y + box_height[out_h]) / img_height);
+                        output[3].y = convert_half((center_y + box_height[out_h]) / (float)img_height);
                        }else{
                         output[0].y = 0.0f;
                         output[1].y = 0.0f;
@@ -67,10 +61,10 @@ __kernel void prior_box(__private const int global_size_dim0,
                         output[3].y = 0.0f;
                        }
                        if(C - 4 * out_c>=3){
-                        output[0].z = convert_half((center_x2 - box_width[out_h]) / img_width);
+                        output[0].z = convert_half((center_x2 - box_width[out_h]) / (float)img_width);
-                        output[1].z = convert_half((center_y - box_height[out_h]) / img_height);
+                        output[1].z = convert_half((center_y - box_height[out_h]) / (float)img_height);
-                        output[2].z = convert_half((center_x2 + box_width[out_h]) / img_width);
+                        output[2].z = convert_half((center_x2 + box_width[out_h]) / (float)img_width);
-                        output[3].z = convert_half((center_y + box_height[out_h]) / img_height);
+                        output[3].z = convert_half((center_y + box_height[out_h]) / (float)img_height);
                        }else{
                        output[0].z = 0.0f;
                        output[1].z = 0.0f;
@@ -78,23 +72,26 @@ __kernel void prior_box(__private const int global_size_dim0,
                        output[3].z = 0.0f;
                        }
                        if(C - 4 * out_c>=4){
-                        output[0].w = convert_half((center_x3 - box_width[out_h]) / img_width);
+                        output[0].w = convert_half((center_x3 - box_width[out_h]) / (float)img_width);
-                        output[1].w = convert_half((center_y - box_height[out_h]) / img_height);
+                        output[1].w = convert_half((center_y - box_height[out_h]) / (float)img_height);
-                        output[2].w = convert_half((center_x3 + box_width[out_h]) / img_width);
+                        output[2].w = convert_half((center_x3 + box_width[out_h]) / (float)img_width);
-                        output[3].w = convert_half((center_y + box_height[out_h]) / img_height);
+                        output[3].w = convert_half((center_y + box_height[out_h]) / (float)img_height);
                        }else{
-                        output[0].z = 0.0f;
+                        output[0].w = 0.0f;
-                        output[1].z = 0.0f;
+                        output[1].w = 0.0f;
-                        output[2].z = 0.0f;
+                        output[2].w = 0.0f;
-                        output[3].z = 0.0f;
+                        output[3].w = 0.0f;
+                        }
+                        if(clip==1){
+                         output[0] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[0]),(half4)(1.0f, 1.0f, 1.0f, 1.0f));
+                         output[1] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[1]),(half4)(1.0f, 1.0f, 1.0f, 1.0f));
+                         output[2] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[2]),(half4)(1.0f, 1.0f, 1.0f, 1.0f));
+                         output[3] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[3]),(half4)(1.0f, 1.0f, 1.0f, 1.0f));
                        }
-                        output[0] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[0]),(half4)(1.0f, 1.0f, 1.0f, 1.0f));
-                        output[1] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[1]),(half4)(1.0f, 1.0f, 1.0f, 1.0f));
+                        write_imageh(output_image, (int2)(output_pos.x + 0, output_pos.y), output[0]);
-                        output[2] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[2]),(half4)(1.0f, 1.0f, 1.0f, 1.0f));
+                        write_imageh(output_image, (int2)(output_pos.x + 1, output_pos.y), output[1]);
-                        output[3] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[3]),(half4)(1.0f, 1.0f, 1.0f, 1.0f));
+                        write_imageh(output_image, (int2)(output_pos.x + 2, output_pos.y), output[2]);
-                        write_imageh(output_image, (int2)(output_pos.x + 1, output_pos.y), output[0]);
+                        write_imageh(output_image, (int2)(output_pos.x + 3, output_pos.y), output[3]);
-                        write_imageh(output_image, (int2)(output_pos.x + 2, output_pos.y), output[1]);
-                        write_imageh(output_image, (int2)(output_pos.x + 3, output_pos.y), output[2]);
-                        write_imageh(output_image, (int2)(output_pos.x + 4, output_pos.y), output[3]);
 }
\ No newline at end of file
--- a/src/operators/kernel/cl/prior_box_kernel.cpp
+++ b/src/operators/kernel/cl/prior_box_kernel.cpp
@@ -39,6 +39,10 @@ void PriorBoxKernel<GPU_CL, float>::Compute(
  const auto &input_aspect_ratio = param.AspectRatios();
  const bool &flip = param.Flip();
  const bool &clip = param.Clip();
+  int isclip =0;
+    if(clip){
+        isclip = 1;
+    }
  const float &step_w = param.StepW();
  const float &step_h = param.StepH();
  const float &offset = param.Offset();
@@ -116,7 +120,7 @@ void PriorBoxKernel<GPU_CL, float>::Compute(
  int w = default_work_size[1];
  int nh = default_work_size[2];
-  std::vector<int64_t> box_shape({1, 1, 1, num_priors});
+  std::vector<int64_t> box_shape({num_priors});
  framework::DDim ddim = framework::make_ddim(box_shape);
  framework::CLTensor box_width_cl_tensor(this->cl_helper_.CLContext(),
@@ -141,6 +145,13 @@ void PriorBoxKernel<GPU_CL, float>::Compute(
  DLOG << "img_height:" << img_height;
  DLOG << "num_priors:" << num_priors;
  DLOG << "C:" << C;
+  DLOG << "isclip:" << isclip;
+  for(int i=0;i<num_priors;i++){
+    DLOG<<box_width[i];
+  }
+  for(int i=0;i<num_priors;i++){
+    DLOG<<box_height[i];
+  }
  status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 1, sizeof(int), &w);
@@ -167,6 +178,8 @@ void PriorBoxKernel<GPU_CL, float>::Compute(
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 12, sizeof(int), &C);
  CL_CHECK_ERRORS(status);
+  status = clSetKernelArg(kernel, 13, sizeof(int), &isclip);
+  CL_CHECK_ERRORS(status);
  size_t global_work_size[2] = {c_block, nh};
  status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
                                  NULL, global_work_size, NULL, 0, NULL, NULL);

--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -364,5 +364,8 @@ if (NOT FOUND_MATCH)
    ADD_EXECUTABLE(test-eng net/test_eng.cpp test_helper.h test_include.h)
    target_link_libraries(test-eng paddle-mobile)
+    # gen test
+    ADD_EXECUTABLE(test-super net/test_super.cpp test_helper.h test_include.h)
+    target_link_libraries(test-super paddle-mobile)
    #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
 endif ()
--- a/test/net/test_super.cpp
+++ b/test/net/test_super.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include <iostream>
+#include "../../src/common/types.h"
+#include "../test_helper.h"
+#include "../test_include.h"
+// Smoke/benchmark driver for the model stored under g_super, run on the
+// GPU_CL backend. It loads the model, calls Predict 10 times on one input
+// and prints the load time, the average time per Predict call and the largest
+// element of the last result. Always returns 0, even when loading fails.
+int main() {
+    paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
+    //    paddle_mobile.SetThreadNum(4);
+    auto time1 = paddle_mobile::time();
+#ifdef PADDLE_MOBILE_CL
+    paddle_mobile.SetCLPath("/data/local/tmp/bin");
+#endif
+
+    // NOTE(review): the meaning of the trailing Load arguments
+    // (true, false, 1, true) is not visible here - confirm against
+    // PaddleMobile::Load before changing them.
+    auto isok =
+            paddle_mobile.Load(std::string(g_super) + "/model",
+                               std::string(g_super) + "/params",true, false,1,true);
+    //  auto isok = paddle_mobile.Load(std::string(g_mobilenet_mul), true);
+    if (isok) {
+        auto time2 = paddle_mobile::time();
+        std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
+                  << std::endl;
+
+        std::vector<float> input;
+        std::vector<int64_t> dims{1, 1,300, 300};
+        // NOTE(review): the input is read from g_yolo_img although this test
+        // targets the g_super model - presumably a placeholder; verify.
+        GetInput<float>(g_yolo_img, &input, dims);
+
+        std::vector<float> vec_result ;
+        auto time3 = paddle_mobile::time();
+        int max = 10;
+        for (int i = 0; i < max; ++i) {
+            vec_result = paddle_mobile.Predict(input, dims);
+        }
+        auto time4 = paddle_mobile::time();
+        // Average over the `max` Predict calls timed above.
+        std::cout << "predict cost :"
+                  << paddle_mobile::time_diff(time3, time4) / max << "ms"
+                  << std::endl;
+
+        if (vec_result.empty()) {
+            // std::max_element returns the end iterator for an empty range;
+            // dereferencing it would be undefined behaviour, so report the
+            // empty result instead of printing a maximum.
+            std::cout << " Predict returned an empty result" << std::endl;
+        } else {
+            std::vector<float>::iterator biggest =
+                    std::max_element(std::begin(vec_result), std::end(vec_result));
+            std::cout << " Max element is " << *biggest << " at position "
+                      << std::distance(std::begin(vec_result), biggest) << std::endl;
+        }
+    }
+    // The hint below is printed unconditionally. In English it reads: "If the
+    // result is NaN, check whether
+    // test/images/g_test_image_1x3x224x224_banana exists".
+    // NOTE(review): it names a different image than the one loaded above.
+    std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
+            "是否存在?"
+              << std::endl;
+    return 0;
+}
--- a/test/test_helper.h
+++ b/test/test_helper.h
@@ -36,16 +36,19 @@ static const char *g_squeezenet = "../models/squeezenet";
 static const char *g_googlenet = "../models/googlenet";
 static const char *g_googlenet_quali = "../models/googlenet_combine_quali";
 static const char *g_mobilenet = "../models/mobilenet";
-static const char *g_mobilenet_mul = "../models/mobilenet_mul";
+static const char *g_mobilenet_mul = "../models/r";
 static const char *g_alexnet = "../models/alexnet";
 static const char *g_inceptionv4 = "../models/inceptionv4";
+static const char *g_inceptionv3 =
+    "../models/InceptionV3_Spatial_Attention_Model";
 static const char *g_nlp = "../models/nlp";
+static const char *g_super = "../models/superresoltion";
 static const char *g_resnet_50 = "../models/resnet_50";
 static const char *g_resnet = "../models/resnet";
 static const char *g_googlenet_combine = "../models/googlenet_combine";
 static const char *g_yolo = "../models/yolo";
 static const char *g_yolo_combined = "../models/yolo_combined";
-static const char *g_yolo_mul = "../models/yolo_mul";
+static const char *g_yolo_mul = "../models/d";
 static const char *g_fluid_fssd_new = "../models/fluid_fssd_new";
 static const char *g_test_image_1x3x224x224 =
    "../images/test_image_1x3x224x224_float";