diff --git a/paddle/fluid/operators/detection/box_coder_op.cu b/paddle/fluid/operators/detection/box_coder_op.cu index 9b735722740dd59c077e1d2200b96b379756b60e..e078af3eb478a8bebc6a7fc6460d169d803a3c4b 100644 --- a/paddle/fluid/operators/detection/box_coder_op.cu +++ b/paddle/fluid/operators/detection/box_coder_op.cu @@ -11,6 +11,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/operators/detection/box_coder_op.h" #include "paddle/fluid/platform/cuda_primitives.h" @@ -95,47 +96,33 @@ __global__ void DecodeCenterSizeKernel( prior_box_data[prior_box_offset + 1] + prior_box_height / 2; T target_box_width, target_box_height; T target_box_center_x, target_box_center_y; + T box_var_x = T(1), box_var_y = T(1); + T box_var_w = T(1), box_var_h = T(1); if (prior_box_var_data) { int prior_var_offset = 0; if (prior_box_var_size == 2) { prior_var_offset = axis == 0 ? col_idx * len : row_idx * len; } - target_box_width = exp(prior_box_var_data[prior_var_offset + 2] * - target_box_data[idx * len + 2]) * - prior_box_width; - target_box_height = exp(prior_box_var_data[prior_var_offset + 3] * - target_box_data[idx * len + 3]) * - prior_box_height; - target_box_center_x = prior_box_var_data[prior_var_offset] * - target_box_data[idx * len] * prior_box_width + - prior_box_center_x; - target_box_center_y = prior_box_var_data[prior_var_offset + 1] * - target_box_data[idx * len + 1] * - prior_box_height + - prior_box_center_y; + box_var_x = prior_box_var_data[prior_var_offset]; + box_var_y = prior_box_var_data[prior_var_offset + 1]; + box_var_w = prior_box_var_data[prior_var_offset + 2]; + box_var_h = prior_box_var_data[prior_var_offset + 3]; } else if (var_size == 4) { - target_box_width = - exp(static_cast(variance[2]) * target_box_data[idx * len + 2]) * - prior_box_width; - target_box_height = - exp(static_cast(variance[3]) * target_box_data[idx * len + 3]) * - prior_box_height; - target_box_center_x = static_cast(variance[0]) * - target_box_data[idx * len] * prior_box_width + - prior_box_center_x; - target_box_center_y = static_cast(variance[1]) * - target_box_data[idx * len + 1] * - prior_box_height + - prior_box_center_y; - } else { - target_box_width = exp(target_box_data[idx * len + 2]) * prior_box_width; - target_box_height = - exp(target_box_data[idx * len + 3]) * prior_box_height; - target_box_center_x = - target_box_data[idx * len] * prior_box_width + prior_box_center_x; - target_box_center_y = target_box_data[idx * len + 1] * prior_box_height + - prior_box_center_y; + box_var_x = static_cast(variance[0]); + box_var_y = static_cast(variance[1]); + box_var_w = static_cast(variance[2]); + box_var_h = static_cast(variance[3]); } + target_box_width = + exp(box_var_w * target_box_data[idx * len + 2]) * prior_box_width; + target_box_height = + exp(box_var_h * target_box_data[idx * len + 3]) * prior_box_height; + target_box_center_x = + box_var_x * target_box_data[idx * len] * prior_box_width + + prior_box_center_x; + target_box_center_y = + box_var_y * target_box_data[idx * len + 1] * prior_box_height + + prior_box_center_y; output[idx * len] = target_box_center_x - target_box_width / 2; output[idx * len + 1] = target_box_center_y - target_box_height / 2; @@ -177,9 +164,8 @@ class BoxCoderCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(target_box->lod().size(), 1, "Only support 1 level of LoD."); } - const int var_size = static_cast(variance.size()); - thrust::device_vector dev_variance(variance.begin(), variance.end()); - const float* dev_var_data = thrust::raw_pointer_cast(dev_variance.data()); + const int var_size = static_cast(variance.size()); + auto code_type = GetBoxCodeType(context.Attr("code_type")); bool normalized = context.Attr("box_normalized"); int axis = context.Attr("axis"); @@ -194,6 +180,16 @@ class BoxCoderCUDAKernel : public framework::OpKernel { int grid = (row * col + block - 1) / block; auto& device_ctx = context.cuda_device_context(); + auto& allocator = + platform::DeviceTemporaryAllocator::Instance().Get(device_ctx); + int bytes = var_size * sizeof(float); + auto dev_var = allocator.Allocate(bytes); + float* dev_var_data = reinterpret_cast(dev_var->ptr()); + auto cplace = platform::CPUPlace(); + const auto gplace = boost::get(context.GetPlace()); + memory::Copy(gplace, dev_var_data, cplace, &variance[0], bytes, + device_ctx.stream()); + output_box->mutable_data({row, col, len}, context.GetPlace()); T* output = output_box->data(); diff --git a/paddle/fluid/operators/detection/box_coder_op.h b/paddle/fluid/operators/detection/box_coder_op.h index b61cff1b1d04e1879e08c608f54000d46c7cfcad..a0b1faf7bdc7001eba2d92b4d03fbaf9feb7bcbb 100644 --- a/paddle/fluid/operators/detection/box_coder_op.h +++ b/paddle/fluid/operators/detection/box_coder_op.h @@ -133,6 +133,8 @@ class BoxCoderKernel : public framework::OpKernel { T target_box_center_x = 0, target_box_center_y = 0; T target_box_width = 0, target_box_height = 0; + T box_var_x = T(1), box_var_y = T(1); + T box_var_w = T(1), box_var_h = T(1); if (prior_box_var) { int prior_var_offset = 0; if (prior_box_var->dims().size() == 2) { @@ -141,44 +143,26 @@ class BoxCoderKernel : public framework::OpKernel { else if (axis == 1) prior_var_offset = i * len; } - target_box_center_x = prior_box_var_data[prior_var_offset] * - target_box_data[offset] * prior_box_width + - prior_box_center_x; - target_box_center_y = prior_box_var_data[prior_var_offset + 1] * - target_box_data[offset + 1] * - prior_box_height + - prior_box_center_y; - target_box_width = std::exp(prior_box_var_data[prior_var_offset + 2] * - target_box_data[offset + 2]) * - prior_box_width; - target_box_height = - std::exp(prior_box_var_data[prior_var_offset + 3] * - target_box_data[offset + 3]) * - prior_box_height; + box_var_x = prior_box_var_data[prior_var_offset]; + box_var_y = prior_box_var_data[prior_var_offset + 1]; + box_var_w = prior_box_var_data[prior_var_offset + 2]; + box_var_h = prior_box_var_data[prior_var_offset + 3]; } else if (!(variance.empty())) { - target_box_center_x = static_cast(variance[0]) * - target_box_data[offset] * prior_box_width + - prior_box_center_x; - target_box_center_y = static_cast(variance[1]) * - target_box_data[offset + 1] * - prior_box_height + - prior_box_center_y; - target_box_width = std::exp(static_cast(variance[2]) * - target_box_data[offset + 2]) * - prior_box_width; - target_box_height = std::exp(static_cast(variance[3]) * - target_box_data[offset + 3]) * - prior_box_height; - } else { - target_box_center_x = - target_box_data[offset] * prior_box_width + prior_box_center_x; - target_box_center_y = target_box_data[offset + 1] * prior_box_height + - prior_box_center_y; - target_box_width = - std::exp(target_box_data[offset + 2]) * prior_box_width; - target_box_height = - std::exp(target_box_data[offset + 3]) * prior_box_height; + box_var_x = static_cast(variance[0]); + box_var_y = static_cast(variance[1]); + box_var_w = static_cast(variance[2]); + box_var_h = static_cast(variance[3]); } + target_box_center_x = + box_var_x * target_box_data[offset] * prior_box_width + + prior_box_center_x; + target_box_center_y = + box_var_y * target_box_data[offset + 1] * prior_box_height + + prior_box_center_y; + target_box_width = + std::exp(box_var_w * target_box_data[offset + 2]) * prior_box_width; + target_box_height = std::exp(box_var_h * target_box_data[offset + 3]) * + prior_box_height; output[offset] = target_box_center_x - target_box_width / 2; output[offset + 1] = target_box_center_y - target_box_height / 2; diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index 2dbcfa31fc15f06721f7066749ea7f281dcd2577..869da5804329953c751a62f1701573ea448c7dad 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -50,6 +50,19 @@ class TestDetection(unittest.TestCase): self.assertEqual(out.shape[-1], 6) print(str(program)) + def test_box_coder_api(self): + program = Program() + with program_guard(program): + x = layers.data(name='x', shape=[4], dtype='float32') + y = layers.data(name='z', shape=[4], dtype='float32', lod_level=1) + bcoder = layers.box_coder( + prior_box=x, + prior_box_var=[0.1, 0.2, 0.1, 0.2], + target_box=y, + code_type='encode_center_size') + self.assertIsNotNone(bcoder) + print(str(program)) + def test_detection_api(self): program = Program() with program_guard(program): @@ -59,7 +72,7 @@ class TestDetection(unittest.TestCase): iou = layers.iou_similarity(x=x, y=y) bcoder = layers.box_coder( prior_box=x, - prior_box_var=[0.2, 0.3, 0.3, 0.2], + prior_box_var=y, target_box=z, code_type='encode_center_size') self.assertIsNotNone(iou)