From b594ed99b834e38a25cb8175d839810a30a83d83 Mon Sep 17 00:00:00 2001
From: Supernovae <51359628+shubham-shahh@users.noreply.github.com>
Date: Sun, 28 Nov 2021 18:24:29 +0530
Subject: [PATCH] Merge pull request #20933 from shubham-shahh:master

Improved overall readability of the code

* grid_nms.cu: minor fix-ups

* Update grid_stride_range.hpp

* Update tf_importer.cpp
---
 modules/dnn/src/cuda/grid_nms.cu           | 10 +++++-----
 modules/dnn/src/cuda/grid_stride_range.hpp |  2 +-
 modules/dnn/src/tensorflow/tf_importer.cpp |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/modules/dnn/src/cuda/grid_nms.cu b/modules/dnn/src/cuda/grid_nms.cu
index 0aeb34add3..51b95d3b80 100644
--- a/modules/dnn/src/cuda/grid_nms.cu
+++ b/modules/dnn/src/cuda/grid_nms.cu
@@ -68,7 +68,7 @@ namespace raw {
      * to compute IOU(GROUP_B, GROUP_A). We still have to compute IOU(GROUP_A, GROUP_A) though since
      * each group has many boxes and we need IOUs amongst boxes within a group.
      *
-     * We arbitarily choose a scheme to exit : exit if group_i is greater than group_j. This way we only
+     * We arbitrarily choose a scheme to exit: exit if group_i is greater than group_j. This way we only
      * compute IOUs between groups once. While nearly half the blocks are wasted, it's ok since they exit
      * early on and the working blocks are compute heavy.
      */
@@ -92,7 +92,7 @@ namespace raw {
          */
 
         /* The `j` box is fixed for each thread. All `i` boxes will be required for every thread.
-         * We store the `i` boxes in shared memory to allow global memory coalesing.
+         * We store the `i` boxes in shared memory to allow global memory coalescing.
          */
         using vector_type = get_vector_type_t<T, 4>;
         __shared__ vector_type group_i_boxes[BLOCK_SIZE];
@@ -162,7 +162,7 @@ namespace raw {
          * this loop has been highly tuned. Please profile and verify carefully before making changes.
          */
         /* UNROLL_SIZE is the number of boxes that must be processed per iteration. We manually unroll
-         * the loop since the compiler cannot effectively unroll on its own preassumably due to presence
+         * the loop since the compiler cannot effectively unroll on its own, presumably due to the presence
          * of instructions forcing warp synchronization.
          */
         constexpr int UNROLL_SIZE = 4;
@@ -290,7 +290,7 @@ namespace raw {
         if (boxes == 0)
             return;
 
-        /* We have a fixed number of threads and an arbitary number of boxes. We use an array of
+        /* We have a fixed number of threads and an arbitrary number of boxes. We use an array of
          * bits to store which boxes haven't been eliminated and which are still active. We organize
          * the array of bits into a matrix of bits of the shape (num_rows, BLOCK_SIZE, 32) which
          * is equivalent to (num_rows, BLOCK_SIZE) where the type is a 32-bit unsigned integer.
@@ -464,4 +464,4 @@ std::size_t getGridNMSWorkspaceSizePerBatchItem(std::size_t num_classes, std::si
 
 template void grid_nms(const Stream& stream, Span<unsigned int> workspace, TensorSpan<int> indices, TensorSpan<int> count, TensorView<__half> bboxes, int, bool normalized_bbox, float nms_threshold);
 template void grid_nms(const Stream& stream, Span<unsigned int> workspace, TensorSpan<int> indices, TensorSpan<int> count, TensorView<float> bboxes, int, bool normalized_bbox, float nms_threshold);
-}}}} /* namespace cv::dnn::cuda4dnn::kernels */
\ No newline at end of file
+}}}} /* namespace cv::dnn::cuda4dnn::kernels */
diff --git a/modules/dnn/src/cuda/grid_stride_range.hpp b/modules/dnn/src/cuda/grid_stride_range.hpp
index cf263c54e2..36d14a9368 100644
--- a/modules/dnn/src/cuda/grid_stride_range.hpp
+++ b/modules/dnn/src/cuda/grid_stride_range.hpp
@@ -36,7 +36,7 @@ public:
     __device__ bool operator!=(const iterator& other) const {
         /* NOTE HACK
          * 'pos' can move in large steps (see operator++)
-         * expansion of range for loop uses != as the loop conditioion
+         * expansion of the range-based for loop uses != as the loop condition
          * => operator!= must return false if 'pos' crosses the end
          */
         return pos < other.pos;
diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp
index a6f9c07980..9fb8f60b41 100644
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@@ -1154,7 +1154,7 @@ void TFImporter::parseExpandDims(tensorflow::GraphDef& net, const tensorflow::No
     // Convert OpenCV's NHC to NCH first.
     if(outShapeSize == 3)
     {
-        // If axis equal to outShapeSize, that mean we expand in Channel dimmension, and do not add permuteLayer.
+        // If axis equals outShapeSize, that means we expand in the Channel dimension and do not add a permuteLayer.
         if(axis != outShapeSize)
        {
             int order[] = {0, 2, 1}; // From OpenCV's NHC to NCH.
-- 
GitLab
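
Reviewer note: the comment touched by the @@ -290,7 hunk describes the bookkeeping for surviving boxes: one bit per box, packed into 32-bit words arranged as a (num_rows, BLOCK_SIZE) matrix so that adjacent threads in a block touch adjacent words. A minimal sketch of that layout under those assumptions; the helper names is_active/eliminate are illustrative, not OpenCV's API:

    #include <cuda_runtime.h>

    // One bit per box: word box/32 covers boxes [32*(box/32), 32*(box/32)+31],
    // and bit box%32 stays set while the box is still a candidate. With
    // BLOCK_SIZE words per row, thread t of a block reads word
    // row * BLOCK_SIZE + t, so accesses within a warp are coalesced.
    __device__ bool is_active(const unsigned int* mask, int box) {
        return mask[box / 32] & (1u << (box % 32));
    }

    // Clear a box's bit; atomic because several threads may update the
    // same 32-bit word concurrently.
    __device__ void eliminate(unsigned int* mask, int box) {
        atomicAnd(&mask[box / 32], ~(1u << (box % 32)));
    }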
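
Reviewer note: the NOTE HACK comment fixed in grid_stride_range.hpp deserves an illustration. A range-based for loop expands to an `it != end` test, but a grid-stride iterator advances pos by the grid's total thread count, so a single increment can jump well past the end; operator!= therefore has to compare with < rather than !=. A simplified, self-contained sketch; the class layout is illustrative, not OpenCV's exact definition:

    #include <cuda_runtime.h>

    class grid_stride_range {
    public:
        __device__ explicit grid_stride_range(int end) : end_(end) {}

        class iterator {
        public:
            __device__ explicit iterator(int pos) : pos(pos) {}
            __device__ int operator*() const { return pos; }

            // 'pos' advances by the total number of threads in the grid,
            // so one step can overshoot 'end'.
            __device__ iterator& operator++() {
                pos += gridDim.x * blockDim.x;
                return *this;
            }

            // Same rationale as the fixed comment: the range-for expansion
            // uses != as the loop condition, so this must return false
            // once 'pos' crosses the end; hence <, not !=.
            __device__ bool operator!=(const iterator& other) const {
                return pos < other.pos;
            }

        private:
            int pos;
        };

        __device__ iterator begin() const {
            return iterator(blockIdx.x * blockDim.x + threadIdx.x);
        }
        __device__ iterator end() const { return iterator(end_); }

    private:
        int end_;
    };

    // Each thread visits i, i + stride, i + 2*stride, ... so the kernel is
    // correct for any launch configuration, even fewer threads than elements.
    __global__ void scale(float* data, float alpha, int n) {
        for (int i : grid_stride_range(n))
            data[i] *= alpha;
    }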