[Perception] add CUDA implementation of connected component generator (#3743)

* add CUDA implementation of connected componenet generator * integrate ConnectedComponentGeneratorGPU into 'cuda_util'

[Perception] add CUDA implementation of connected component generator (#3743)
* add CUDA implementation of connected componenet generator * integrate ConnectedComponentGeneratorGPU into 'cuda_util'
200fda09 · Jun Zhu · Liangliang Zhang · 784eee50 · 200fda09 · 200fda09
20 changed file
--- a/modules/perception/cuda_util/BUILD
+++ b/modules/perception/cuda_util/BUILD
@@ -26,6 +26,7 @@ cc_library(
        "*.h",
    ]),
    deps = [
+        "//modules/perception/obstacle/camera/lane_post_process/common:connected_component",
        "@caffe//:lib",
        "@cuda",
    ],

--- a/modules/perception/cuda_util/CMakeLists.txt
+++ b/modules/perception/cuda_util/CMakeLists.txt
@@ -7,7 +7,7 @@ include(cmake/Packages.cmake)
 file(GLOB _SRCS "*.cc")
 list(APPEND CUDA_NVCC_FLAGS -Wno-deprecated-gpu-targets -std=c++11)

-set(cuda_file undistortion.cu util.cu region_output.cu network.cu)
+set(cuda_file undistortion.cu util.cu region_output.cu network.cu connected_component_gpu.cu block_uf.cu)
 cuda_compile(cuda_obj ${cuda_file} SHARED)

 include_directories(/usr/local/cuda/targets/x86_64-linux/include/)

--- a/modules/perception/cuda_util/block_uf.cu
+++ b/modules/perception/cuda_util/block_uf.cu
+/******************************************************************************
+ * Copyright (c) 2014, Victor Matheus de Araujo Oliveira All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ *ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+/******************************************************************************
+ * Copyright 2018 The Apollo Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *****************************************************************************/
+
+#include <iostream>
+
+#include "block_uf.h"
+#include "texture.h"
+
+namespace apollo {
+namespace perception {
+namespace block_uf {
+
+__device__ int Find(int* parent, int i) {
+  while (parent[i] != i) {
+    i = parent[i];
+  }
+  return i;
+}
+
+__device__ void Union(int* parent, int i, int j) {
+  bool done;
+  do {
+    i = Find(parent, i);
+    j = Find(parent, j);
+
+    if (i < j) {
+      int old = atomicMin(&parent[j], i);
+      done = (old == j);
+      j = old;
+    } else if (i > j) {
+      int old = atomicMin(&parent[i], j);
+      done = (old == i);
+      i = old;
+    } else {
+      done = true;
+    }
+  } while (!done);
+}
+
+__global__ void BlockUnionFindInternal(int* label_array, int width,
+                                       int height) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  int y = blockIdx.y * blockDim.y + threadIdx.y;
+  int global_index = y * width + x;
+  int block_index = blockDim.x * threadIdx.y + threadIdx.x;
+
+  __shared__ int s_parent[UF_BLOCK_WIDTH * UF_BLOCK_HEIGHT];
+  __shared__ unsigned char s_img[UF_BLOCK_WIDTH * UF_BLOCK_HEIGHT];
+
+  bool is_valid = x < width && y < height;
+
+  s_parent[block_index] = block_index;
+  s_img[block_index] = is_valid ? tex2D(img_tex, x, y) : 0xFF;
+  __syncthreads();
+
+  unsigned char v = s_img[block_index];
+
+  if (is_valid && threadIdx.x > 0 && v != 0 && s_img[block_index - 1] == v) {
+    Union(s_parent, block_index, block_index - 1);
+  }
+  __syncthreads();
+
+  if (is_valid && threadIdx.y > 0 && v != 0 &&
+      s_img[block_index - blockDim.x] == v) {
+    Union(s_parent, block_index, block_index - blockDim.x);
+  }
+  __syncthreads();
+
+  if (is_valid) {
+    int f = Find(s_parent, block_index);
+    int fx = f % UF_BLOCK_WIDTH;
+    int fy = f / UF_BLOCK_WIDTH;
+    label_array[global_index] =
+        (blockIdx.y * blockDim.y + fy) * width +
+            (blockIdx.x * blockDim.x + fx);
+  }
+}
+
+__global__ void BlockUnionFindBoundary(int* label_array, int width,
+                                       int height) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  int y = blockIdx.y * blockDim.y + threadIdx.y;
+  int global_index = y * width + x;
+
+  bool is_valid = x < width && y < height;
+  unsigned char v = is_valid ? tex2D(img_tex, x, y) : 0xFF;
+
+  if (is_valid && y > 0 && threadIdx.y == 0 && v != 0 &&
+      tex2D(img_tex, x, y - 1) == v) {
+    Union(label_array, global_index, global_index - width);
+  }
+
+  if (is_valid && x > 0 && threadIdx.x == 0 && v != 0 &&
+      tex2D(img_tex, x - 1, y) == v) {
+    Union(label_array, global_index, global_index - 1);
+  }
+}
+
+__global__ void BlockUnionFindRoot(int* label_array, int width, int height) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  int y = blockIdx.y * blockDim.y + threadIdx.y;
+  int global_index = y * width + x;
+
+  bool is_valid = x < width && y < height;
+  if (is_valid) {
+    label_array[global_index] =
+        tex2D(img_tex, x, y) > 0 ? Find(label_array, global_index) : -1;
+  }
+}
+
+bool BlockUnionFind(const unsigned char* img, int width, int height,
+                    int image_width, int* labels) {
+  cudaError_t cuda_err;
+
+  cudaChannelFormatDesc uchar_desc = cudaCreateChannelDesc<unsigned char>();
+  cudaArray* img_array = NULL;
+  cudaMallocArray(&img_array, &uchar_desc, static_cast<size_t>(width),
+                  static_cast<size_t>(height));
+  cudaBindTextureToArray(img_tex, img_array, uchar_desc);
+
+  if (image_width == width) {
+    size_t siz = static_cast<size_t>(width) * static_cast<size_t>(height) *
+                 sizeof(unsigned char);
+    cudaMemcpyToArray(img_array, 0, 0, img, siz, cudaMemcpyHostToDevice);
+  } else {
+    size_t siz = static_cast<size_t>(width) * sizeof(unsigned char);
+    for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
+      cudaMemcpyToArray(img_array, 0, i, img, siz, cudaMemcpyHostToDevice);
+      img += image_width;
+    }
+  }
+
+  int* label_array;
+  cudaMalloc(
+      (void**)&label_array,
+      static_cast<size_t>(width) * static_cast<size_t>(height) * sizeof(int));
+
+  dim3 block(UF_BLOCK_WIDTH, UF_BLOCK_HEIGHT);
+  dim3 grid(
+      static_cast<unsigned int>((width + UF_BLOCK_WIDTH - 1) / UF_BLOCK_WIDTH),
+      static_cast<unsigned int>((height + UF_BLOCK_HEIGHT - 1) /
+                                UF_BLOCK_HEIGHT));
+  cuda_err = cudaGetLastError();
+  if (cuda_err != cudaSuccess) {
+    std::cerr << "failed to start block union find with CUDA: "
+              << cudaGetErrorString(cuda_err) << std::endl;
+    return false;
+  }
+
+  cudaThreadSetCacheConfig(cudaFuncCachePreferShared);
+
+  BlockUnionFindInternal<<<grid, block>>>(label_array, width, height);
+
+  cudaThreadSetCacheConfig(cudaFuncCachePreferL1);
+
+  BlockUnionFindBoundary<<<grid, block>>>(label_array, width, height);
+
+  BlockUnionFindRoot<<<grid, block>>>(label_array, width, height);
+
+  cudaMemcpy(labels, label_array,
+             static_cast<size_t>(width) *
+                 static_cast<size_t>(height) * sizeof(int),
+             cudaMemcpyDeviceToHost);
+
+  cudaFree(label_array);
+  cudaFreeArray(img_array);
+  cuda_err = cudaGetLastError();
+  if (cuda_err != cudaSuccess) {
+    std::cerr << "failed to end block union find with CUDA: "
+              << cudaGetErrorString(cuda_err) << std::endl;
+    return false;
+  }
+
+  return true;
+}
+
+}  // namespace block_uf
+}  // namespace perception
+}  // namespace apollo
--- a/modules/perception/cuda_util/block_uf.h
+++ b/modules/perception/cuda_util/block_uf.h
+/******************************************************************************
+ * Copyright (c) 2014, Victor Matheus de Araujo Oliveira All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+/******************************************************************************
+ * Copyright 2018 The Apollo Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *****************************************************************************/
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#ifndef MODULES_PERCEPTION_OBSTACLE_CAMERA_LANE_POST_PROCESS_BLOCK_UF_H_
+#define MODULES_PERCEPTION_OBSTACLE_CAMERA_LANE_POST_PROCESS_BLOCK_UF_H_
+
+namespace apollo {
+namespace perception {
+namespace block_uf {
+
+#ifndef UF_BLOCK_WIDTH
+#define UF_BLOCK_WIDTH 32
+#endif
+
+#ifndef UF_BLOCK_HEIGHT
+#define UF_BLOCK_HEIGHT 16
+#endif
+
+__device__ int Find(int* parent, int i);
+
+__device__ void Union(int* parent, int i, int j);
+
+__global__ void BlockUnionFindInternal(int* label_array, int width, int height);
+
+__global__ void BlockUnionFindBoundary(int* label_array, int width, int height);
+
+__global__ void BlockUnionFindRoot(int* label_array, int width, int height);
+
+}  // namespace block_uf
+}  // namespace perception
+}  // namespace apollo
+
+#endif  // MODULES_PERCEPTION_OBSTACLE_CAMERA_LANE_POST_PROCESS_BLOCK_UF_H_
--- a/modules/perception/cuda_util/cmake/Eigen.cmake
+++ b/modules/perception/cuda_util/cmake/Eigen.cmake
+###########################################################################
+# Copyright 2018 The Apollo Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###########################################################################
+
+find_package(Eigen3 REQUIRED)
+include_directories(SYSTEM ${EIGEN3_INCLUDE_DIR})
+link_directories(${EIGEN3_LIBRARY_DIRS})
\ No newline at end of file
--- a/modules/perception/cuda_util/cmake/Packages.cmake
+++ b/modules/perception/cuda_util/cmake/Packages.cmake
@@ -4,5 +4,6 @@ list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)

 include(${CMAKE_SOURCE_DIR}/cmake/Cuda.cmake)
+include(${CMAKE_SOURCE_DIR}/cmake/Eigen.cmake)

-include_directories(SYSTEM ${CMAKE_SOURCE_DIR})
+include_directories(SYSTEM ${CMAKE_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/../../../)
--- a/modules/perception/cuda_util/connected_component_gpu.cu
+++ b/modules/perception/cuda_util/connected_component_gpu.cu
+/******************************************************************************
+ * Copyright 2018 The Apollo Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *****************************************************************************/
+
+#include "connected_component_gpu.h"
+
+#include "texture.h"
+#include "block_uf.h"
+
+namespace apollo {
+namespace perception {
+
+using std::shared_ptr;
+using std::unordered_set;
+using std::vector;
+
+ConnectedComponentGeneratorGPU::ConnectedComponentGeneratorGPU(int image_width,
+                                                               int image_height)
+    : image_width_(image_width),
+      image_height_(image_height),
+      width_(image_width),
+      height_(image_height),
+      roi_x_min_(0),
+      roi_y_min_(0),
+      roi_x_max_(image_width - 1),
+      roi_y_max_(image_height - 1) {
+  total_pix_ =
+      static_cast<size_t>(image_width_) * static_cast<size_t>(image_height_);
+  cudaChannelFormatDesc uchar_desc = cudaCreateChannelDesc<unsigned char>();
+  cudaMallocArray(&img_array_, &uchar_desc, static_cast<size_t>(width_),
+                  static_cast<size_t>(height_));
+  cudaBindTextureToArray(img_tex, img_array_, uchar_desc);
+  cudaMalloc(
+      reinterpret_cast<void**>(&label_array_),
+      static_cast<size_t>(width_) * static_cast<size_t>(height_) * sizeof(int));
+  cudaError_t cuda_err = cudaGetLastError();
+  if (cuda_err != cudaSuccess) {
+    std::cerr
+        << "failed to initialize 'img_array' and 'label_array' with CUDA: "
+        << cudaGetErrorString(cuda_err) << std::endl;
+  }
+  labels_ = static_cast<int*>(malloc(total_pix_ * sizeof(int)));
+  root_map_.reserve(total_pix_);
+}
+
+ConnectedComponentGeneratorGPU::ConnectedComponentGeneratorGPU(int image_width,
+                                                               int image_height,
+                                                               cv::Rect roi)
+    : image_width_(image_width),
+      image_height_(image_height),
+      width_(roi.width),
+      height_(roi.height),
+      roi_x_min_(roi.x),
+      roi_y_min_(roi.y),
+      roi_x_max_(roi.x + roi.width - 1),
+      roi_y_max_(roi.y + roi.height - 1) {
+  if (roi_x_min_ < 0) {
+    std::cerr << "x_min is less than 0: " << roi_x_min_ << std::endl;
+  }
+  if (roi_y_min_ < 0) {
+    std::cerr << "y_min is less than 0: " << roi_y_min_ << std::endl;
+  }
+  if (roi_x_max_ >= image_width_) {
+    std::cerr << "x_max is larger than image width: "
+              << roi_x_max_ << "|"
+              << image_width_ << std::endl;
+  }
+  if (roi_y_max_ >= image_height_) {
+    std::cerr << "y_max is larger than image height: "
+              << roi_y_max_ << "|"
+              << image_height_ << std::endl;
+  }
+  total_pix_ = static_cast<size_t>(width_) * static_cast<size_t>(height_);
+  cudaChannelFormatDesc uchar_desc = cudaCreateChannelDesc<unsigned char>();
+  cudaMallocArray(&img_array_, &uchar_desc, static_cast<size_t>(width_),
+                  static_cast<size_t>(height_));
+  cudaBindTextureToArray(img_tex, img_array_, uchar_desc);
+  cudaMalloc(
+      reinterpret_cast<void**>(&label_array_),
+      static_cast<size_t>(width_) * static_cast<size_t>(height_) * sizeof(int));
+  cudaError_t cuda_err = cudaGetLastError();
+  if (cuda_err != cudaSuccess) {
+    std::cerr << "failed to initialize 'img_array' and 'label_array' with CUDA: "
+              << cudaGetErrorString(cuda_err) << std::endl;
+  }
+  labels_ = static_cast<int*>(malloc(total_pix_ * sizeof(int)));
+  root_map_.reserve(total_pix_);
+}
+
+bool ConnectedComponentGeneratorGPU::BlockUnionFind(const unsigned char* img) {
+  cudaError_t cuda_err;
+
+  if (width_ == image_width_) {
+    size_t siz = static_cast<size_t>(width_) * static_cast<size_t>(height_) *
+                 sizeof(unsigned char);
+    cudaMemcpyToArray(img_array_, 0, 0, img, siz, cudaMemcpyHostToDevice);
+  } else {
+    size_t siz = static_cast<size_t>(width_) * sizeof(unsigned char);
+    for (size_t i = 0; i < static_cast<size_t>(height_); ++i) {
+      cudaMemcpyToArray(img_array_, 0, i, img, siz, cudaMemcpyHostToDevice);
+      img += image_width_;
+    }
+  }
+
+  dim3 block(UF_BLOCK_WIDTH, UF_BLOCK_HEIGHT);
+  dim3 grid(
+      static_cast<unsigned int>((width_ + UF_BLOCK_WIDTH - 1) / UF_BLOCK_WIDTH),
+      static_cast<unsigned int>((height_ + UF_BLOCK_HEIGHT - 1) /
+                                UF_BLOCK_HEIGHT));
+
+  cuda_err = cudaGetLastError();
+  if (cuda_err != cudaSuccess) {
+    std::cerr << "failed to start block union find with CUDA: "
+              << cudaGetErrorString(cuda_err) << std::endl;
+    return false;
+  }
+
+  cudaThreadSetCacheConfig(cudaFuncCachePreferShared);
+
+  block_uf::BlockUnionFindInternal<<<grid, block>>>(label_array_, width_,
+                                                    height_);
+
+  cudaThreadSetCacheConfig(cudaFuncCachePreferL1);
+
+  block_uf::BlockUnionFindBoundary<<<grid, block>>>(label_array_, width_,
+                                                    height_);
+
+  block_uf::BlockUnionFindRoot<<<grid, block>>>(label_array_, width_, height_);
+
+  cudaMemcpy(
+      labels_, label_array_,
+      static_cast<size_t>(width_) * static_cast<size_t>(height_) * sizeof(int),
+      cudaMemcpyDeviceToHost);
+
+  cuda_err = cudaGetLastError();
+  if (cuda_err != cudaSuccess) {
+    std::cerr << "failed to finish block union find with CUDA: "
+              << cudaGetErrorString(cuda_err) << std::endl;
+    return false;
+  }
+
+  return true;
+}
+
+bool ConnectedComponentGeneratorGPU::FindConnectedComponents(
+    const cv::Mat& lane_map, vector<shared_ptr<ConnectedComponent>>* cc) {
+  if (lane_map.empty()) {
+    std::cerr << "The input lane map is empty." << std::endl;
+    return false;
+  }
+  if (lane_map.type() != CV_8UC1) {
+    std::cerr << "The input lane map type is not CV_8UC1." << std::endl;
+    return false;
+  }
+
+  if (lane_map.cols != image_width_) {
+    std::cerr << "The width of input lane map does not match." << std::endl;
+    return false;
+  }
+  if (lane_map.rows != image_height_) {
+    std::cerr << "The height of input lane map does not match." << std::endl;
+    return false;
+  }
+
+  if (cc == NULL) {
+    std::cerr << "The pointer of output connected components is null."
+              << std::endl;
+    return false;
+  }
+
+  cc->clear();
+
+  const unsigned char* img =
+      lane_map.data + roi_y_min_ * image_width_ + roi_x_min_;
+
+  BlockUnionFind(img);
+
+  int cur_idx = 0;
+  int curt_label = 0;
+  int cc_count = 0;
+  root_map_.assign(total_pix_, -1);
+  for (int y = roi_y_min_; y <= roi_y_max_; ++y) {
+    for (int x = roi_x_min_; x <= roi_x_max_; ++x) {
+      curt_label = labels_[cur_idx];
+
+      if (curt_label >= 0) {
+        if (curt_label >= static_cast<int>(total_pix_)) {
+          std::cerr << "curt_label should be smaller than root_map.size() "
+                    << curt_label << " (" << total_pix_ << ")." << std::endl;
+          return false;
+        }
+        if (root_map_[curt_label] != -1) {
+          cc->at(root_map_[curt_label])->AddPixel(x, y);
+        } else {
+          cc->push_back(std::make_shared<ConnectedComponent>(x, y));
+          root_map_[curt_label] = cc_count++;
+        }
+      }
+
+      ++cur_idx;
+    }  // end for x
+  }    // end for y
+
+  std::cout << "#cc = " << cc_count << std::endl;
+
+  return true;
+}
+
+}  // namespace perception
+}  // namespace apollo
--- a/modules/perception/cuda_util/connected_component_gpu.h
+++ b/modules/perception/cuda_util/connected_component_gpu.h
+/******************************************************************************
+ * Copyright 2018 The Apollo Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *****************************************************************************/
+
+#ifndef MODULES_PERCEPTION_OBSTACLE_CAMERA_LANE_POST_PROCESS_CC_GPU_H_
+#define MODULES_PERCEPTION_OBSTACLE_CAMERA_LANE_POST_PROCESS_CC_GPU_H_
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <opencv2/core/core.hpp>
+
+#include <memory>
+#include <vector>
+#include <iostream>
+
+#include "modules/perception/obstacle/camera/lane_post_process/common/connected_component.h"
+
+namespace apollo {
+namespace perception {
+
+class ConnectedComponentGeneratorGPU {
+ public:
+  ConnectedComponentGeneratorGPU(int image_width, int image_height);
+  ConnectedComponentGeneratorGPU(int image_width, int image_height,
+                                 cv::Rect roi);
+
+  ~ConnectedComponentGeneratorGPU() {
+    cudaFree(label_array_);
+    cudaFreeArray(img_array_);
+
+    cudaError_t cuda_err = cudaGetLastError();
+    if (cuda_err != cudaSuccess) {
+      std::cerr << "failed to release label_array and img_array with CUDA: "
+                << cudaGetErrorString(cuda_err) << std::endl;
+    }
+
+    free(labels_);
+  }
+
+  bool FindConnectedComponents(
+      const cv::Mat& lane_map,
+      std::vector<std::shared_ptr<ConnectedComponent>>* cc);
+
+ private:
+  bool BlockUnionFind(const unsigned char* img);
+
+ private:
+  size_t total_pix_;
+  int image_width_;
+  int image_height_;
+
+  int width_;
+  int height_;
+  int roi_x_min_;
+  int roi_y_min_;
+  int roi_x_max_;
+  int roi_y_max_;
+
+  int* labels_ = NULL;
+  cudaArray* img_array_ = NULL;
+  int* label_array_ = NULL;
+  std::vector<int> root_map_;
+};
+
+}  // namespace perception
+}  // namespace apollo
+
+#endif  // MODULES_PERCEPTION_OBSTACLE_CAMERA_LANE_POST_PROCESS_CC_GPU_H_
--- a/modules/perception/cuda_util/texture.h
+++ b/modules/perception/cuda_util/texture.h
+/******************************************************************************
+ * Copyright (c) 2014, Victor Matheus de Araujo Oliveira All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+/******************************************************************************
+ * Copyright 2018 The Apollo Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *****************************************************************************/
+
+texture<unsigned char, 2, cudaReadModeElementType> img_tex;
--- a/modules/perception/cuda_util/util.h
+++ b/modules/perception/cuda_util/util.h
@@ -14,8 +14,9 @@
 * limitations under the License.
 *****************************************************************************/

-#ifndef MODULES_PERCEPTION_OBSTACLE_CAMERA_DETECTOR_COMMON_UTIL_H
-#define MODULES_PERCEPTION_OBSTACLE_CAMERA_DETECTOR_COMMON_UTIL_H
+#ifndef MODULES_PERCEPTION_OBSTACLE_CAMERA_DETECTOR_COMMON_UTIL_H_
+#define MODULES_PERCEPTION_OBSTACLE_CAMERA_DETECTOR_COMMON_UTIL_H_
+
 #include <cuda_runtime.h>
 #include <caffe/caffe.hpp>
 #include <memory>
@@ -110,6 +111,7 @@ class SyncedMemory {
  int gpu_device_;
  DISABLE_COPY_AND_ASSIGN(SyncedMemory);
 };  // class SyncedMemory
+
 int divup(int a, int b);

 void resize(cv::Mat frame, caffe::Blob<float> *dst,
@@ -123,4 +125,4 @@ void resize(cv::Mat frame, caffe::Blob<float> *dst,
 }  // namespace perception
 }  // namespace apollo

-#endif  // MODULES_PERCEPTION_OBSTACLE_CAMERA_DETECTOR_COMMON_UTIL_H
+#endif  // MODULES_PERCEPTION_OBSTACLE_CAMERA_DETECTOR_COMMON_UTIL_H_
--- a/modules/perception/obstacle/camera/detector/yolo_camera_detector/yolo_camera_detector.cc
+++ b/modules/perception/obstacle/camera/detector/yolo_camera_detector/yolo_camera_detector.cc
@@ -141,15 +141,16 @@ void YoloCameraDetector::load_intrinsic(

  offset_y_ = static_cast<int>(offset_ratio * image_height_ + .5);
  float roi_ratio = cropped_ratio * image_height_ / image_width_;
-  width_ = static_cast<int>(resized_width + aligned_pixel / 2) / aligned_pixel *
-           aligned_pixel;
+  width_ = static_cast<int>(resized_width + aligned_pixel / 2) /
+      aligned_pixel * aligned_pixel;
  height_ = static_cast<int>(width_ * roi_ratio + aligned_pixel / 2) /
-            aligned_pixel * aligned_pixel;
+      aligned_pixel * aligned_pixel;
  ADEBUG << "image_height=" << image_height_ << ", "
         << "image_width=" << image_width_ << ", "
         << "roi_ratio=" << roi_ratio;
  ADEBUG << "offset_y=" << offset_y_ << ", height=" << height_
         << ", width=" << width_;
+
  min_2d_height_ /= height_;

  int roi_w = image_width_;
@@ -186,8 +187,6 @@ bool YoloCameraDetector::init_cnn(const string &yolo_root) {

  vector<string> input_names;
  vector<string> output_names;
-  // init Net
-
  input_names.push_back(net_param.input_blob());
  output_names.push_back(net_param.loc_blob());
  output_names.push_back(net_param.obj_blob());

--- a/modules/perception/obstacle/camera/detector/yolo_camera_detector/yolo_camera_detector.h
+++ b/modules/perception/obstacle/camera/detector/yolo_camera_detector/yolo_camera_detector.h
@@ -94,9 +94,9 @@ class YoloCameraDetector : public BaseCameraDetector {
  std::shared_ptr<SyncedMemory> anchor_ = nullptr;
  int height_ = 0;
  int width_ = 0;
-  float min_2d_height_ = 0;
-  float min_3d_height_ = 0;
-  float top_k_ = 1000;
+  float min_2d_height_ = 0.0f;
+  float min_3d_height_ = 0.0f;
+  int top_k_ = 1000;
  int obj_size_ = 0;
  int output_height_ = 0;
  int output_width_ = 0;
@@ -108,9 +108,9 @@ class YoloCameraDetector : public BaseCameraDetector {
  std::vector<ObjectType> types_;
  int offset_y_ = 0;
  NMSParam nms_;
-  float inter_cls_nms_thresh_ = 1;
-  float cross_class_merge_threshold_ = 1;
-  float confidence_threshold_ = 0.1;
+  float inter_cls_nms_thresh_ = 1.0f;
+  float cross_class_merge_threshold_ = 1.0f;
+  float confidence_threshold_ = 0.1f;
  std::shared_ptr<BaseProjector> projector_;
  obstacle::yolo::YoloParam yolo_param_;
  int image_height_ = 0;

--- a/modules/perception/obstacle/camera/lane_post_process/cc_lane_post_processor/BUILD
+++ b/modules/perception/obstacle/camera/lane_post_process/cc_lane_post_processor/BUILD
@@ -30,6 +30,7 @@ cc_library(
        "//modules/common:log",
        "//modules/perception/lib/config_manager",
        "//modules/perception/obstacle/camera/common:util",
+        "//modules/perception/cuda_util",
        "//modules/perception/obstacle/camera/interface",
        "//modules/perception/proto:lane_post_process_config_lib_proto",
        "@eigen",

--- a/modules/perception/obstacle/camera/lane_post_process/cc_lane_post_processor/cc_lane_post_processor.cc
+++ b/modules/perception/obstacle/camera/lane_post_process/cc_lane_post_processor/cc_lane_post_processor.cc
@@ -208,8 +208,15 @@ bool CCLanePostProcessor::Init() {

  time_stamp_ = 0.0;
  frame_id_ = 0;
+
+#if CUDA_CC
+  cc_generator_.reset(
+      new ConnectedComponentGeneratorGPU(image_width_, image_height_, roi_));
+#else
  cc_generator_.reset(
      new ConnectedComponentGenerator(image_width_, image_height_, roi_));
+#endif
+
  cur_frame_.reset(new LaneFrame);

  is_init_ = true;

--- a/modules/perception/obstacle/camera/lane_post_process/cc_lane_post_processor/cc_lane_post_processor.h
+++ b/modules/perception/obstacle/camera/lane_post_process/cc_lane_post_processor/cc_lane_post_processor.h
@@ -34,6 +34,7 @@
 #include "modules/common/log.h"
 #include "modules/perception/lib/config_manager/config_manager.h"
 #include "modules/perception/obstacle/camera/common/util.h"
+#include "modules/perception/cuda_util/connected_component_gpu.h"
 #include "modules/perception/obstacle/camera/interface/base_lane_post_processor.h"
 #include "modules/perception/obstacle/camera/lane_post_process/cc_lane_post_processor/lane_frame.h"
 #include "modules/perception/obstacle/base/object_supplement.h"
@@ -41,6 +42,8 @@
 namespace apollo {
 namespace perception {

+#define CUDA_CC false
+
 struct CCLanePostProcessorOptions {
  SpaceType space_type;
  ScalarType lane_map_conf_thresh;
@@ -111,7 +114,13 @@ class CCLanePostProcessor : public BaseCameraLanePostProcessor {

  double time_stamp_ = 0.0;
  int frame_id_ = -1;
+
+#if CUDA_CC
+  std::shared_ptr<ConnectedComponentGeneratorGPU> cc_generator_;
+#else
  std::shared_ptr<ConnectedComponentGenerator> cc_generator_;
+#endif
+
  std::shared_ptr<LaneFrame> cur_frame_;
  LaneInstancesPtr cur_lane_instances_;


--- a/modules/perception/obstacle/camera/lane_post_process/common/BUILD
+++ b/modules/perception/obstacle/camera/lane_post_process/common/BUILD
@@ -5,7 +5,10 @@ package(default_visibility = ["//visibility:public"])
 cc_library(
    name = "type",
    srcs = ["type.cc"],
-    hdrs = ["type.h"],
+    hdrs = [
+        "type.h",
+        "base_type.h",
+    ],
    deps = [
        "//modules/common:log",
        "//modules/perception/proto:perception_proto",
@@ -27,10 +30,12 @@ cc_library(
 cc_library(
    name = "connected_component",
    srcs = ["connected_component.cc"],
-    hdrs = ["connected_component.h"],
+    hdrs = [
+        "connected_component.h",
+        "base_type.h",
+    ],
    deps = [
-        ":type",
-        "//modules/common:log",
+        #"//modules/common:log",
        "@eigen",
        "@opencv2//:core",
    ],

--- a/modules/perception/obstacle/camera/lane_post_process/common/base_type.h
+++ b/modules/perception/obstacle/camera/lane_post_process/common/base_type.h
+/******************************************************************************
+ * Copyright 2018 The Apollo Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *****************************************************************************/
+
+#ifndef MODULES_PERCEPTION_OBSTACLE_CAMERA_LANE_POST_PROCESS_BASE_TYPE_H_
+#define MODULES_PERCEPTION_OBSTACLE_CAMERA_LANE_POST_PROCESS_BASE_TYPE_H_
+
+namespace apollo {
+namespace perception {
+
+typedef float ScalarType;
+
+}  // namespace perception
+}  // namespace apollo
+
+#endif  // MODULES_PERCEPTION_OBSTACLE_CAMERA_LANE_POST_PROCESS_BASE_TYPE_H_
--- a/modules/perception/obstacle/camera/lane_post_process/common/connected_component.cc
+++ b/modules/perception/obstacle/camera/lane_post_process/common/connected_component.cc
@@ -14,13 +14,13 @@
 * limitations under the License.
 *****************************************************************************/

+#include "modules/perception/obstacle/camera/lane_post_process/common/connected_component.h"
+
 #include <algorithm>
 #include <cmath>
 #include <limits>
 #include <utility>

-#include "modules/perception/obstacle/camera/lane_post_process/common/connected_component.h"
-
 namespace apollo {
 namespace perception {

@@ -244,9 +244,9 @@ void ConnectedComponent::FindVertices() {
          vertices_->push_back(Vertex(p->x, p->y));
        }
      } else {
-        AERROR << "the point "
-               << "(" << p->x << ", " << p->y << ")"
-               << " is not on bounding box.";
+        std::cerr << "the point "
+                  << "(" << p->x << ", " << p->y << ")"
+                  << " is not on bounding box." << std::endl;
      }
    }
  }
@@ -584,7 +584,6 @@ void ConnectedComponent::SplitContourHorizontal(int len_split,
  }
 }

-// version 2
 void ConnectedComponent::SplitContour(int split_len) {
  if (bbox_.split == BoundingBoxSplitType::NONE) {
    return;
@@ -616,7 +615,8 @@ void ConnectedComponent::SplitContour(int split_len) {
    }

  } else {
-    AERROR << "unknown bounding box split type: " << bbox_.split;
+    std::cerr << "unknown bounding box split type: "
+              << bbox_.split << std::endl;
  }
 }

@@ -631,13 +631,14 @@ void ConnectedComponent::Process(ScalarType split_siz, int split_len) {
  }
 }

-/** split a CC into several smaller ones **/
 vector<int> ConnectedComponent::GetSplitRanges(int siz, int len_split) {
  if (siz <= 0) {
-    AERROR << "siz should be a positive number: " << siz;
+    std::cerr << "siz should be a positive number: "
+              << siz << std::endl;
  }
  if (len_split <= 0) {
-    AERROR << "len_split should be a positive number: " << len_split;
+    std::cerr << "len_split should be a positive number: "
+              << len_split << std::endl;
  }

  int num_split = siz / len_split;
@@ -654,7 +655,7 @@ vector<int> ConnectedComponent::GetSplitRanges(int siz, int len_split) {
  return lens;
 }

-/* connected component generator */
+/** connected component generator **/
 ConnectedComponentGenerator::ConnectedComponentGenerator(int image_width,
                                                         int image_height)
    : image_width_(image_width),
@@ -667,19 +668,8 @@ ConnectedComponentGenerator::ConnectedComponentGenerator(int image_width,
      roi_y_max_(image_height - 1) {
  total_pix_ =
      static_cast<size_t>(image_width_) * static_cast<size_t>(image_height_);
-#if CUDA_CC
-  cudaChannelFormatDesc uchar_desc = cudaCreateChannelDesc<unsigned char>();
-  cudaMallocArray(&img_array_, &uchar_desc, static_cast<size_t>(width_),
-                  static_cast<size_t>(height_));
-  cudaBindTextureToArray(img_tex, img_array_, uchar_desc);
-  cudaMalloc(
-      reinterpret_cast<void**>(&label_array_),
-      static_cast<size_t>(width_) * static_cast<size_t>(height_) * sizeof(int));
-  labels_ = static_cast<int*>(malloc(total_pix_ * sizeof(int)));
-#else
  labels_.Init(total_pix_);
  frame_label_.resize(total_pix_, -1);
-#endif
  root_map_.reserve(total_pix_);
 }

@@ -695,67 +685,48 @@ ConnectedComponentGenerator::ConnectedComponentGenerator(int image_width,
      roi_x_max_(roi.x + roi.width - 1),
      roi_y_max_(roi.y + roi.height - 1) {
  if (roi_x_min_ < 0) {
-    AERROR << "x_min is less than zero: " << roi_x_min_;
+    std::cerr << "x_min is less than zero: " << roi_x_min_ << std::endl;
  }
  if (roi_y_min_ < 0) {
-    AERROR << "y_min is less than zero: " << roi_y_min_;
+    std::cerr << "y_min is less than zero: " << roi_y_min_ << std::endl;
  }
  if (roi_x_max_ >= image_width_) {
-    AERROR << "x_max is larger than image width: " << roi_x_max_ << "|"
-           << image_width_;
+    std::cerr << "x_max is larger than image width: " << roi_x_max_ << "|"
+              << image_width_ << std::endl;
  }
  if (roi_y_max_ >= image_height_) {
-    AERROR << "y_max is larger than image height: " << roi_y_max_ << "|"
-           << image_height_;
+    std::cerr << "y_max is larger than image height: " << roi_y_max_ << "|"
+              << image_height_ << std::endl;
  }
  total_pix_ = static_cast<size_t>(width_) * static_cast<size_t>(height_);
-#if _CUDA_CC
-  cudaChannelFormatDesc uchar_desc = cudaCreateChannelDesc<unsigned char>();
-  img_array_ = NULL;
-  cudaMallocArray(&img_array_, &uchar_desc, static_cast<size_t>(width_),
-                  static_cast<size_t>(height_));
-  cudaBindTextureToArray(img_tex, img_array_, uchar_desc);
-
-  cudaMalloc(
-      reinterpret_cast<void**>(&label_array_),
-      static_cast<size_t>(width_) * static_cast<size_t>(height_) * sizeof(int));
-
-  cudaError_t cuda_err = cudaGetLastError();
-  if (cuda_err != cudaSuccess) {
-    AERROR << "failed to initialize 'img_array' and 'label_array' with CUDA: "
-           << cudaGetErrorString(cuda_err);
-  }
-
-  labels_ = static_cast<int*>(malloc(total_pix_ * sizeof(int)));
-#else
  labels_.Init(total_pix_);
  frame_label_.resize(total_pix_, -1);
-#endif
  root_map_.reserve(total_pix_);
 }

 bool ConnectedComponentGenerator::FindConnectedComponents(
    const cv::Mat& lane_map, vector<shared_ptr<ConnectedComponent>>* cc) {
  if (lane_map.empty()) {
-    AERROR << "input lane map is empty";
+    std::cerr << "input lane map is empty" << std::endl;
    return false;
  }
  if (lane_map.type() != CV_8UC1) {
-    AERROR << "input lane map type is not CV_8UC1";
+    std::cerr << "input lane map type is not CV_8UC1" << std::endl;
    return false;
  }

  if (lane_map.cols != image_width_) {
-    AERROR << "The width of input lane map does not match";
+    std::cerr << "The width of input lane map does not match" << std::endl;
    return false;
  }
  if (lane_map.rows != image_height_) {
-    AERROR << "The height of input lane map does not match";
+    std::cerr << "The height of input lane map does not match" << std::endl;
    return false;
  }

  if (cc == NULL) {
-    AERROR << "the pointer of output connected components is null.";
+    std::cerr << "the pointer of output connected components is null."
+              << std::endl;
    return false;
  }

@@ -820,10 +791,10 @@ bool ConnectedComponentGenerator::FindConnectedComponents(
    prev_p = cur_p;
  }  // end for y
  if (root_map_.size() != labels_.Num()) {
-    AERROR << "the size of root map and labels are not equal.";
+    std::cerr << "the size of root map and labels are not equal." << std::endl;
    return false;
  }
-  AINFO << "subset number = " << labels_.Size();
+  std::cout << "subset number = " << labels_.Size() << std::endl;

  // second loop logic
  cur_idx = 0;
@@ -834,14 +805,14 @@ bool ConnectedComponentGenerator::FindConnectedComponents(
      curt_label = frame_label_[cur_idx];
      if (curt_label >= 0) {
        if (curt_label >= static_cast<int>(labels_.Num())) {
-          AERROR << "curt_label should be smaller than labels.num(): "
-                 << curt_label << " vs. " << labels_.Num();
+          std::cerr << "curt_label should be smaller than labels.num(): "
+                    << curt_label << " vs. " << labels_.Num() << std::endl;
          return false;
        }
        curt_label = labels_.Find(curt_label);
        if (curt_label >= static_cast<int>(root_map_.size())) {
-          AERROR << "curt_label should be smaller than root_map.size() "
-                 << curt_label << " vs. " << root_map_.size();
+          std::cerr << "curt_label should be smaller than root_map.size() "
+                    << curt_label << " vs. " << root_map_.size() << std::endl;
          return false;
        }
        if (root_map_[curt_label] != -1) {
@@ -853,7 +824,7 @@ bool ConnectedComponentGenerator::FindConnectedComponents(
      }
    }  // end for x
  }    // end for y
-  AINFO << "cc number = " << cc_count;
+  std::cout << "The number of cc = " << cc_count << std::endl;

  return true;
 }

--- a/modules/perception/obstacle/camera/lane_post_process/common/connected_component.h
+++ b/modules/perception/obstacle/camera/lane_post_process/common/connected_component.h
@@ -24,9 +24,10 @@
 #include <string>
 #include <unordered_set>
 #include <vector>
+#include <iostream>

-#include "modules/common/log.h"
-#include "modules/perception/obstacle/camera/lane_post_process/common/type.h"
+// #include "modules/common/log.h"
+#include "modules/perception/obstacle/camera/lane_post_process/common/base_type.h"

 namespace apollo {
 namespace perception {
@@ -39,8 +40,6 @@ namespace perception {
 #define NUM_RESERVE_EDGES 6
 #endif

-#define CUDA_CC false
-
 class DisjointSet {
 public:
  DisjointSet() : subset_num_(0) {}
@@ -186,6 +185,35 @@ class ConnectedComponent {

  // CC pixels
  void AddPixel(int x, int y);
+  /*
+  void AddPixel(int x, int y) {
+    if (pixel_count_ == 0) {
+      // new bounding box
+      bbox_.x_min = x;  // x_min
+      bbox_.y_min = y;  // y_min
+      bbox_.x_max = x;  // x_max
+      bbox_.y_max = y;  // y_max
+    } else {
+      // extend bounding box if necessary
+      if (x < bbox_.x_min) {
+        bbox_.x_min = x;
+      }
+      if (x > bbox_.x_max) {
+        bbox_.x_max = x;
+      }
+      if (y < bbox_.y_min) {
+        bbox_.y_min = y;
+      }
+      if (y > bbox_.y_max) {
+        bbox_.y_max = y;
+      }
+    }
+
+    pixels_->push_back(cv::Point(x, y));
+    pixel_count_++;
+  }
+  */
+
  int GetPixelCount() const { return pixel_count_; }
  std::shared_ptr<const std::vector<cv::Point2i>> GetPixels() const {
    return pixels_;
@@ -302,30 +330,10 @@ class ConnectedComponentGenerator {
  ConnectedComponentGenerator(int image_width, int image_height);
  ConnectedComponentGenerator(int image_width, int image_height, cv::Rect roi);

-  ~ConnectedComponentGenerator() {
-#if CUDA_CC
-    cudaFree(label_array_);
-    cudaFreeArray(img_array_);
-
-    cudaError_t cuda_err = cudaGetLastError();
-    if (cuda_err != cudaSuccess) {
-      AERROR << "failed to release label_array and img_array with CUDA: "
-             << cudaGetErrorString(cuda_err);
-    }
-
-    free(labels_);
-#endif
-  }
-
  bool FindConnectedComponents(
      const cv::Mat& lane_map,
      std::vector<std::shared_ptr<ConnectedComponent>>* cc);

- private:
-#if CUDA_CC
-  bool BlockUnionFind(const unsigned char* img);
-#endif
-
 private:
  size_t total_pix_;
  int image_width_;
@@ -338,14 +346,8 @@ class ConnectedComponentGenerator {
  int roi_x_max_;
  int roi_y_max_;

-#if CUDA_CC
-  int* labels_;
-  cudaArray* img_array_;
-  int* label_array_;
-#else
  DisjointSet labels_;
  std::vector<int> frame_label_;
-#endif
  std::vector<int> root_map_;
 };


--- a/modules/perception/obstacle/camera/lane_post_process/common/type.h
+++ b/modules/perception/obstacle/camera/lane_post_process/common/type.h
@@ -25,8 +25,8 @@
 #include "opencv2/opencv.hpp"

 #include "modules/perception/proto/perception_obstacle.pb.h"
-
 #include "modules/common/log.h"
+#include "modules/perception/obstacle/camera/lane_post_process/common/base_type.h"

 #ifndef MODULES_PERCEPTION_OBSTACLE_CAMERA_LANE_POST_PROCESS_COMMON_TYPE_H_
 #define MODULES_PERCEPTION_OBSTACLE_CAMERA_LANE_POST_PROCESS_COMMON_TYPE_H_
@@ -38,14 +38,6 @@ namespace perception {
 #define MAX_LANE_HISTORY 10
 #endif

-#ifndef UF_BLOCK_WIDTH
-#define UF_BLOCK_WIDTH 32
-#endif
-
-#ifndef UF_BLOCK_HEIGHT
-#define UF_BLOCK_HEIGHT 16
-#endif
-
 #ifndef MAX_GROUP_PREDICTION_MARKER_NUM
 #define MAX_GROUP_PREDICTION_MARKER_NUM 10
 #endif
@@ -66,11 +58,6 @@ namespace perception {
 #define AVEAGE_LANE_WIDTH_METER 3.7
 #endif

-
-
-typedef float ScalarType;
-
-
 constexpr ScalarType INVERSE_AVEAGE_LANE_WIDTH_METER
                     = 1.0 / AVEAGE_LANE_WIDTH_METER;

@@ -214,7 +201,7 @@ struct LaneInstance {
  }
 };

-struct L3CubicCurve {
+struct CubicCurve {
  float x_start;
  float x_end;
  float a;
@@ -223,6 +210,7 @@ struct L3CubicCurve {
  float d;
 };

+/*
 struct L3LaneInfo {
  int lane_id;
  int left_idx;
@@ -231,6 +219,7 @@ struct L3LaneInfo {
  int carleft_idx;
  int carright_idx;
 };
+*/

 struct LaneObject {
  LaneObject() {
@@ -256,9 +245,9 @@ struct LaneObject {
  PolyModel model;
  ScalarType lateral_distance = 0.0;

-  L3CubicCurve pos_curve;
-  L3CubicCurve img_curve;
-  L3LaneInfo lane_info;
+  CubicCurve pos_curve;
+  CubicCurve img_curve;
+  // L3LaneInfo lane_info;
  double timestamp = 0.0;
  int32_t seq_num = 0;

@@ -298,6 +287,7 @@ struct LaneObject {
  }
 };

+/*
 // struct for L3 Lane information
 struct L3LaneLine {
  SpatialLabelType spatial;
@@ -312,6 +302,7 @@ struct RoadInfo {
  std::vector<L3LaneLine> lane_line_vec;
  std::vector<L3LaneInfo> lane_vec;
 };
+*/

 typedef std::vector<LaneObject> LaneObjects;
 typedef std::shared_ptr<LaneObjects> LaneObjectsPtr;