From 730bdcd5e1f16f469ba3394b54357c556ba28952 Mon Sep 17 00:00:00 2001
From: BohaoWu <37072443+BohaoWu@users.noreply.github.com>
Date: Mon, 12 Oct 2020 16:21:06 +0800
Subject: [PATCH] Add nvdec-extractframe code and adjust code style.

---
 core/preprocess/nvdec-extractframe/README.md  |   1 +
 .../include/ExtractFrameBGRARaw.h             |  63 ++++++
 .../include/ExtractFrameBase.h                | 172 +++++++++++++++
 .../include/ExtractFrameJpeg.h                | 104 +++++++++
 .../pybind/pybind_frame_extract.cpp           |  53 +++++
 .../src/ExtractFrameBGRARaw.cpp               | 120 +++++++++++
 .../src/ExtractFrameBase.cpp                  |  61 ++++++
 .../src/ExtractFrameJpeg.cpp                  | 203 ++++++++++++++++++
 .../nvdec-extractframe/src/main.cpp           |  98 +++++++++
 9 files changed, 875 insertions(+)
 create mode 100644 core/preprocess/nvdec-extractframe/README.md
 create mode 100644 core/preprocess/nvdec-extractframe/include/ExtractFrameBGRARaw.h
 create mode 100644 core/preprocess/nvdec-extractframe/include/ExtractFrameBase.h
 create mode 100644 core/preprocess/nvdec-extractframe/include/ExtractFrameJpeg.h
 create mode 100644 core/preprocess/nvdec-extractframe/pybind/pybind_frame_extract.cpp
 create mode 100644 core/preprocess/nvdec-extractframe/src/ExtractFrameBGRARaw.cpp
 create mode 100644 core/preprocess/nvdec-extractframe/src/ExtractFrameBase.cpp
 create mode 100644 core/preprocess/nvdec-extractframe/src/ExtractFrameJpeg.cpp
 create mode 100644 core/preprocess/nvdec-extractframe/src/main.cpp
diff --git a/core/preprocess/nvdec-extractframe/README.md b/core/preprocess/nvdec-extractframe/README.md
new file mode 100644
index 00000000..72458214
--- /dev/null
+++ b/core/preprocess/nvdec-extractframe/README.md
@@ -0,0 +1 @@
+# hw-frame-extract
diff --git a/core/preprocess/nvdec-extractframe/include/ExtractFrameBGRARaw.h b/core/preprocess/nvdec-extractframe/include/ExtractFrameBGRARaw.h
new file mode 100644
index 00000000..739e516b
--- /dev/null
+++ b/core/preprocess/nvdec-extractframe/include/ExtractFrameBGRARaw.h
@@ -0,0 +1,63 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*******************************************
+ *
+ * Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
+ *
+ ******************************************/
+/**
+ * @file ExtractFrameBGRARaw.h
+ * @author chengang06@baidu.com
+ * @date 2020-04-15
+ **/
+#ifndef CORE_PREPROCESS_NVDEC_EXTRACTFRAME_INCLUDE_EXTRACTFRAMEBGRARAW_H_
+#define CORE_PREPROCESS_NVDEC_EXTRACTFRAME_INCLUDE_EXTRACTFRAMEBGRARAW_H_
+#include <string>
+
+#include "ExtractFrameBase.h"
+namespace baidu {
+namespace xvision {
+class ExtractFrameBGRARaw : public ExtractFrameBase {
+  /**
+   * @name:
+   *     ExtractFrameBGRARaw
+   * @author:
+   *     chengang06@baidu.com
+   * @main feature:
+   *     decode a video and return the selected frames as raw BGRA buffers
+   * @example:
+   *     ExtractFrameBGRARaw e(0); e.init(); e.extract_frame(path);
+   **/
+ public:
+  explicit ExtractFrameBGRARaw(int gpu_index) : ExtractFrameBase(gpu_index) {}
+  ExtractFrameBGRARaw() {}
+  /**
+   * @Name: extract_frame
+   * @Feature:
+   *     decode a video file and collect the selected frames as BGRA data
+   * @params
+   *     file_path: video local path
+   *     n:         n frames per second (0 is replaced by 1000)
+   *     count:     stop once at least this many frames are collected
+   * @returns
+   *     IMGDataList
+   **/
+  IMGDataList extract_frame(const std::string &file_path,
+                            int n = 1,
+                            int count = 200);
+};
+}  // namespace xvision
+}  // namespace baidu
+#endif  // CORE_PREPROCESS_NVDEC_EXTRACTFRAME_INCLUDE_EXTRACTFRAMEBGRARAW_H_
diff --git a/core/preprocess/nvdec-extractframe/include/ExtractFrameBase.h b/core/preprocess/nvdec-extractframe/include/ExtractFrameBase.h
new file mode 100644
index 00000000..382aee6b
--- /dev/null
+++ b/core/preprocess/nvdec-extractframe/include/ExtractFrameBase.h
@@ -0,0 +1,172 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*******************************************
+ *
+ * Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
+ *
+ ******************************************/
+/**
+ * @file ExtractFrameBase.h
+ * @author chengang06@baidu.com
+ * @date 2020-04-15
+ **/
+#include <cuda.h>
+
+#include <list>
+#include <memory>
+#include <string>
+
+#include "NvDecoder/NvDecoder.h"
+#include "Utils/ColorSpace.h"
+#include "Utils/FFmpegDemuxer.h"
+#ifndef CORE_PREPROCESS_NVDEC_EXTRACTFRAME_INCLUDE_EXTRACTFRAMEBASE_H_
+#define CORE_PREPROCESS_NVDEC_EXTRACTFRAME_INCLUDE_EXTRACTFRAMEBASE_H_
+namespace baidu {
+namespace xvision {
+
+class FrameResult {
+  /**
+   * @name:
+   *     FrameResult
+   * @author:
+   *     chengang06@baidu.com
+   * @main feature:
+   *     store one extracted frame: its byte buffer plus size info
+   * @example:
+   *     None
+   **/
+ public:
+  // constructor: empty frame, no buffer attached, all sizes 0
+  FrameResult() {
+    _height = 0;
+    _width = 0;
+    _p_frame = nullptr;
+    _cols = 0;
+    _rows = 0;
+    _thick = 0;
+  }
+  /*
+   * Ownership notes:
+   *  - set_frame_buffer() takes over `buff` and later releases it with
+   *    delete[], so the buffer has to be allocated with new[].
+   *  - The destructor is empty: the buffer is only released by
+   *    free_memory(), or by set_frame_buffer() when it is replaced.
+   *  - No copy constructor or assignment operator is declared, so
+   *    compiler-generated copies share the same buffer pointer; call
+   *    free_memory() on one copy only.
+   *
+   * Size notes:
+   *  - rows()/cols()/thick() describe the buffer layout; len() and
+   *    get_frame_buff_len() both return rows * cols * thick.
+   *  - height()/width() describe the picture and are stored independently
+   *    of the buffer dimensions.
+   *  - free_memory() also resets every size field to 0.
+   */
+  // attributes
+  size_t rows() const { return _rows; }
+  size_t cols() const { return _cols; }
+  size_t thick() const { return _thick; }
+  size_t height() const { return _height; }
+  size_t width() const { return _width; }
+  size_t len() const { return _rows * _cols * _thick; }
+  void set_rows(const size_t rows) { _rows = rows; }
+  void set_cols(const size_t cols) { _cols = cols; }
+  void set_thick(const size_t thick) { _thick = thick; }
+  void set_width(const size_t width) { _width = width; }
+  void set_height(const size_t height) { _height = height; }
+  // free buffer data manually
+
+  void free_memory() {
+    if (_p_frame) {
+      delete[] _p_frame;
+      _p_frame = nullptr;
+    }
+    _height = 0;
+    _width = 0;
+    _rows = 0;
+    _cols = 0;
+    _thick = 0;
+  }
+  // set frame buffer: releases the previous buffer, then adopts `buff`
+  void set_frame_buffer(uint8_t* buff) {
+    if (_p_frame) {
+      delete[] _p_frame;
+      _p_frame = nullptr;
+    }
+    _p_frame = buff;
+  }
+  // get frame buffer (nullptr when no buffer is attached)
+  uint8_t* get_frame() const { return _p_frame; }
+  size_t get_frame_buff_len() const { return _rows * _cols * _thick; }
+  virtual ~FrameResult() {}
+
+ private:
+  uint8_t* _p_frame;
+  size_t _height, _width;       // pic width and height
+  size_t _rows, _cols, _thick;  // buffer sizes
+};
+typedef std::list<FrameResult> IMGDataList;
+class ExtractFrameBase {
+  /**
+   * @name:
+   *     ExtractFrameBase
+   * @author:
+   *     chengang06@baidu.com
+   * @main feature:
+   *     base class for extract frame, holds the gpu index and CUDA context
+   * @example:
+   *     derive from it, implement extract_frame(), call init() before use
+   **/
+ public:
+  explicit ExtractFrameBase(int gpu_index) {
+    this->gpu_index = gpu_index;
+    p_cu_context = nullptr;
+  }
+  ExtractFrameBase() {
+    gpu_index = 0;
+    p_cu_context = nullptr;
+  }
+  virtual int init();  // 0 on success, -1 when the gpu index is rejected
+  virtual IMGDataList extract_frame(const std::string& file_path,
+                                    int n = 1,
+                                    int count = 200) = 0;
+  virtual ~ExtractFrameBase() {
+    if (p_cu_context != nullptr) {
+      cuCtxDestroy(p_cu_context);
+    }
+  }
+  /**
+   * @name select_frame, tells whether the current frame is kept
+   * @param, frame_rate, double like 25.00, video frame rate (used by mode 1)
+   * @param, pre_frame_time, int_64, last selected frame timestamp
+   * @param, cur_frame_time, int_64, current frame timestamp
+   * @param, frame_index, size_t, current frame index, 0 is kept in mode 0/1
+   * @param, fps, extract frame num per seconds
+   * @param, mode, 0: select by time stamp, 1: select by frame index
+   **/
+  static bool select_frame(const double frame_rate,
+                           const int64_t pre_frame_time,
+                           const int64_t cur_frame_time,
+                           const size_t frame_index,
+                           const double fps,
+                           const int mode);
+
+ protected:
+  int gpu_index;  // gpu index
+  CUcontext p_cu_context;  // set up by init(), destroyed in the destructor
+};
+}  // namespace xvision
+}  // namespace baidu
+#endif  // CORE_PREPROCESS_NVDEC_EXTRACTFRAME_INCLUDE_EXTRACTFRAMEBASE_H_
diff --git a/core/preprocess/nvdec-extractframe/include/ExtractFrameJpeg.h b/core/preprocess/nvdec-extractframe/include/ExtractFrameJpeg.h
new file mode 100644
index 00000000..2c49a3cf
--- /dev/null
+++ b/core/preprocess/nvdec-extractframe/include/ExtractFrameJpeg.h
@@ -0,0 +1,104 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*******************************************
+ *
+ * Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
+ *
+ ******************************************/
+/**
+ * @file ExtractFrameJpeg.h
+ * @author chengang06@baidu.com
+ * @date 2020-04-20
+ **/
+#ifndef CORE_PREPROCESS_NVDEC_EXTRACTFRAME_INCLUDE_EXTRACTFRAMEJPEG_H_
+#define CORE_PREPROCESS_NVDEC_EXTRACTFRAME_INCLUDE_EXTRACTFRAMEJPEG_H_
+#include <nvjpeg.h>
+
+#include <string>
+
+#include "ExtractFrameBase.h"
+namespace baidu {
+namespace xvision {
+class ExtractFrameJpeg : public ExtractFrameBase {
+  /**
+   * @name:
+   *     ExtractFrameJpeg
+   * @author:
+   *     chengang06@baidu.com
+   * @main feature:
+   *     extract video and output jpeg format data
+   * @example:
+   *     ExtractFrameJpeg e(0); e.init(); e.extract_frame(path);
+   **/
+
+ public:
+  explicit ExtractFrameJpeg(int gpu_index) : ExtractFrameBase(gpu_index) {}
+  ExtractFrameJpeg() {}
+  /**
+   * @Name: extract_frame
+   * @Feature:
+   *     decode a video file and collect the selected frames as jpeg data
+   * @params
+   *     file_path: video local path
+   *     n:         n frames per second (0 is replaced by 1000)
+   *     count:     stop once at least this many frames are collected
+   * @returns
+   *     IMGDataList
+   **/
+  IMGDataList extract_frame(const std::string& file_path,
+                            int n = 1,
+                            int count = 200);
+
+  int init();  // 0 on success, base class error code, or -2 (cuda/nvjpeg)
+  virtual ~ExtractFrameJpeg() {
+    if (_nv_enc_params) {
+      nvjpegEncoderParamsDestroy(_nv_enc_params);
+    }
+    if (_nv_enc_state) {
+      nvjpegEncoderStateDestroy(_nv_enc_state);
+    }
+    if (_nv_jpeg_handler) {
+      nvjpegDestroy(_nv_jpeg_handler);
+    }
+    if (_cuda_stream) {
+      cudaStreamDestroy(_cuda_stream);
+    }
+  }
+
+ private:
+  /**
+   * @Name:
+   *     jpeg_encode
+   * @Feature:
+   *     use cuda to encode jpeg picture
+   * @params
+   *     p_image, planar RGB data (three width*height planes), may be on gpu
+   *     width, height: image size info
+   *     FrameResult result, output, receives the jpeg bytes
+   * @returns
+   *     0 on success, -1 on failure
+   **/
+  int jpeg_encode(uint8_t* p_image,
+                  int width,
+                  int height,
+                  FrameResult& result);  // non-const: the encoder fills it
+  nvjpegHandle_t _nv_jpeg_handler = nullptr;  // null until init() succeeds,
+  nvjpegEncoderState_t _nv_enc_state = nullptr;  // so the destructor is safe
+  nvjpegEncoderParams_t _nv_enc_params = nullptr;  // even without init()
+  cudaStream_t _cuda_stream = nullptr;
+};
+}  // namespace xvision
+}  // namespace baidu
+#endif  // CORE_PREPROCESS_NVDEC_EXTRACTFRAME_INCLUDE_EXTRACTFRAMEJPEG_H_
diff --git a/core/preprocess/nvdec-extractframe/pybind/pybind_frame_extract.cpp b/core/preprocess/nvdec-extractframe/pybind/pybind_frame_extract.cpp
new file mode 100644
index 00000000..c321eb2b
--- /dev/null
+++ b/core/preprocess/nvdec-extractframe/pybind/pybind_frame_extract.cpp
@@ -0,0 +1,53 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <pybind11/buffer_info.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include "ExtractFrameBGRARaw.h"
+#include "ExtractFrameJpeg.h"
+simplelogger::Logger* logger =
+    simplelogger::LoggerFactory::CreateConsoleLogger();
+
+PYBIND11_MODULE(hwextract, m) {  // python module name: hwextract
+  pybind11::class_<baidu::xvision::ExtractFrameJpeg>(m, "HwExtractFrameJpeg")
+      .def(pybind11::init<int>())  // argument: gpu index
+      .def("init_handler", &baidu::xvision::ExtractFrameJpeg::init)
+      .def("extract_frame", &baidu::xvision::ExtractFrameJpeg::extract_frame);
+  pybind11::class_<baidu::xvision::ExtractFrameBGRARaw>(m,
+                                                        "HwExtractFrameBGRARaw")
+      .def(pybind11::init<int>())  // argument: gpu index
+      .def("init_handler", &baidu::xvision::ExtractFrameBGRARaw::init)
+      .def("extract_frame",
+           &baidu::xvision::ExtractFrameBGRARaw::extract_frame);
+  pybind11::class_<baidu::xvision::FrameResult>(
+      m, "HwFrameResult", pybind11::buffer_protocol())
+      .def(pybind11::init())
+      .def("height", &baidu::xvision::FrameResult::height)
+      .def("width", &baidu::xvision::FrameResult::width)
+      .def("len", &baidu::xvision::FrameResult::len)
+      .def("free_memory",
+           &baidu::xvision::FrameResult::free_memory)  // must be called
+                                                       // explicitly, from C++
+                                                       // or Python (gcc 4.8.2)
+      .def_buffer([](baidu::xvision::FrameResult& m) -> pybind11::buffer_info {
+        return pybind11::buffer_info(
+            m.get_frame(),    // data pointer, not copied
+            sizeof(uint8_t),  // item size
+            pybind11::format_descriptor<uint8_t>::format(),
+            2,                     // number of dimensions
+            {m.rows(), m.cols()},  // NOTE(review): thick() left out; confirm
+            {sizeof(uint8_t) * m.cols(), sizeof(uint8_t)});  // strides, bytes
+      });
+}
diff --git a/core/preprocess/nvdec-extractframe/src/ExtractFrameBGRARaw.cpp b/core/preprocess/nvdec-extractframe/src/ExtractFrameBGRARaw.cpp
new file mode 100644
index 00000000..87b648b3
--- /dev/null
+++ b/core/preprocess/nvdec-extractframe/src/ExtractFrameBGRARaw.cpp
@@ -0,0 +1,120 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ExtractFrameBGRARaw.h"
+#include <nppi.h>
+#include <memory>
+#include <string>
+// Decodes `file_path` on the GPU and returns the selected frames as packed
+// BGRA buffers (rows = height, cols = width, thick = 4), each allocated with
+// new[] and to be released through FrameResult::free_memory().
+//   n:     frames to keep per second; 0 is replaced by 1000.
+//   count: decoding stops once at least `count` frames have been collected.
+baidu::xvision::IMGDataList baidu::xvision::ExtractFrameBGRARaw::extract_frame(
+    const std::string &file_path, int n, int count) {
+  FFmpegDemuxer demuxer(file_path.c_str());
+  NvDecoder nvdec(
+      p_cu_context, true, FFmpeg2NvCodecId(demuxer.GetVideoCodec()));
+  size_t n_width = demuxer.GetWidth();
+  size_t n_height = demuxer.GetHeight();
+  double frame_rate = demuxer.GetFrameRate();
+  size_t n_frame_size = n_height * n_width * 4;  // 4 bytes per BGRA pixel
+  uint8_t *p_video = nullptr;
+  uint8_t **pp_frame = nullptr;
+  CUdeviceptr p_tmp_image = 0;  // device scratch for one converted frame
+  cuMemAlloc(&p_tmp_image, n_frame_size);
+
+  int n_video_bytes = 0;
+  int frame_count = -1;
+  int frame_returned = 0;
+  int64_t cur_frame_time(0), pre_frame_time(0), pts(0);
+  int64_t *p_timestamp = nullptr;
+  if (n == 0) {
+    n = 1000;
+  }
+  IMGDataList result_list;
+  do {
+    demuxer.Demux(&p_video, &n_video_bytes, &pts);
+    nvdec.Decode(p_video,
+                 n_video_bytes,
+                 &pp_frame,
+                 &frame_returned,
+                 0,
+                 &p_timestamp,
+                 pts);
+    for (auto i = 0; i < frame_returned; ++i) {
+      cur_frame_time = p_timestamp[i];
+      frame_count += 1;
+      if (!select_frame(
+              frame_rate, pre_frame_time, cur_frame_time, frame_count, n, 0)) {
+        continue;
+      }
+      pre_frame_time = cur_frame_time;
+      FrameResult fm_tmp;
+      fm_tmp.set_rows(n_height);
+      fm_tmp.set_cols(n_width);
+      fm_tmp.set_thick(4);
+      fm_tmp.set_frame_buffer(new uint8_t[n_frame_size]);
+      fm_tmp.set_height(n_height);
+      fm_tmp.set_width(n_width);
+      result_list.push_back(fm_tmp);  // copy shares the buffer filled below
+      if (nvdec.GetBitDepth() == 8) {
+        if (nvdec.GetOutputFormat() == cudaVideoSurfaceFormat_YUV444) {
+          YUV444ToColor32<BGRA32>(reinterpret_cast<uint8_t *>(pp_frame[i]),
+                                  nvdec.GetWidth(),
+                                  reinterpret_cast<uint8_t *>(p_tmp_image),
+                                  4 * nvdec.GetWidth(),
+                                  nvdec.GetWidth(),
+                                  nvdec.GetHeight());
+        } else {
+          Nv12ToColor32<BGRA32>(reinterpret_cast<uint8_t *>(pp_frame[i]),
+                                nvdec.GetWidth(),
+                                reinterpret_cast<uint8_t *>(p_tmp_image),
+                                4 * nvdec.GetWidth(),
+                                nvdec.GetWidth(),
+                                nvdec.GetHeight());
+        }
+        GetImage(p_tmp_image,
+                 fm_tmp.get_frame(),
+                 nvdec.GetWidth(),
+                 4 * nvdec.GetHeight());
+      } else {
+        if (nvdec.GetOutputFormat() == cudaVideoSurfaceFormat_YUV444_16Bit) {
+          YUV444P16ToColor32<BGRA32>(reinterpret_cast<uint8_t *>(pp_frame[i]),
+                                     2 * nvdec.GetWidth(),
+                                     reinterpret_cast<uint8_t *>(p_tmp_image),
+                                     4 * nvdec.GetWidth(),
+                                     nvdec.GetWidth(),
+                                     nvdec.GetHeight());
+        } else {
+          P016ToColor32<BGRA32>(reinterpret_cast<uint8_t *>(pp_frame[i]),
+                                2 * nvdec.GetWidth(),  // 16-bit source pitch
+                                reinterpret_cast<uint8_t *>(p_tmp_image),
+                                4 * nvdec.GetWidth(),
+                                nvdec.GetWidth(),
+                                nvdec.GetHeight());
+        }
+        GetImage(p_tmp_image,
+                 fm_tmp.get_frame(),
+                 nvdec.GetWidth(),
+                 4 * nvdec.GetHeight());
+      }
+    }
+    if (result_list.size() >= count) {
+      break;
+    }
+  } while (n_video_bytes);
+  cuMemFree(p_tmp_image);
+  return result_list;
+}
diff --git a/core/preprocess/nvdec-extractframe/src/ExtractFrameBase.cpp b/core/preprocess/nvdec-extractframe/src/ExtractFrameBase.cpp
new file mode 100644
index 00000000..b881b2eb
--- /dev/null
+++ b/core/preprocess/nvdec-extractframe/src/ExtractFrameBase.cpp
@@ -0,0 +1,61 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ExtractFrameBase.h"
+#include <cmath>
+int baidu::xvision::ExtractFrameBase::init() {  // 0 on success, -1 bad gpu
+  ck(cuInit(0));  // the driver API has to be initialised before other calls
+  int gpu_sum = 0;
+  ck(cuDeviceGetCount(&gpu_sum));
+  if (gpu_index < 0 || gpu_index >= gpu_sum) {  // valid: 0 .. gpu_sum - 1
+    return -1;
+  }
+  createCudaContext(&p_cu_context, gpu_index, 0);
+  return 0;
+}
+bool baidu::xvision::ExtractFrameBase::select_frame(
+    const double frame_rate,
+    const int64_t pre_frame_time,
+    const int64_t cur_frame_time,
+    const size_t frame_index,
+    const double fps,
+    const int mode) {
+  // TODO(Zelda): select frame function flattens ffmpeg FPS filter
+  // Returns true when the current frame is kept. Unknown modes keep nothing;
+  // in modes 0 and 1 the first frame (index 0) is always kept.
+  if (mode != 0 && mode != 1) {
+    return false;
+  }
+  if (frame_index == 0) {
+    return true;
+  }
+  if (mode == 0) {  // recommended , same as ffmpeg fps filter, round:inf
+    // Bucket width in timestamp units. It truncates to 0 for |fps| > 1000
+    // (and fps == 0 cannot be divided by), so clamp it to 1 in those cases.
+    int gap_time = (fps != 0) ? static_cast<int>(1000 / fps) : 0;
+    if (gap_time == 0) {
+      gap_time = 1;
+    }
+    // Kept when it lands in a different bucket than the last kept frame.
+    return pre_frame_time / gap_time != cur_frame_time / gap_time;
+  }
+  // mode 1: keep every frame_gap-th frame; the frame rate is rounded first
+  // (24.9999 -> 25, 24.02 -> 24). A zero gap would be a modulo by zero.
+  const int i_frame_rate = static_cast<int>(std::round(frame_rate));
+  int frame_gap = (fps != 0) ? static_cast<int>(i_frame_rate / fps) : 0;
+  if (frame_gap == 0) {
+    frame_gap = 1;
+  }
+  return frame_index % frame_gap == 0;
+}
diff --git a/core/preprocess/nvdec-extractframe/src/ExtractFrameJpeg.cpp b/core/preprocess/nvdec-extractframe/src/ExtractFrameJpeg.cpp
new file mode 100644
index 00000000..1a2e6d37
--- /dev/null
+++ b/core/preprocess/nvdec-extractframe/src/ExtractFrameJpeg.cpp
@@ -0,0 +1,203 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ExtractFrameJpeg.h"
+
+#include <nppi.h>
+
+#include <memory>
+#include <string>
+
+int baidu::xvision::ExtractFrameJpeg::init() {  // 0 on success
+  // Reset every handle before anything can fail, so the destructor never
+  // sees an indeterminate value after an early return.
+  _cuda_stream = nullptr;
+  _nv_jpeg_handler = nullptr;
+  _nv_enc_state = nullptr;
+  _nv_enc_params = nullptr;
+  auto result = ExtractFrameBase::init();
+  if (result != 0) {
+    return result;
+  }
+  auto cuda_init = cudaStreamCreate(&_cuda_stream);
+  if (cuda_init != ::cudaError::cudaSuccess) {
+    return -2;
+  }
+  nvjpegStatus_t stats = nvjpegCreateSimple(&_nv_jpeg_handler);
+  // every cuda / nvjpeg initialization error returns -2
+  if (stats != nvjpegStatus_t::NVJPEG_STATUS_SUCCESS) {
+    return -2;
+  }
+  stats = nvjpegEncoderParamsCreate(
+      _nv_jpeg_handler, &_nv_enc_params, _cuda_stream);
+  if (stats != nvjpegStatus_t::NVJPEG_STATUS_SUCCESS) {
+    return -2;
+  }
+  // high quality
+  stats = nvjpegEncoderParamsSetQuality(_nv_enc_params, 100, _cuda_stream);
+  if (stats != nvjpegStatus_t::NVJPEG_STATUS_SUCCESS) {
+    return -2;
+  }
+  stats =
+      nvjpegEncoderStateCreate(_nv_jpeg_handler, &_nv_enc_state, _cuda_stream);
+  if (stats != nvjpegStatus_t::NVJPEG_STATUS_SUCCESS) {
+    return -2;
+  }
+  stats = nvjpegEncoderParamsSetSamplingFactors(
+      _nv_enc_params, NVJPEG_CSS_444, _cuda_stream);
+  return stats == nvjpegStatus_t::NVJPEG_STATUS_SUCCESS ? 0 : -2;
+}
+// Decodes `file_path` on the GPU and returns each selected frame as a JPEG
+// byte stream; n == 0 is replaced by 1000, count bounds the collected frames.
+baidu::xvision::IMGDataList baidu::xvision::ExtractFrameJpeg::extract_frame(
+    const std::string &file_path, int n, int count) {
+  FFmpegDemuxer demuxer(file_path.c_str());
+  NvDecoder nvdec(
+      p_cu_context, true, FFmpeg2NvCodecId(demuxer.GetVideoCodec()));
+  int n_width = demuxer.GetWidth();
+  int n_height = demuxer.GetHeight();
+  double frame_rate = demuxer.GetFrameRate();
+  int n_frame_size = n_width * n_height * 4;  // scratch bytes: 4 per pixel
+  uint8_t *p_video = nullptr;
+  uint8_t **pp_frame = nullptr;
+  int64_t *p_timestamp = nullptr;
+  CUdeviceptr p_tmp_image = 0;
+  cuMemAlloc(&p_tmp_image, n_frame_size);
+  int n_video_bytes = 0;
+  int frame_count = -1;
+  int frame_returned = 0;
+  int64_t pts = 0;
+  int64_t pre_frame_time = 0;
+  int64_t cur_frame_time = 0;
+  if (n == 0) {
+    n = 1000;
+  }
+  IMGDataList result_list;
+  do {
+    demuxer.Demux(&p_video, &n_video_bytes, &pts);
+    nvdec.Decode(p_video,
+                 n_video_bytes,
+                 &pp_frame,
+                 &frame_returned,
+                 0,
+                 &p_timestamp,
+                 pts);
+    for (auto i = 0; i < frame_returned; ++i) {
+      cur_frame_time = p_timestamp[i];
+      frame_count += 1;
+      if (!select_frame(
+              frame_rate, pre_frame_time, cur_frame_time, frame_count, n, 0)) {
+        continue;
+      }
+      pre_frame_time = cur_frame_time;
+      FrameResult fm_tmp;
+      fm_tmp.set_height(nvdec.GetHeight());
+      fm_tmp.set_width(nvdec.GetWidth());
+      if (nvdec.GetBitDepth() == 8) {
+        if (nvdec.GetOutputFormat() == cudaVideoSurfaceFormat_YUV444) {
+          YUV444ToColorPlanar<RGBA32>(reinterpret_cast<uint8_t *>(pp_frame[i]),
+                                      nvdec.GetWidth(),
+                                      reinterpret_cast<uint8_t *>(p_tmp_image),
+                                      nvdec.GetWidth(),
+                                      nvdec.GetWidth(),
+                                      nvdec.GetHeight());
+        } else {
+          Nv12ToColorPlanar<RGBA32>(reinterpret_cast<uint8_t *>(pp_frame[i]),
+                                    nvdec.GetWidth(),
+                                    reinterpret_cast<uint8_t *>(p_tmp_image),
+                                    nvdec.GetWidth(),
+                                    nvdec.GetWidth(),
+                                    nvdec.GetHeight());
+        }
+      } else {
+        if (nvdec.GetOutputFormat() == cudaVideoSurfaceFormat_YUV444_16Bit)
+          YUV444P16ToColorPlanar<RGBA32>(
+              reinterpret_cast<uint8_t *>(pp_frame[i]),
+              2 * nvdec.GetWidth(),
+              reinterpret_cast<uint8_t *>(p_tmp_image),
+              nvdec.GetWidth(),
+              nvdec.GetWidth(),
+              nvdec.GetHeight());
+        else
+          P016ToColorPlanar<RGBA32>(reinterpret_cast<uint8_t *>(pp_frame[i]),
+                                    2 * nvdec.GetWidth(),
+                                    reinterpret_cast<uint8_t *>(p_tmp_image),
+                                    nvdec.GetWidth(),
+                                    nvdec.GetWidth(),
+                                    nvdec.GetHeight());
+      }
+      jpeg_encode(reinterpret_cast<uint8_t *>(p_tmp_image),
+                  nvdec.GetWidth(),
+                  nvdec.GetHeight(),
+                  fm_tmp);  // NOTE(review): return value is ignored
+      result_list.push_back(fm_tmp);  // the copy shares fm_tmp's buffer
+    }
+    if (result_list.size() >= count) {
+      break;
+    }
+  } while (n_video_bytes);
+  cuMemFree(p_tmp_image);
+  return result_list;
+}
+// Encodes one planar RGB image (three width*height planes stored back to
+// back at p_image) into JPEG and stores the bytes in `result`.
+// Returns 0 on success and -1 when any nvjpeg or CUDA call fails.
+int baidu::xvision::ExtractFrameJpeg::jpeg_encode(uint8_t *p_image,
+                                                  int width,
+                                                  int height,
+                                                  FrameResult &result) {
+  nvjpegImage_t nv_image;
+  nv_image.channel[0] = p_image;
+  nv_image.channel[1] = p_image + width * height;
+  nv_image.channel[2] = p_image + width * height * 2;
+  nv_image.pitch[0] = width;
+  nv_image.pitch[1] = width;
+  nv_image.pitch[2] = width;
+  auto stat = nvjpegEncodeImage(_nv_jpeg_handler,
+                                _nv_enc_state,
+                                _nv_enc_params,
+                                &nv_image,
+                                NVJPEG_INPUT_RGB,
+                                width,
+                                height,
+                                _cuda_stream);
+  if (stat != nvjpegStatus_t::NVJPEG_STATUS_SUCCESS) {
+    return -1;
+  }
+  size_t len(0);  // filled by the size query below (null output buffer)
+  stat = nvjpegEncodeRetrieveBitstream(
+      _nv_jpeg_handler, _nv_enc_state, nullptr, &len, _cuda_stream);
+  auto cuda_stat = cudaStreamSynchronize(_cuda_stream);
+  if (stat != nvjpegStatus_t::NVJPEG_STATUS_SUCCESS ||
+      cuda_stat != ::cudaSuccess) {
+    return -1;
+  }
+  result.set_frame_buffer(new uint8_t[len]);
+  result.set_width(width);
+  result.set_height(height);
+  result.set_rows(len);
+  result.set_cols(1);
+  result.set_thick(1);
+  // jpeg, rows: len, cols: 1, thick: 1
+  stat = nvjpegEncodeRetrieveBitstream(
+      _nv_jpeg_handler, _nv_enc_state, result.get_frame(), &len, _cuda_stream);
+  if (stat != nvjpegStatus_t::NVJPEG_STATUS_SUCCESS) {
+    return -1;
+  }
+  cuda_stat = cudaStreamSynchronize(_cuda_stream);
+  if (cuda_stat != ::cudaSuccess) {
+    return -1;
+  }
+  return 0;
+}
diff --git a/core/preprocess/nvdec-extractframe/src/main.cpp b/core/preprocess/nvdec-extractframe/src/main.cpp
new file mode 100644
index 00000000..a3e245e6
--- /dev/null
+++ b/core/preprocess/nvdec-extractframe/src/main.cpp
@@ -0,0 +1,98 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*******************************************
+ *
+ * Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
+ *
+ ******************************************/
+/**
+ * @file main.cpp
+ * @author chengang06@baidu.com
+ * @date 2020-04-16
+ **/
+#include <cstring>
+#include <iomanip>
+#include <memory>
+#include <sstream>
+#include <string>
+#include "ExtractFrameBGRARaw.h"
+#include "ExtractFrameJpeg.h"
+
+simplelogger::Logger* g_logger =
+    simplelogger::LoggerFactory::CreateConsoleLogger();
+/**
+ * @Name:
+ *     image_file_writer
+ * @Feature:
+ *     write image data to file
+ * @params
+ *     img_data: image_data
+ *     file_path: image_file stored path
+ *     prefix: image_name prefix
+ *     extension: image_file extension name
+ * @returns
+ *     void
+ **/
+void inline image_file_writer(const baidu::xvision::FrameResult& img_data,
+                              std::string file_path,
+                              std::string prefix,
+                              std::string extension = "raw") {
+  // Destination is <file_path>/<prefix>.<extension>, written as raw bytes;
+  const std::string target = file_path + "/" + prefix + "." + extension;
+  std::ofstream sink(target, std::ios::binary | std::ios::out);
+  sink.write(reinterpret_cast<char*>(img_data.get_frame()), img_data.len());
+}  // the stream is closed when `sink` goes out of scope
+bool parse_cmd_line(int argc, const char* const argv[]) {  // true when usable
+  if (argc <= 3) {  // needs: video path, output dir, output format
+    LOG(FATAL) << "params error, eg: ./hw_frame_extract /path/to/mp4.mp4 "
+                  "/output/path bgra|jpeg";
+    return false;
+  }
+  if (strcmp(argv[3], "bgra") != 0 && strcmp(argv[3], "jpeg") != 0) {
+    LOG(FATAL) << "unsupported output file format";
+    return false;  // format is neither "bgra" nor "jpeg"
+  }
+  return true;
+}
+int main(int argc, char* argv[]) {  // args: video, output dir, bgra|jpeg
+  if (!parse_cmd_line(argc, argv)) {
+    return -1;
+  }
+  baidu::xvision::ExtractFrameBase* extract_frame_handler(nullptr);
+  if (strcmp("bgra", argv[3]) == 0) {
+    extract_frame_handler = new baidu::xvision::ExtractFrameBGRARaw();
+  } else {
+    extract_frame_handler = new baidu::xvision::ExtractFrameJpeg();
+  }
+  if (extract_frame_handler->init() != 0) {
+    // Deliberately not deleted: a failed init() can leave the handler only
+    // partially set up, and the process is about to exit anyway.
+    return -1;
+  }
+  auto result = extract_frame_handler->extract_frame(argv[1], 1, 200);
+  int frame_index = 0;
+  for (auto& frame : result) {
+    std::stringstream ss;  // zero-padded, 5-digit frame number
+    ss << std::setw(5) << std::setfill('0') << frame_index++;
+    image_file_writer(frame,
+                      argv[2],
+                      "image_" + std::to_string(frame.width()) + "_" +
+                          std::to_string(frame.height()) + "_" + ss.str(),
+                      argv[3]);
+    frame.free_memory();  // the buffer is not released by ~FrameResult
+  }
+  delete extract_frame_handler;  // runs the handler's virtual destructor
+  return 0;
+}
-- 
GitLab