Commit 5929ac80 authored by BohaoWu

Init Zelda

Parent bcc0c749
cmake_minimum_required(VERSION 3.2)
project(hw-frame-extract)
# SET(CUDA_VERSION 10.1)
#gcc version
#GCC('gcc482')
#CUDA("10.1")
set(global_cflags_str "-g -pipe -W -Wall -fPIC")
set(CMAKE_C_FLAGS ${global_cflags_str})
#C++ flags.
set(global_cxxflags_str "-g -pipe -W -Wall -fPIC -std=c++11")
set(CMAKE_CXX_FLAGS ${global_cxxflags_str})
add_subdirectory(cuda)
add_subdirectory(pybind11)
set (EXTRA_LIBS ${EXTRA_LIBS} gpu)
message(${CMAKE_CURRENT_SOURCE_DIR})
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include")
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/pybind11/include")
include_directories("/opt/compiler/cuda-10.1/include")
include_directories("/home/work/wubohao/baidu/third-party/python/include/python2.7")
file(GLOB SOURCE_FILES src/*.cpp pybind11/*.cpp )
link_directories("-L/opt/compiler/cuda-10.1/lib64 -lcudart -lnppidei_static -lnppial_static -lnpps_static -lnppc_static -lculibos")
# link_directories("/home/work/wubohao/baidu/third-party/python/lib")
#.so
add_library(gpupreprocess SHARED ${SOURCE_FILES})
target_link_libraries(gpupreprocess ${EXTRA_LIBS} cudart nppidei_static nppial_static npps_static nppc_static culibos)
# hwvideoframe
Brief description
## Quick start
How to build, install, and run
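A minimal usage sketch of the Python module (assuming the shared library builds as `libgpupreprocess` and is importable; operator names follow the pybind11 bindings in this commit):

```python
import numpy as np
import libgpupreprocess as pp

# HWC float32 RGB input; any image loader works, random data is only illustrative.
img = np.random.rand(224, 224, 3).astype(np.float32)

ops = [
    pp.Image2Gpubuffer(),   # copy host numpy data into a GPU buffer
    pp.Resize(196),         # match the smaller edge to 196 pixels
    pp.CenterCrop(128),     # crop the central 128x128 region
    pp.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    pp.Gpubuffer2Image(),   # copy the result back into a numpy array
]
out = img
for op in ops:
    out = op(out)
print(out.shape)  # (128, 128, 3)
```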
## Testing
How to run the automated tests
## Contributing
Patch workflow and quality requirements
## Discussion
Baidu Hi group: XXXX
cmake_minimum_required(VERSION 3.2)
project(gpu)
FIND_PACKAGE(CUDA ${CUDA_VERSION} REQUIRED)
SET(CUDA_TARGET_INCLUDE ${CUDA_TOOLKIT_ROOT_DIR}-${CUDA_VERSION}/targets/${CMAKE_HOST_SYSTEM_PROCESSOR}-${LOWER_SYSTEM_NAME}/include)
file(GLOB_RECURSE CURRENT_HEADERS *.h *.hpp *.cuh)
file(GLOB CURRENT_SOURCES *.cpp *.cu)
file(GLOB CUDA_LIBS /opt/compiler/cuda-10.1/lib64/*.so)
source_group("Include" FILES ${CURRENT_HEADERS})
source_group("Source" FILES ${CURRENT_SOURCES})
set(CMAKE_CUDA_FLAGS "-ccbin /opt/compiler/gcc-4.8.2/bin -Xcompiler -fPIC --std=c++11")
set(CUDA_NVCC_FLAGS "-L/opt/compiler/gcc-4.8.2/bin -Xcompiler -fPIC --std=c++11")
include_directories("/opt/compiler/cuda-10.1/include")
#cuda_add_library(gpu SHARED ${CURRENT_HEADERS} ${CURRENT_SOURCES})
cuda_add_library(gpu SHARED ${CURRENT_HEADERS} ${CURRENT_SOURCES})
target_link_libraries(gpu ${CUDA_LIBS})
# import libgpupreprocess as pp
#include "cuda_runtime.h"
#define clip(x, a, b) ((x) >= (a) ? ((x) < (b) ? (x) : (b) - 1) : (a))
const int INTER_RESIZE_COEF_BITS = 11;
const int INTER_RESIZE_COEF_SCALE = 1 << INTER_RESIZE_COEF_BITS;
__global__ void resizeCudaKernel( const float* input,
float* output,
const int inputWidth,
const int inputHeight,
const int outputWidth,
const int outputHeight,
const int inputChannels)
{
//2D Index of current thread
const int dx = blockIdx.x * blockDim.x + threadIdx.x;
const int dy = blockIdx.y * blockDim.y + threadIdx.y;
if((dx < outputWidth) && (dy < outputHeight))
{
if(inputChannels == 1) { // grayscale image
// TODO: support grayscale
} else if(inputChannels == 3) { // RGB image
double scale_x = (double) inputWidth / outputWidth;
double scale_y = (double) inputHeight / outputHeight;
int xmax = outputWidth;
float fx = (float)((dx + 0.5) * scale_x - 0.5);
int sx = floorf(fx);
fx = fx - sx;
int isx1 = sx;
if (isx1 < 0) {
fx = 0.0;
isx1 = 0;
}
if (isx1 >= (inputWidth - 1)) {
xmax = ::min( xmax, dx);
fx = 0;
isx1 = inputWidth - 1;
}
float2 cbufx;
cbufx.x = (1.f - fx);
cbufx.y = fx;
float fy = (float)((dy + 0.5) * scale_y - 0.5);
int sy = floorf(fy);
fy = fy - sy;
int isy1 = clip(sy + 0, 0, inputHeight);
int isy2 = clip(sy + 1, 0, inputHeight);
float2 cbufy;
cbufy.x = (1.f - fy);
cbufy.y = fy;
int isx2 = isx1 + 1;
float3 d0;
float3 s11 = make_float3(input[(isy1 * inputWidth + isx1) * inputChannels + 0] , input[(isy1 * inputWidth + isx1) * inputChannels + 1] , input[(isy1 * inputWidth + isx1) * inputChannels + 2]);
float3 s12 = make_float3(input[(isy1 * inputWidth + isx2) * inputChannels + 0] , input[(isy1 * inputWidth + isx2) * inputChannels + 1] , input[(isy1 * inputWidth + isx2) * inputChannels + 2]);
float3 s21 = make_float3(input[(isy2 * inputWidth + isx1) * inputChannels + 0] , input[(isy2 * inputWidth + isx1) * inputChannels + 1] , input[(isy2 * inputWidth + isx1) * inputChannels + 2]);
float3 s22 = make_float3(input[(isy2 * inputWidth + isx2) * inputChannels + 0] , input[(isy2 * inputWidth + isx2) * inputChannels + 1] , input[(isy2 * inputWidth + isx2) * inputChannels + 2]);
float h_rst00, h_rst01;
// B
if( dx > xmax - 1)
{
h_rst00 = s11.x;
h_rst01 = s21.x;
}
else
{
h_rst00 = s11.x * cbufx.x + s12.x * cbufx.y;
h_rst01 = s21.x * cbufx.x + s22.x * cbufx.y;
}
d0.x = h_rst00 * cbufy.x + h_rst01 * cbufy.y;
// G
if( dx > xmax - 1)
{
h_rst00 = s11.y;
h_rst01 = s21.y;
}
else
{
h_rst00 = s11.y * cbufx.x + s12.y * cbufx.y;
h_rst01 = s21.y * cbufx.x + s22.y * cbufx.y;
}
d0.y = h_rst00 * cbufy.x + h_rst01 * cbufy.y;
// R
if( dx > xmax - 1)
{
h_rst00 = s11.z;
h_rst01 = s21.z;
}
else
{
h_rst00 = s11.z * cbufx.x + s12.z * cbufx.y;
h_rst01 = s21.z * cbufx.x + s22.z * cbufx.y;
}
d0.z = h_rst00 * cbufy.x + h_rst01 * cbufy.y;
output[(dy*outputWidth + dx) * 3 + 0 ] = (d0.x); // channel order is preserved from the input
output[(dy*outputWidth + dx) * 3 + 1 ] = (d0.y);
output[(dy*outputWidth + dx) * 3 + 2 ] = (d0.z);
} else {
// TODO: support alpha channel
}
}
}
__global__ void resizeCudaKernel_fixpt( const float* input,
float* output,
const int inputWidth,
const int inputHeight,
const int outputWidth,
const int outputHeight,
const int inputChannels)
{
//2D Index of current thread
const int dx = blockIdx.x * blockDim.x + threadIdx.x;
const int dy = blockIdx.y * blockDim.y + threadIdx.y;
if((dx < outputWidth) && (dy < outputHeight))
{
if(inputChannels == 1) { // grayscale image
// TODO: support grayscale
} else if(inputChannels == 3) { // RGB image
double scale_x = (double) inputWidth / outputWidth;
double scale_y = (double) inputHeight / outputHeight;
int xmax = outputWidth;
float fx = (float)((dx + 0.5) * scale_x - 0.5);
int sx = floorf(fx);
fx = fx - sx;
int isx1 = sx;
if (isx1 < 0) {
fx = 0.0;
isx1 = 0;
}
if (isx1 >= (inputWidth - 1)) {
xmax = ::min( xmax, dx);
fx = 0;
isx1 = inputWidth - 1;
}
short2 cbufx;
cbufx.x = lrintf((1.f - fx) * INTER_RESIZE_COEF_SCALE);
cbufx.y = lrintf(fx * INTER_RESIZE_COEF_SCALE);
float fy = (float)((dy + 0.5) * scale_y - 0.5);
int sy = floorf(fy);
fy = fy - sy;
int isy1 = clip(sy + 0, 0, inputHeight);
int isy2 = clip(sy + 1, 0, inputHeight);
short2 cbufy;
cbufy.x = lrintf((1.f - fy) * INTER_RESIZE_COEF_SCALE);
cbufy.y = lrintf(fy * INTER_RESIZE_COEF_SCALE);
int isx2 = isx1 + 1;
uchar3 d0;
int3 s11 = make_int3(input[(isy1 * inputWidth + isx1) * inputChannels + 0] , input[(isy1 * inputWidth + isx1) * inputChannels + 1] , input[(isy1 * inputWidth + isx1) * inputChannels + 2]);
int3 s12 = make_int3(input[(isy1 * inputWidth + isx2) * inputChannels + 0] , input[(isy1 * inputWidth + isx2) * inputChannels + 1] , input[(isy1 * inputWidth + isx2) * inputChannels + 2]);
int3 s21 = make_int3(input[(isy2 * inputWidth + isx1) * inputChannels + 0] , input[(isy2 * inputWidth + isx1) * inputChannels + 1] , input[(isy2 * inputWidth + isx1) * inputChannels + 2]);
int3 s22 = make_int3(input[(isy2 * inputWidth + isx2) * inputChannels + 0] , input[(isy2 * inputWidth + isx2) * inputChannels + 1] , input[(isy2 * inputWidth + isx2) * inputChannels + 2]);
int h_rst00, h_rst01;
// B
if( dx > xmax - 1)
{
h_rst00 = s11.x * INTER_RESIZE_COEF_SCALE;
h_rst01 = s21.x * INTER_RESIZE_COEF_SCALE;
}
else
{
h_rst00 = s11.x * cbufx.x + s12.x * cbufx.y;
h_rst01 = s21.x * cbufx.x + s22.x * cbufx.y;
}
d0.x = (unsigned char)(( ((cbufy.x * (h_rst00 >> 4)) >> 16) + ((cbufy.y * (h_rst01 >> 4)) >> 16) + 2) >> 2);
// G
if( dx > xmax - 1)
{
h_rst00 = s11.y * INTER_RESIZE_COEF_SCALE;
h_rst01 = s21.y * INTER_RESIZE_COEF_SCALE;
}
else
{
h_rst00 = s11.y * cbufx.x + s12.y * cbufx.y;
h_rst01 = s21.y * cbufx.x + s22.y * cbufx.y;
}
d0.y = (unsigned char)(( ((cbufy.x * (h_rst00 >> 4)) >> 16) + ((cbufy.y * (h_rst01 >> 4)) >> 16) + 2) >> 2);
// R
if( dx > xmax - 1)
{
h_rst00 = s11.z * INTER_RESIZE_COEF_SCALE;
h_rst01 = s21.z * INTER_RESIZE_COEF_SCALE;
}
else
{
h_rst00 = s11.z * cbufx.x + s12.z * cbufx.y;
h_rst01 = s21.z * cbufx.x + s22.z * cbufx.y;
}
d0.z = (unsigned char)(( ((cbufy.x * (h_rst00 >> 4)) >> 16) + ((cbufy.y * (h_rst01 >> 4)) >> 16) + 2) >> 2);
output[(dy*outputWidth + dx) * 3 + 0 ] = (d0.x); // channel order is preserved from the input
output[(dy*outputWidth + dx) * 3 + 1 ] = (d0.y);
output[(dy*outputWidth + dx) * 3 + 2 ] = (d0.z);
} else {
// TODO: support alpha channel
}
}
}
extern "C" cudaError_t resize_linear( const float* input,
float* output,
const int inputWidth,
const int inputHeight,
const int outputWidth,
const int outputHeight,
const int inputChannels,
const bool use_fixed_point) {
//Specify a reasonable block size
const dim3 block(16,16);
//Calculate grid size to cover the whole image
const dim3 grid((outputWidth + block.x - 1) / block.x, (outputHeight + block.y - 1) / block.y);
//Launch the size conversion kernel
if (use_fixed_point) {
resizeCudaKernel_fixpt<<<grid,block>>>(input, output, inputWidth, inputHeight, outputWidth, outputHeight, inputChannels);
} else {
resizeCudaKernel<<<grid,block>>>(input, output, inputWidth, inputHeight, outputWidth, outputHeight, inputChannels);
}
//Synchronize to check for any kernel launch errors
return cudaDeviceSynchronize();
}
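For reference, the per-pixel arithmetic above follows the usual OpenCV-style bilinear resize: each output pixel blends a 2x2 source neighbourhood with weights derived from the fractional source coordinate, and the fixed-point kernel scales those weights by INTER_RESIZE_COEF_SCALE (1 << 11) before descaling. A rough NumPy sketch of the floating-point variant, with simplified border handling (illustrative only, not part of the build):

# illustrative CPU reference, not part of this commit
import numpy as np

INTER_RESIZE_COEF_SCALE = 1 << 11  # same constant as in the kernels above

def resize_pixel(src, dx, dy, out_w, out_h):
    """Bilinearly sample output pixel (dx, dy) from an HWC float image."""
    in_h, in_w, _ = src.shape
    fx = (dx + 0.5) * in_w / float(out_w) - 0.5
    fy = (dy + 0.5) * in_h / float(out_h) - 0.5
    sx, sy = int(np.floor(fx)), int(np.floor(fy))
    fx, fy = fx - sx, fy - sy
    sx = min(max(sx, 0), in_w - 2)  # clamp so sx + 1 stays in range
    sy = min(max(sy, 0), in_h - 2)
    # blend along x first, then along y
    top = src[sy, sx] * (1 - fx) + src[sy, sx + 1] * fx
    bot = src[sy + 1, sx] * (1 - fx) + src[sy + 1, sx + 1] * fx
    # the fixed-point kernel instead quantises each weight as lrint(w * INTER_RESIZE_COEF_SCALE)
    return top * (1 - fy) + bot * fy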
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file center_crop.h
* @author yinyijun@baidu.com
* @date 2020-06-15
**/
#ifndef BAIDU_CVPRE_CENTER_CROP_H
#define BAIDU_CVPRE_CENTER_CROP_H
#include <memory>
#include <npp.h>
#include "op_context.h"
// Crops the given image at the center.
// the size must not be bigger than the input's height or width
class CenterCrop {
public:
CenterCrop(int size) : _size(size) {};
std::shared_ptr<OpContext> operator()(std::shared_ptr<OpContext> input);
private:
int _size;
};
#endif // BAIDU_CVPRE_CENTER_CROP_H
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file div.h
* @author yinyijun@baidu.com
* @date 2020-06-11
**/
#ifndef BAIDU_CVPRE_DIV_H
#define BAIDU_CVPRE_DIV_H
#include <memory>
#include <npp.h>
#include "op_context.h"
// divide every pixel by a float scalar
class Div {
public:
Div(float value);
std::shared_ptr<OpContext> operator()(std::shared_ptr<OpContext> input);
private:
Npp32f _divisor;
};
#endif // BAIDU_CVPRE_DIV_H
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file image_io.h
* @author yinyijun@baidu.com
* @date 2020-06-08
**/
#ifndef BAIDU_CVPRE_IMAGE_IO_H
#define BAIDU_CVPRE_IMAGE_IO_H
#include <memory>
#include <npp.h>
#include <pybind11/numpy.h>
#include "op_context.h"
// Input operator that copies numpy data to a gpu buffer
class Image2Gpubuffer {
public:
std::shared_ptr<OpContext> operator()(pybind11::array_t<float> array);
};
// Output operator that copies gpu buffer data back to numpy
class Gpubuffer2Image {
public:
pybind11::array_t<float> operator()(std::shared_ptr<OpContext> input);
};
#endif // BAIDU_CVPRE_IMAGE_IO_H
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file normalize.h
* @author yinyijun@baidu.com
* @date 2020-06-04
**/
#ifndef BAIDU_CVPRE_NORMALIZE_H
#define BAIDU_CVPRE_NORMALIZE_H
#include <memory>
#include <vector>
#include <npp.h>
#include "op_context.h"
// normalize operator on gpu: subtract the per-channel mean, then divide by the per-channel std
class Normalize {
public:
Normalize(const std::vector<float> &mean, const std::vector<float> &std, bool channel_first = false);
std::shared_ptr<OpContext> operator()(std::shared_ptr<OpContext> input);
private:
Npp32f _mean[CHANNEL_SIZE];
Npp32f _std[CHANNEL_SIZE];
bool _channel_first; // indicates whether the channel is dimension 0; not supported yet
};
#endif // BAIDU_CVPRE_NORMALIZE_H
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file op_context.h
* @author yinyijun@baidu.com
* @date 2020-06-03
**/
#ifndef BAIDU_CVPRE_OPCONTEXT_H
#define BAIDU_CVPRE_OPCONTEXT_H
#include <npp.h>
const size_t CHANNEL_SIZE = 3;
// The context used as input/output of all operators;
// contains a pointer to the raw data on the gpu and the frame size
class OpContext {
public:
OpContext() {
_step = 0;
_length = 0;
_size = 0;
_p_frame = nullptr;
}
// constructor that allocates gpu memory for the image raw data
OpContext(int height, int width) {
_step = sizeof(Npp32f) * width * CHANNEL_SIZE;
_length = height * width * CHANNEL_SIZE;
_size = _step * height;
_nppi_size.height = height;
_nppi_size.width = width;
cudaMalloc((void **)(&_p_frame), _size);
}
virtual ~OpContext() {
free_memory();
}
public:
Npp32f* p_frame() const {
return _p_frame;
}
int step() const {
return _step;
}
int length() const {
return _length;
}
int size() const {
return _size;
}
NppiSize& nppi_size() {
return _nppi_size;
}
void free_memory() {
if (_p_frame != nullptr) {
cudaFree(_p_frame);
_p_frame = nullptr;
}
_nppi_size.height = 0;
_nppi_size.width = 0;
_step = 0;
_size = 0;
}
private:
Npp32f *_p_frame; // pointer to raw data on gpu
int _step; // number of bytes in a row
int _length; // length of _p_frame, _size = _length * sizeof(Npp32f)
int _size; // number of bytes of the image
NppiSize _nppi_size; // contains height and width
};
#endif // BAIDU_CVPRE_OPCONTEXT_H
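To make the buffer arithmetic above concrete, a small sketch with illustrative values (not part of the build):

# illustrative sketch, not part of this commit
import numpy as np

height, width, channels = 224, 200, 3
step = np.dtype(np.float32).itemsize * width * channels  # bytes per row: 4 * 200 * 3 = 2400
length = height * width * channels                       # number of float elements: 134400
size = step * height                                     # total buffer size in bytes: 537600
print(step, length, size)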
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file resize.h
* @author yinyijun@baidu.com
* @date 2020-06-15
**/
#ifndef BAIDU_CVPRE_RESIZE_H
#define BAIDU_CVPRE_RESIZE_H
#include <memory>
#include <vector>
#include <npp.h>
#include "op_context.h"
extern "C" cudaError_t resize_linear(const float* input,
float* output,
const int inputWidth,
const int inputHeight,
const int outputWidth,
const int outputHeight,
const int inputChannels,
const bool use_fixed_point);
// Resize the input image to the given size.
// only linear interpolation is supported
// only RGB channels are supported
class Resize {
public:
// size is an int; the smaller edge of the image will be matched to this number.
Resize(int size, int max_size = 214748364, bool use_fixed_point = false, int interpolation = 0)
: _size(size), _max_size(max_size), _use_fixed_point(use_fixed_point), _interpolation(interpolation) {};
// size is a sequence like (w, h); the output size will be matched to it
Resize(std::vector<int> size, int max_size = 214748364, bool use_fixed_point = false, int interpolation = 0)
: _size(-1), _max_size(max_size), _use_fixed_point(use_fixed_point), _interpolation(interpolation) {
_target_size[0] = size[0];
_target_size[1] = size[1];
}
std::shared_ptr<OpContext> operator()(std::shared_ptr<OpContext> input);
private:
int _size; // target of smaller edge
int _target_size[2]; // target size sequence (w, h)
int _max_size;
bool _use_fixed_point;
int _interpolation; // unused
};
#endif // BAIDU_CVPRE_RESIZE_H
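The smaller-edge rule used by Resize(size) can be summarised with a short sketch (an illustrative reimplementation of the logic in resize.cpp, not part of the build):

# illustrative sketch, not part of this commit
def resize_target(in_w, in_h, size, max_size=214748364):
    """Scale so that min(w, h) == size, capping the larger edge at max_size."""
    percent = float(size) / min(in_w, in_h)
    if round(percent * max(in_w, in_h)) > max_size:
        percent = float(max_size) / max(in_w, in_h)
    return int(round(in_w * percent)), int(round(in_h * percent))

print(resize_target(500, 375, 256))  # -> (341, 256)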
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file resize_by_factor.h
* @author wubohao@baidu.com
* @date 2020-07-21
**/
#ifndef BAIDU_CVPRE_RESIZE_BY_FACTOR_H
#define BAIDU_CVPRE_RESIZE_BY_FACTOR_H
#include <memory>
#include <vector>
#include <npp.h>
#include "op_context.h"
extern "C" cudaError_t resize_linear(const float *input,
float *output,
const int inputWidth,
const int inputHeight,
const int outputWidth,
const int outputHeight,
const int inputChannels,
const bool use_fixed_point);
// Resize the input image so that its width and height are multiples of factor, as usually required by a network
// only linear interpolation is supported
// only RGB channels are supported
class ResizeByFactor
{
public:
// Resize factor: make the width and height multiples of this value. Default is 32
ResizeByFactor(int factor = 32, int max_side_len = 2400, bool use_fixed_point = false, int interpolation = 0)
: _factor(factor), _max_side_len(max_side_len), _use_fixed_point(use_fixed_point), _interpolation(interpolation){};
std::shared_ptr<OpContext> operator()(std::shared_ptr<OpContext> input);
private:
int _factor; // width and height are rounded to multiples of this factor
int _max_side_len;
bool _use_fixed_point;
int _interpolation; // unused
};
#endif // BAIDU_CVPRE_RESIZE_BY_FACTOR_H
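The intended size rule of ResizeByFactor in a short sketch: shrink so the longer side fits within max_side_len, then snap each side down to a multiple of factor, never below factor itself (an illustrative reimplementation, not part of the build):

# illustrative sketch, not part of this commit
def resize_by_factor_target(w, h, factor=32, max_side_len=2400):
    ratio = 1.0
    if max(w, h) > max_side_len:
        ratio = float(max_side_len) / max(w, h)
    w, h = int(w * ratio), int(h * ratio)
    def snap(x):
        if x % factor == 0:
            return x
        if x // factor <= 1:
            return factor
        return (x // factor - 1) * factor
    return snap(w), snap(h)

print(resize_by_factor_target(1920, 1080))  # -> (1920, 1024)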
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file rgb_swap.h
* @author yinyijun@baidu.com
* @date 2020-06-01
**/
#ifndef BAIDU_CVPRE_RGB_SWAP_H
#define BAIDU_CVPRE_RGB_SWAP_H
#include <memory>
#include <npp.h>
#include "op_context.h"
// swap channel 0 and channel 2 for every pixel
// both RGB2BGR and BGR2RGB use this operator
class SwapChannel {
public:
SwapChannel(){};
std::shared_ptr<OpContext> operator()(std::shared_ptr<OpContext> input);
private:
static const int _ORDER[CHANNEL_SIZE]; // describes how channel values are permuted
};
class RGB2BGR : public SwapChannel {};
class BGR2RGB : public SwapChannel {};
#endif // BAIDU_CVPRE_RGB_SWAP_H
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file sub.h
* @author yinyijun@baidu.com
* @date 2020-07-06
**/
#ifndef BAIDU_CVPRE_SUB_H
#define BAIDU_CVPRE_SUB_H
#include <memory>
#include <vector>
#include <npp.h>
#include "op_context.h"
// subtract a per-channel float constant from every pixel
class Sub {
public:
Sub(float subtractor);
Sub(const std::vector<float> &subtractors);
std::shared_ptr<OpContext> operator()(std::shared_ptr<OpContext> input);
private:
Npp32f _subtractors[CHANNEL_SIZE];
};
#endif // BAIDU_CVPRE_SUB_H
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file utils.h
* @author yinyijun@baidu.com
* @date 2020-06-10
**/
#ifndef BAIDU_CVPRE_UTILS_H
#define BAIDU_CVPRE_UTILS_H
#include <string>
#include <npp.h>
// verify the return value of an NPP function
// throws an exception on failure
void verify_npp_ret(const std::string& function_name, NppStatus ret);
// verify the return value of a CUDA runtime function
// throws an exception on failure
void verify_cuda_ret(const std::string& function_name, cudaError_t ret);
#endif // BAIDU_CVPRE_UTILS_H
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "div.h"
#include "sub.h"
#include "image_io.h"
#include "rgb_swap.h"
#include "normalize.h"
#include "center_crop.h"
#include "resize.h"
#include "resize_by_factor.h"
PYBIND11_MODULE(libgpupreprocess, m) {
pybind11::class_<OpContext, std::shared_ptr<OpContext>>(m, "OpContext");
pybind11::class_<Image2Gpubuffer>(m, "Image2Gpubuffer")
.def(pybind11::init<>())
.def("__call__", &Image2Gpubuffer::operator());
pybind11::class_<Gpubuffer2Image>(m, "Gpubuffer2Image")
.def(pybind11::init<>())
.def("__call__", &Gpubuffer2Image::operator());
pybind11::class_<RGB2BGR>(m, "RGB2BGR")
.def(pybind11::init<>())
.def("__call__", &RGB2BGR::operator());
pybind11::class_<BGR2RGB>(m, "BGR2RGB")
.def(pybind11::init<>())
.def("__call__", &BGR2RGB::operator());
pybind11::class_<Div>(m, "Div")
.def(pybind11::init<float>())
.def("__call__", &Div::operator());
pybind11::class_<Sub>(m, "Sub")
.def(pybind11::init<float>())
.def(pybind11::init<const std::vector<float>&>())
.def("__call__", &Sub::operator());
pybind11::class_<Normalize>(m, "Normalize")
.def(pybind11::init<const std::vector<float>&, const std::vector<float>&, bool>(),
pybind11::arg("mean"), pybind11::arg("std"), pybind11::arg("channel_first") = false)
.def("__call__", &Normalize::operator());
pybind11::class_<CenterCrop>(m, "CenterCrop")
.def(pybind11::init<int>())
.def("__call__", &CenterCrop::operator());
pybind11::class_<Resize>(m, "Resize")
.def(pybind11::init<int, int, bool>(),
pybind11::arg("size"),
pybind11::arg("max_size") = 214748364,
pybind11::arg("use_fixed_point") = false)
.def(pybind11::init<const std::vector<int>&, int, bool>(),
pybind11::arg("target_size"),
pybind11::arg("max_size") = 214748364,
pybind11::arg("use_fixed_point") = false)
.def("__call__", &Resize::operator());
pybind11::class_<ResizeByFactor>(m, "ResizeByFactor")
.def(pybind11::init<int, int, bool>(),
pybind11::arg("factor") = 32,
pybind11::arg("max_side_len") = 2400,
pybind11::arg("use_fixed_point") = false)
.def("__call__", &ResizeByFactor::operator());
}
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file center_crop.cpp
* @author yinyijun@baidu.com
* @date 2020-06-15
**/
#include "center_crop.h"
#include <algorithm>
#include <npp.h>
#include "utils.h"
std::shared_ptr<OpContext> CenterCrop::operator()(std::shared_ptr<OpContext> input) {
int new_width = std::min(_size, input->nppi_size().width);
int new_height = std::min(_size, input->nppi_size().height);
auto output = std::make_shared<OpContext>(new_height, new_width);
int x_start = (input->nppi_size().width - new_width) / 2;
int y_start = (input->nppi_size().height - new_height) / 2;
Npp32f* p_src = input->p_frame()
+ y_start * input->nppi_size().width * CHANNEL_SIZE
+ x_start * CHANNEL_SIZE;
NppStatus ret = nppiCopy_32f_C3R(p_src,
input->step(),
output->p_frame(),
output->step(),
output->nppi_size());
verify_npp_ret("nppiCopy_32f_C3R", ret);
return output;
}
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file div.cpp
* @author yinyijun@baidu.com
* @date 2020-06-11
**/
#include "div.h"
#include <stdexcept>
#include <npp.h>
#include "utils.h"
Div::Div(float value) {
_divisor = value;
}
std::shared_ptr<OpContext> Div::operator()(std::shared_ptr<OpContext> input) {
NppStatus ret = nppsDivC_32f_I(_divisor, input->p_frame(), input->length());
verify_npp_ret("nppsDivC_32f_I", ret);
return input;
}
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file image_io.cpp
* @author yinyijun@baidu.com
* @date 2020-06-08
**/
#include "image_io.h"
#include <stdexcept>
#include <cuda_runtime.h>
#include <pybind11/numpy.h>
#include "utils.h"
std::shared_ptr<OpContext> Image2Gpubuffer::operator()(pybind11::array_t<float> input) {
pybind11::buffer_info buf = input.request();
if (buf.format != pybind11::format_descriptor<float>::format()) {
throw std::runtime_error("Incompatible format: expected a float numpy!");
}
if (buf.ndim != 3) {
throw std::runtime_error("Number of dimensions must be three");
}
if (buf.shape[2] != CHANNEL_SIZE) {
throw std::runtime_error("Number of channels must be three");
}
auto result = std::make_shared<OpContext>(buf.shape[0], buf.shape[1]);
auto ret = cudaMemcpy(result->p_frame(), static_cast<float*>(buf.ptr), result->size(), cudaMemcpyHostToDevice);
verify_cuda_ret("cudaMemcpy", ret);
return result;
}
pybind11::array_t<float> Gpubuffer2Image::operator()(std::shared_ptr<OpContext> input) {
auto result = pybind11::array_t<float>({ input->nppi_size().height, input->nppi_size().width, (int)CHANNEL_SIZE });
pybind11::buffer_info buf = result.request();
auto ret = cudaMemcpy(static_cast<float*>(buf.ptr), input->p_frame(), input->size(), cudaMemcpyDeviceToHost);
verify_cuda_ret("cudaMemcpy", ret);
return result;
}
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file normalize.cpp
* @author yinyijun@baidu.com
* @date 2020-06-04
**/
#include "normalize.h"
#include <stdexcept>
#include <npp.h>
#include "utils.h"
Normalize::Normalize(const std::vector<float> &mean, const std::vector<float> &std, bool channel_first) {
if (mean.size() != CHANNEL_SIZE) {
throw std::runtime_error("size of mean must be three");
}
if (std.size() != CHANNEL_SIZE) {
throw std::runtime_error("size of std must be three");
}
for (size_t i = 0; i < CHANNEL_SIZE; i++) {
_mean[i] = mean[i];
_std[i] = std[i];
}
_channel_first = channel_first;
}
std::shared_ptr<OpContext> Normalize::operator()(std::shared_ptr<OpContext> input) {
NppStatus ret = nppiSubC_32f_C3IR(_mean, input->p_frame(), input->step(), input->nppi_size());
verify_npp_ret("nppiSubC_32f_C3IR", ret);
ret = nppiDivC_32f_C3IR(_std, input->p_frame(), input->step(), input->nppi_size());
verify_npp_ret("nppiDivC_32f_C3IR", ret);
return input;
}
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file resize.cpp
* @author yinyijun@baidu.com
* @date 2020-06-15
**/
#include "resize.h"
#include <sstream>
#include <math.h>
#include "utils.h"
std::shared_ptr<OpContext> Resize::operator()(std::shared_ptr<OpContext> input) {
int resized_width = 0, resized_height = 0;
if (_size == -1) {
resized_width = std::min(_target_size[0], _max_size);
resized_height = std::min(_target_size[1], _max_size);
} else {
int im_max_size = std::max(input->nppi_size().height, input->nppi_size().width);
float percent = (float)_size / std::min(input->nppi_size().height, input->nppi_size().width);
if (round(percent * im_max_size) > _max_size) {
percent = float(_max_size) / float(im_max_size);
}
resized_width = int(round(input->nppi_size().width * percent));
resized_height = int(round(input->nppi_size().height * percent));
}
auto output = std::make_shared<OpContext>(resized_height, resized_width);
auto ret = resize_linear(input->p_frame(),
output->p_frame(),
input->nppi_size().width,
input->nppi_size().height,
output->nppi_size().width,
output->nppi_size().height,
CHANNEL_SIZE,
_use_fixed_point);
verify_cuda_ret("resize_linear", ret);
return output;
}
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file resize_by_factor.cpp
* @author wubohao@baidu.com
* @date 2020-07-21
**/
#include "resize_by_factor.h"
#include <sstream>
#include <math.h>
#include "resize.h"
#include "utils.h"
std::shared_ptr<OpContext> ResizeByFactor::operator()(std::shared_ptr<OpContext> input)
{
int resized_width = input->nppi_size().width, resized_height = input->nppi_size().height;
float ratio = 1.0f;
if (std::max(resized_width, resized_height) > _max_side_len)
{
if (resized_width > resized_height)
{
ratio = float(_max_side_len) / resized_width;
}
else
{
ratio = float(_max_side_len) / resized_height;
}
}
resized_width = int(resized_width * ratio);
resized_height = int(resized_height * ratio);
if (resized_height % _factor != 0)
{
if (resized_height / _factor <= 1)
{
resized_height = _factor;
}
else
{
resized_height = (resized_height / _factor - 1) * _factor;
}
}
if (resized_width % _factor != 0)
{
if (resized_width / _factor <= 1)
{
resized_width = _factor;
}
else
{
resized_width = (resized_width / _factor - 1) * _factor;
}
}
if (resized_width <= 0 || resized_height <= 0)
{
return nullptr;
}
auto output = std::make_shared<OpContext>(resized_height, resized_width);
auto ret = resize_linear(input->p_frame(),
output->p_frame(),
input->nppi_size().width,
input->nppi_size().height,
output->nppi_size().width,
output->nppi_size().height,
CHANNEL_SIZE,
_use_fixed_point);
verify_cuda_ret("resize_linear", ret);
return output;
}
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file rgb_swap.cpp
* @author yinyijun@baidu.com
* @date 2020-06-01
**/
#include "rgb_swap.h"
#include <npp.h>
#include "utils.h"
const int SwapChannel::_ORDER[CHANNEL_SIZE] = {2, 1, 0};
std::shared_ptr<OpContext> SwapChannel::operator()(std::shared_ptr<OpContext> input) {
NppStatus ret = nppiSwapChannels_32f_C3IR(input->p_frame(), input->step(), input->nppi_size(), _ORDER);
verify_npp_ret("nppiSwapChannels_32f_C3IR", ret);
return input;
}
/*******************************************
*
* Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
*
******************************************/
/**
* @file sub.cpp
* @author yinyijun@baidu.com
* @date 2020-06-11
**/
#include "sub.h"
#include <stdexcept>
#include <npp.h>
#include "utils.h"
Sub::Sub(float subtractor) {
for (size_t i = 0; i < CHANNEL_SIZE; i++) {
_subtractors[i] = subtractor;
}
}
Sub::Sub(const std::vector<float> &subtractors) {
if (subtractors.size() != CHANNEL_SIZE) {
throw std::runtime_error("size of subtractors must be three");
}
for (size_t i = 0; i < CHANNEL_SIZE; i++) {
_subtractors[i] = subtractors[i];
}
}
std::shared_ptr<OpContext> Sub::operator()(std::shared_ptr<OpContext> input) {
NppStatus ret = nppiSubC_32f_C3IR(_subtractors, input->p_frame(), input->step(), input->nppi_size());
verify_npp_ret("nppiSubC_32f_C3IR", ret);
return input;
}
#include <sstream>
#include <stdexcept>
#include <npp.h>
#include "utils.h"
void verify_npp_ret(const std::string& function_name, NppStatus ret) {
if (ret != NPP_SUCCESS) {
std::ostringstream ss;
ss << function_name << ", ret: " << ret;
throw std::runtime_error(ss.str());
}
}
void verify_cuda_ret(const std::string& function_name, cudaError_t ret) {
if (ret != cudaSuccess) {
std::ostringstream ss;
ss << function_name << ", ret: " << ret;
throw std::runtime_error(ss.str());
}
}
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from audio_reader import AudioFeatureOp
if __name__ == '__main__':
local_video_path = 'case.mp4'
test_op = AudioFeatureOp()
test_op.extract_audio_from_video(local_video_path)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
hwextract lib use case
"""
from paddle_serving_app.reader import hwextract
import sys
if __name__ == '__main__':
handler = hwextract.HwExtractFrameJpeg(0)
# 0 is the gpu card index
# if you want BGRA raw data, please use HwExtractBGRARaw
handler.init_handler()
# initialize once; the same handler can decode many videos
video_file_name = sys.argv[1]
# only the h264 codec is supported for now
frame_list = []
try:
frame_list = handler.extract_frame(video_file_name, 1)
# specify the file name and the fps to extract; 0 extracts all frames
except Exception as e_frame:
print("Failed to cutframe, exception[%s]" % (e_frame))
sys.exit(1)
for item in frame_list:
print "i am a item in frame_list"
# do something, for instance
# jpeg_array = np.array(item, copy=False)
# img = cv2.imdecode(item, cv2.IMREAD_COLOR)
# etc.....
item.free_memory()
# the gpu memory must be released explicitly
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import sys
import numpy as np
from paddle_serving_app.reader import Sequential, Resize, File2Image
import libgpupreprocess as pp
class TestOperators(unittest.TestCase):
"""
test all operators, e.g. Div, Normalize
"""
def test_div(self):
"""
test Div
"""
height = 4
width = 5
channels = 3
value = 255.0
img = np.arange(height * width * channels).reshape(
[height, width, channels])
seq = Sequential(
[pp.Image2Gpubuffer(), pp.Div(value), pp.Gpubuffer2Image()])
result = seq(img).reshape(-1)
for i in range(0, result.size):
self.assertAlmostEqual(i / value, result[i], 5)
def test_sub(self):
"""
test Sub
"""
height = 4
width = 5
channels = 3
img = np.arange(height * width * channels).reshape(
[height, width, channels])
# input size is an int
value = 10.0
seq = Sequential(
[pp.Image2Gpubuffer(), pp.Sub(value), pp.Gpubuffer2Image()])
result = seq(img).reshape(-1)
for i in range(0, result.size):
self.assertEqual(i - value, result[i])
# input size is a sequence
values = (9, 4, 2)
seq = Sequential(
[pp.Image2Gpubuffer(), pp.Sub(values), pp.Gpubuffer2Image()])
result = seq(img)
for i in range(0, result.shape[0]):
for j in range(0, result.shape[1]):
for k in range(0, result.shape[2]):
self.assertEqual(result[i][j][k], img[i][j][k] - values[k])
def test_normalize(self):
"""
test Normalize
"""
height = 4
width = 5
channels = 3
img = np.random.rand(height, width, channels)
mean = [5.0, 5.0, 5.0]
std = [2.0, 2.0, 2.0]
seq = Sequential([
pp.Image2Gpubuffer(), pp.Normalize(mean, std), pp.Gpubuffer2Image()
])
result = seq(img)
for i in range(0, height):
for j in range(0, width):
for k in range(0, channels):
self.assertAlmostEqual((img[i][j][k] - mean[k]) / std[k],
result[i][j][k], 5)
def test_center_crop(self):
"""
test CenterCrop
"""
height = 9
width = 7
channels = 3
img = np.arange(height * width * channels).reshape(
[height, width, channels])
new_size = 5
seq = Sequential([
pp.Image2Gpubuffer(), pp.CenterCrop(new_size), pp.Gpubuffer2Image()
])
result = seq(img)
self.assertEqual(result.shape[0], new_size)
self.assertEqual(result.shape[1], new_size)
self.assertEqual(result.shape[2], channels)
def test_resize(self):
"""
test Resize
"""
height = 9
width = 5
channels = 3
img = np.arange(height * width).reshape([height, width, 1]) * np.ones(
(1, channels))
# input size is an int
for new_size in [3, 10]:
seq_gpu = Sequential([
pp.Image2Gpubuffer(), pp.Resize(new_size), pp.Gpubuffer2Image()
])
seq_paddle = Sequential([Resize(new_size)])
result_gpu = seq_gpu(img)
result_paddle = seq_paddle(img)
self.assertEqual(result_gpu.shape, result_paddle.shape)
for i in range(0, result_gpu.shape[0]):
for j in range(0, result_gpu.shape[1]):
for k in range(0, result_gpu.shape[2]):
self.assertAlmostEqual(result_gpu[i][j][k],
result_paddle[i][j][k], 5)
# input size is a sequence
for new_height, new_width in [(7, 3), (15, 10)]:
seq_gpu = Sequential([
pp.Image2Gpubuffer(), pp.Resize((new_width, new_height)),
pp.Gpubuffer2Image()
])
seq_paddle = Sequential([Resize((new_width, new_height))])
result_gpu = seq_gpu(img)
result_paddle = seq_paddle(img)
self.assertEqual(result_gpu.shape, result_paddle.shape)
for i in range(0, result_gpu.shape[0]):
for j in range(0, result_gpu.shape[1]):
for k in range(0, result_gpu.shape[2]):
self.assertAlmostEqual(result_gpu[i][j][k],
result_paddle[i][j][k], 5)
def test_resize_fixed_point(self):
"""
test Resize by using fixed-point
"""
new_height = 256
new_width = 256 * 4 // 3
seq = Sequential([
File2Image(), pp.Image2Gpubuffer(), pp.Resize(
(new_width, new_height), use_fixed_point=True),
pp.Gpubuffer2Image()
])
img = seq("./capture_16.bmp")
img = np.resize(img, (new_height, new_width * 3))
img_vis = np.loadtxt("./cap_resize_16.raw")
img_resize_diff = img_vis - img
self.assertEqual(np.all(img_resize_diff == 0), True)
if __name__ == '__main__':
unittest.main()
@@ -16,6 +16,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import platform
import os
from setuptools import setup, Distribution, Extension
@@ -23,17 +24,33 @@ from setuptools import find_packages
from setuptools import setup
from paddle_serving_app.version import serving_app_version
from pkg_resources import DistributionNotFound, get_distribution
import util
max_version, mid_version, min_version = util.python_version()
def python_version():
return [int(v) for v in platform.python_version().split(".")]
def find_package(pkgname):
try:
get_distribution(pkgname)
return True
except DistributionNotFound:
return False
max_version, mid_version, min_version = python_version()
if '${PACK}' == 'ON':
copy_lib()
os.system('cp ../core/preprocess/nvdec-extractframe/libhwextract.so ./')
os.system('mv ./libhwextract.so ./paddle_serving_app/reader/hwextract.so')
os.system('cp ../core/preprocess/hwvideoframe/libgpupreprocess.so ./paddle_serving_app/reader')
os.system('mkdir ./paddle_serving_app/reader/lib')
os.system('cp ../core/preprocess/nvdec-extractframe/cuda/libhwgpu.so ./paddle_serving_app/reader/lib')
os.system('cp ../core/preprocess/hwvideoframe/cuda/libgpu.so ./paddle_serving_app/reader/lib')
os.environ['LD_LIBRARY_PATH'] = "./paddle_serving_app/reader/lib"
REQUIRED_PACKAGES = [
'six >= 1.10.0', 'sentencepiece', 'opencv-python<=4.2.0.32', 'pillow',
'shapely<=1.6.1', 'pyclipper'
'shapely', 'pyclipper'
]
packages=['paddle_serving_app',
@@ -41,9 +58,11 @@ packages=['paddle_serving_app',
'paddle_serving_app.reader',
'paddle_serving_app.utils',
'paddle_serving_app.models',
'paddle_serving_app.reader.pddet']
'paddle_serving_app.reader.pddet',
'paddle_serving_app.reader.lib']
package_data={}
package_data={'paddle_serving_app': ['reader/*.so']}
# 'paddle_serving_app.reader': ['lib/*.so']}
package_dir={'paddle_serving_app':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app',
'paddle_serving_app.proto':
@@ -55,7 +74,9 @@ package_dir={'paddle_serving_app':
'paddle_serving_app.models':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/models',
'paddle_serving_app.reader.pddet':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/reader/pddet',}
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/reader/pddet',
'paddle_serving_app.reader.lib':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/reader/lib',}
setup(
name='paddle-serving-app',
......