提交 e1e0c466 编写于 作者: M marina.kolpakova

integrated patch: HOG confidence calculation. Thanks, Wongun.

上级 4fa282e1
......@@ -1142,6 +1142,13 @@ private:
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
// Per-scale confidence record exchanged with
// HOGDescriptor::computeConfidenceMultiScale (one element per pyramid level).
struct CV_EXPORTS HOGConfidence
{
// Scale factor of this level; the input image is resized by 1/scale before evaluation.
double scale;
// Top-left corner of every window evaluated at this level (output).
vector<Point> locations;
// Classifier score of each window, in the same order as `locations` (output).
vector<double> confidences;
// NOTE(review): not read or written by the code in this patch — purpose to be confirmed.
vector<double> part_scores[4];
};
struct CV_EXPORTS HOGDescriptor
{
......@@ -1173,6 +1180,13 @@ struct CV_EXPORTS HOGDescriptor
Size padding=Size(), double scale0=1.05,
int group_threshold=2);
// Scores every detection window of `img` with the linear classifier.
//   hits        - windows whose score is >= hit_threshold
//   locations   - top-left corner of every evaluated window
//   confidences - score of every evaluated window, parallel to `locations`
// `padding` must be Size(0, 0).
void computeConfidence(const GpuMat& img, vector<Point>& hits, double hit_threshold,
Size win_stride, Size padding, vector<Point>& locations, vector<double>& confidences);
// Runs computeConfidence at each scale listed in `conf_out` (its `scale` fields are
// inputs; `locations`/`confidences` are filled per level) and returns the grouped
// hit rectangles, in original-image coordinates, in `found_locations`.
void computeConfidenceMultiScale(const GpuMat& img, vector<Rect>& found_locations,
double hit_threshold, Size win_stride, Size padding,
vector<HOGConfidence> &conf_out, int group_threshold);
void getDescriptors(const GpuMat& img, Size win_stride,
GpuMat& descriptors,
int descr_format=DESCR_FORMAT_COL_BY_COL);
......
......@@ -326,6 +326,97 @@ namespace cv { namespace gpu { namespace device
// Linear SVM based classification
//
// Returns the confidence value of every window, not just the positive locations.
// Computes the linear-SVM score (dot product of the window descriptor with `coefs`,
// plus `free_coef`) of every detection window and stores it in `confidences`,
// one float per window, row-major over (window row, window column).
//
// Launch layout expected by the indexing below:
//   block = (nthreads, 1, nblocks): threadIdx.x strides over descriptor elements,
//                                   threadIdx.z selects the window inside the block
//   grid  = (ceil(img_win_width / nblocks), number of window rows)
// Static shared memory: nthreads * nblocks floats.
// `threshold` is passed by the host wrapper but is not read by this kernel.
template <int nthreads, // Number of threads per one histogram block
          int nblocks>  // Number of histogram block processed by single GPU thread block
__global__ void compute_confidence_hists_kernel_many_blocks(const int img_win_width, const int img_block_width,
                                                            const int win_block_stride_x, const int win_block_stride_y,
                                                            const float* block_hists, const float* coefs,
                                                            float free_coef, float threshold, float* confidences)
{
    const int win_x = threadIdx.z;

    // Threads mapped past the last window must still reach every __syncthreads()
    // below, so they are masked with a predicate instead of returning early.
    const bool in_range = blockIdx.x * blockDim.z + win_x < img_win_width;

    float product = 0.f;
    if (in_range)
    {
        const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
                                           blockIdx.x * win_block_stride_x * blockDim.z + win_x) *
                                          cblock_hist_size;

        // Each thread accumulates a strided subset of the descriptor elements.
        for (int i = threadIdx.x; i < cdescr_size; i += nthreads)
        {
            int offset_y = i / cdescr_width;
            int offset_x = i - offset_y * cdescr_width;
            product += coefs[i] * hist[offset_y * img_block_width * cblock_hist_size + offset_x];
        }
    }

    // One slice of `nthreads` partial sums per window handled by this block.
    __shared__ float products[nthreads * nblocks];

    const int tid = threadIdx.z * nthreads + threadIdx.x;
    products[tid] = product;

    __syncthreads();

    // Tree reduction over threadIdx.x inside each window's slice.
    if (nthreads >= 512)
    {
        if (threadIdx.x < 256) products[tid] = product = product + products[tid + 256];
        __syncthreads();
    }
    if (nthreads >= 256)
    {
        if (threadIdx.x < 128) products[tid] = product = product + products[tid + 128];
        __syncthreads();
    }
    if (nthreads >= 128)
    {
        if (threadIdx.x < 64) products[tid] = product = product + products[tid + 64];
        __syncthreads();
    }

    if (threadIdx.x < 32)
    {
        // NOTE(review): the last steps use a volatile pointer and no barrier, i.e. they
        // rely on warp-synchronous execution; confirm this holds on the target GPUs.
        volatile float* smem = products;
        if (nthreads >= 64) smem[tid] = product = product + smem[tid + 32];
        if (nthreads >= 32) smem[tid] = product = product + smem[tid + 16];
        if (nthreads >= 16) smem[tid] = product = product + smem[tid + 8];
        if (nthreads >= 8) smem[tid] = product = product + smem[tid + 4];
        if (nthreads >= 4) smem[tid] = product = product + smem[tid + 2];
        if (nthreads >= 2) smem[tid] = product = product + smem[tid + 1];
    }

    // Thread 0 of each in-range window holds the full dot product.
    if (in_range && threadIdx.x == 0)
        confidences[blockIdx.y * img_win_width + blockIdx.x * blockDim.z + win_x]
            = (float)(product + free_coef);
}
// Host launcher for compute_confidence_hists_kernel_many_blocks.
// Derives the window grid from the image/window geometry, launches one thread block
// per window (nblocks == 1, 256 threads each) and blocks until the kernel finishes.
//   block_hists - device pointer to the block histograms
//   coefs       - device pointer to the classifier weights
//   confidences - device pointer receiving one float score per window
// Errors from the launch or the kernel are reported through cudaSafeCall.
void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
                              int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
                              float* coefs, float free_coef, float threshold, float *confidences)
{
    const int nthreads = 256;
    const int nblocks = 1;

    int win_block_stride_x = win_stride_x / block_stride_x;
    int win_block_stride_y = win_stride_y / block_stride_y;
    int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
    int img_win_height = (height - win_height + win_stride_y) / win_stride_y;

    // No window fits into the image: a zero-sized grid is an invalid launch
    // configuration, and there is nothing to compute anyway.
    if (img_win_width <= 0 || img_win_height <= 0)
        return;

    dim3 threads(nthreads, 1, nblocks);
    dim3 grid(divUp(img_win_width, nblocks), img_win_height);

    cudaSafeCall(cudaFuncSetCacheConfig(compute_confidence_hists_kernel_many_blocks<nthreads, nblocks>,
                                        cudaFuncCachePreferL1));

    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
                          block_stride_x;

    compute_confidence_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
        img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
        block_hists, coefs, free_coef, threshold, confidences);
    // Launch-configuration errors are only visible through cudaGetLastError().
    cudaSafeCall(cudaGetLastError());

    cudaSafeCall(cudaThreadSynchronize());
}
template <int nthreads, // Number of threads per one histogram block
int nblocks> // Number of histogram block processed by single GPU thread block
......
......@@ -57,6 +57,8 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat&, Size, GpuMat&, int) {
// Fallback definitions (the enclosing preprocessor condition is outside this view):
// every entry point just calls throw_nogpu(); value-returning ones then return an
// empty vector.
std::vector<float> cv::gpu::HOGDescriptor::getDefaultPeopleDetector() { throw_nogpu(); return std::vector<float>(); }
std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector48x96() { throw_nogpu(); return std::vector<float>(); }
std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector64x128() { throw_nogpu(); return std::vector<float>(); }
void cv::gpu::HOGDescriptor::computeConfidence(const GpuMat&, vector<Point>&, double, Size, Size, vector<Point>&, vector<double>&) { throw_nogpu(); }
void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat&, vector<Rect>&, double, Size, Size, vector<HOGConfidence>&, int) { throw_nogpu(); }
#else
......@@ -79,6 +81,10 @@ namespace cv { namespace gpu { namespace device
int width, float* block_hists, float* coefs, float free_coef,
float threshold, unsigned char* labels);
// Device-side entry point that scores every detection window: writes one float
// (classifier response plus free_coef) per window into `confidences`.
// block_hists, coefs and confidences are passed straight to the CUDA kernel.
void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
float* coefs, float free_coef, float threshold, float *confidences);
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
cv::gpu::DevMem2Df descriptors);
......@@ -258,6 +264,99 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride,
}
}
void cv::gpu::HOGDescriptor::computeConfidence(const GpuMat& img, vector<Point>& hits, double hit_threshold,
Size win_stride, Size padding, vector<Point>& locations, vector<double>& confidences)
{
CV_Assert(padding == Size(0, 0));
hits.clear();
if (detector.empty())
return;
computeBlockHistograms(img);
if (win_stride == Size())
win_stride = block_stride;
else
CV_Assert(win_stride.width % block_stride.width == 0 &&
win_stride.height % block_stride.height == 0);
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
labels.create(1, wins_per_img.area(), CV_32F);
hog::compute_confidence_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,
win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(),
detector.ptr<float>(), (float)free_coef, (float)hit_threshold, labels.ptr<float>());
labels.download(labels_host);
float* vec = labels_host.ptr<float>();
// does not support roi for now..
locations.clear();
confidences.clear();
for (int i = 0; i < wins_per_img.area(); i++)
{
int y = i / wins_per_img.width;
int x = i - wins_per_img.width * y;
if (vec[i] >= hit_threshold)
hits.push_back(Point(x * win_stride.width, y * win_stride.height));
Point pt(win_stride.width * x, win_stride.height * y);
locations.push_back(pt);
confidences.push_back((double)vec[i]);
}
}
void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat& img, vector<Rect>& found_locations,
double hit_threshold, Size win_stride, Size padding,
vector<HOGConfidence> &conf_out, int group_threshold)
{
vector<double> level_scale;
double scale = 1.;
int levels = 0;
for (levels = 0; levels < conf_out.size(); levels++)
{
scale = conf_out[levels].scale;
level_scale.push_back(scale);
if (cvRound(img.cols/scale) < win_size.width ||
cvRound(img.rows/scale) < win_size.height)
break;
}
levels = std::max(levels, 1);
level_scale.resize(levels);
std::vector<Rect> all_candidates;
vector<Point> locations;
for (size_t i = 0; i < level_scale.size(); i++)
{
double scale = level_scale[i];
Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
GpuMat smaller_img;
if (sz == img.size())
smaller_img = img;
else
{
smaller_img.create(sz, img.type());
switch (img.type()) {
case CV_8UC1: hog::resize_8UC1(img, smaller_img); break;
case CV_8UC4: hog::resize_8UC4(img, smaller_img); break;
}
}
computeConfidence(smaller_img, locations, hit_threshold, win_stride, padding, conf_out[i].locations, conf_out[i].confidences);
Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale));
for (size_t j = 0; j < locations.size(); j++)
all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, scaled_win_size));
}
found_locations.assign(all_candidates.begin(), all_candidates.end());
groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/);
}
void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector<Point>& hits, double hit_threshold, Size win_stride, Size padding)
{
......
......@@ -491,6 +491,17 @@ protected:
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
// struct for detection region of interest (ROI)
// One pyramid level of a multi-scale ROI query (see HOGDescriptor::detectMultiScaleROI).
struct DetectionROI
{
// scale factor of this level; the image is resized by 1/scale and the
// resulting bounding boxes are winSize * scale
double scale;
// set of requested window locations (top-left corners) to be evaluated
vector<cv::Point> locations;
// output: confidence value for each entry of `locations`
vector<double> confidences;
};
struct CV_EXPORTS_W HOGDescriptor
{
public:
......@@ -583,6 +594,23 @@ public:
CV_PROP bool gammaCorrection;
CV_PROP vector<float> svmDetector;
CV_PROP int nlevels;
// Evaluates the classifier only at the requested window positions `locations`.
// `confidences` receives one value per requested location (-10.0 for a window that
// does not fit into the padded image); `foundLocations` receives the locations
// whose confidence is >= hitThreshold.
virtual void detectROI(const cv::Mat& img, const vector<cv::Point> &locations,
CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
double hitThreshold = 0, cv::Size winStride = Size(),
cv::Size padding = Size()) const;
// Runs detectROI for every entry of `locations` on the image resized by 1/scale,
// writing the per-location confidences back into each entry. `foundLocations`
// receives the hit rectangles in original-image coordinates after groupRectangles
// has been applied with `groupThreshold`.
virtual void detectMultiScaleROI(const cv::Mat& img,
CV_OUT std::vector<cv::Rect>& foundLocations,
std::vector<DetectionROI>& locations,
double hitThreshold = 0,
int groupThreshold = 0) const;
// Reads/parses Dalal's alt model file (binary SVMlight format, linear kernel only)
// and installs the weights through setSVMDetector. Throws cv::Exception when the
// file cannot be opened or parsed.
void readALTModel(std::string modelfile);
};
......
......@@ -39,7 +39,7 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <stdio.h>
#include "precomp.hpp"
#include <iterator>
#ifdef HAVE_IPP
......@@ -2382,4 +2382,238 @@ vector<float> HOGDescriptor::getDaimlerPeopleDetector()
return vector<float>(detector, detector + sizeof(detector)/sizeof(detector[0]));
}
// parallel_for body used by HOGDescriptor::detectMultiScaleROI: for each DetectionROI
// in its range it resizes the image by 1/scale, runs detectROI at the requested
// locations (filling that entry's confidences) and appends the hits, rescaled to
// original-image coordinates, to the shared rectangle vector.
struct HOGConfInvoker
{
    HOGConfInvoker( const HOGDescriptor* _hog, const Mat& _img,
                    double _hitThreshold, Size _padding,
                    std::vector<DetectionROI>* locs,
                    ConcurrentRectVector* _vec )
    {
        hog = _hog;
        img = _img;
        hitThreshold = _hitThreshold;
        padding = _padding;
        locations = locs;
        vec = _vec;
    }

    void operator()( const BlockedRange& range ) const
    {
        int i, i1 = range.begin(), i2 = range.end();

        // An empty range has no entry to read a scale from.
        if( i1 >= i2 )
            return;

        // The scratch buffer is shared by every level of this range, so it has to fit
        // the largest resized image, i.e. the one produced by the smallest scale. The
        // scales come from the caller and are not guaranteed to be sorted.
        double minScale = (*locations)[i1].scale;
        for( i = i1 + 1; i < i2; i++ )
            minScale = std::min(minScale, (*locations)[i].scale);

        Size maxSz(cvCeil(img.cols/minScale), cvCeil(img.rows/minScale));
        Mat smallerImgBuf(maxSz, img.type());
        vector<Point> dets;

        for( i = i1; i < i2; i++ )
        {
            double scale = (*locations)[i].scale;

            Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
            // Header over the scratch buffer; no allocation per level.
            Mat smallerImg(sz, img.type(), smallerImgBuf.data);

            if( sz == img.size() )
                smallerImg = Mat(sz, img.type(), img.data, img.step);
            else
                resize(img, smallerImg, sz);

            hog->detectROI(smallerImg, (*locations)[i].locations, dets, (*locations)[i].confidences, hitThreshold, Size(), padding);
            Size scaledWinSize = Size(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale));
            for( size_t j = 0; j < dets.size(); j++ )
                vec->push_back(Rect(cvRound(dets[j].x*scale),
                                    cvRound(dets[j].y*scale),
                                    scaledWinSize.width, scaledWinSize.height));
        }
    }

    const HOGDescriptor* hog;
    Mat img;
    double hitThreshold;
    std::vector<DetectionROI>* locations;
    Size padding;
    ConcurrentRectVector* vec;
};
// Evaluates the linear SVM only at the requested window positions.
//   img            - image to scan
//   locations      - top-left corners of the windows to evaluate
//   foundLocations - (out) requested locations whose score is >= hitThreshold
//   confidences    - (out) one score per requested location, in the same order;
//                    -10.0 marks a window that does not fit into the padded image
//   winStride      - Size() means cellSize; only used to derive the cache stride
//   padding        - rounded up to a multiple of the cache stride
// Both outputs are left empty when no detector is set or `locations` is empty.
void HOGDescriptor::detectROI(const cv::Mat& img, const vector<cv::Point> &locations,
                              CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
                              double hitThreshold, cv::Size winStride,
                              cv::Size padding) const
{
    // Score reported for a requested window lying outside the padded image.
    const double outOfImageConfidence = -10.0;

    foundLocations.clear();
    confidences.clear();

    if( svmDetector.empty() || locations.empty() )
        return;

    if( winStride == Size() )
        winStride = cellSize;
    Size cacheStride(gcd(winStride.width, blockStride.width),
                     gcd(winStride.height, blockStride.height));

    const size_t nwindows = locations.size();
    padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);
    padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);

    HOGCache cache(this, img, padding, padding, true, cacheStride);

    const HOGCache::BlockData* blockData = &cache.blockData[0];

    int nblocks = cache.nblocks.area();
    int blockHistogramSize = cache.blockHistogramSize;
    size_t dsize = getDescriptorSize();

    // Optional bias term stored right after the descriptor weights.
    double rho = svmDetector.size() > dsize ? svmDetector[dsize] : 0;
    vector<float> blockHist(blockHistogramSize);

    confidences.reserve(nwindows);
    for( size_t i = 0; i < nwindows; i++ )
    {
        const Point pt0 = locations[i];
        if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
            pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )
        {
            // out of image
            confidences.push_back(outOfImageConfidence);
            continue;
        }

        double s = rho;
        const float* svmVec = &svmDetector[0];
        int j, k;

        // Accumulate the dot product block by block; svmVec walks the weights.
        for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize )
        {
            const HOGCache::BlockData& bj = blockData[j];
            Point pt = pt0 + bj.imgOffset;

            const float* vec = cache.getBlock(pt, &blockHist[0]);
            // Main loop unrolled by 4, followed by the remainder.
            for( k = 0; k <= blockHistogramSize - 4; k += 4 )
                s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] +
                     vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3];
            for( ; k < blockHistogramSize; k++ )
                s += vec[k]*svmVec[k];
        }

        confidences.push_back(s);

        if( s >= hitThreshold )
            foundLocations.push_back(pt0);
    }
}
// Evaluates every DetectionROI of `locations` (possibly in parallel), writing the
// per-location confidences back into each entry, then groups the collected hit
// rectangles into `foundLocations`.
void HOGDescriptor::detectMultiScaleROI(const cv::Mat& img,
                                        CV_OUT std::vector<cv::Rect>& foundLocations,
                                        std::vector<DetectionROI>& locations,
                                        double hitThreshold,
                                        int groupThreshold) const
{
    // Fixed padding handed to detectROI at every scale.
    const Size roiPadding(8, 8);

    ConcurrentRectVector candidates;
    HOGConfInvoker worker(this, img, hitThreshold, roiPadding, &locations, &candidates);
    parallel_for(BlockedRange(0, (int)locations.size()), worker);

    // Copy the concurrently collected rectangles into the caller's vector.
    foundLocations.assign(candidates.begin(), candidates.end());

    cv::groupRectangles(foundLocations, groupThreshold, 0.2);
}
// Reads exactly `count` items of type T from `fp` into `dst`; false on a short read.
template<typename T> static bool readALTItems(FILE* fp, T* dst, size_t count)
{
    return fread(dst, sizeof(T), count, fp) == count;
}

// Loads a linear SVM from a binary SVMlight-format model file and installs it with
// setSVMDetector (totwords weights followed by the negated bias).
//   modelfile - path of the model file
// Throws cv::Exception when the file cannot be opened, is truncated, has an
// unexpected version, or does not describe a linear kernel. The file is closed on
// every exit path.
void HOGDescriptor::readALTModel(std::string modelfile)
{
    // Closes the model file when the function leaves, including via an exception.
    struct FileCloser
    {
        explicit FileCloser(FILE* f) : fp(f) {}
        ~FileCloser() { if (fp) fclose(fp); }
        FILE* fp;
    };

    // read model from SVMlight format..
    FILE* modelfl = fopen(modelfile.c_str(), "rb");
    if (modelfl == NULL)
        throw Exception(CV_StsError, "file not exist", __FUNCTION__, __FILE__, __LINE__);
    FileCloser closer(modelfl);

    char version_buffer[10] = {0};
    if (!readALTItems(modelfl, version_buffer, sizeof(version_buffer)))
        throw Exception(CV_StsError, "version?", __FUNCTION__, __FILE__, __LINE__);
    // The bytes come from the file: force termination before treating them as a string.
    version_buffer[sizeof(version_buffer) - 1] = '\0';
    if (strcmp(version_buffer, "V6.01") != 0)
        throw Exception(CV_StsError, "version doesnot match", __FUNCTION__, __FILE__, __LINE__);

    /* read version number */
    int version = 0;
    if (!readALTItems(modelfl, &version, 1))
        throw Exception(CV_StsError, "cannot read version number", __FUNCTION__, __FILE__, __LINE__);
    if (version < 200)
        throw Exception(CV_StsError, "version doesnot match", __FUNCTION__, __FILE__, __LINE__);

    int kernel_type = 0;
    // Kernel parameters and the custom string are parsed only to advance the file.
    int poly_degree = 0;
    double rbf_gamma = 0, coef_lin = 0, coef_const = 0;
    int l = 0;
    if (!readALTItems(modelfl, &kernel_type, 1) ||
        !readALTItems(modelfl, &poly_degree, 1) ||
        !readALTItems(modelfl, &rbf_gamma, 1) ||
        !readALTItems(modelfl, &coef_lin, 1) ||
        !readALTItems(modelfl, &coef_const, 1) ||
        !readALTItems(modelfl, &l, 1))
        throw Exception(CV_StsError, "truncated model header", __FUNCTION__, __FILE__, __LINE__);

    // Skip the `l`-byte custom string without allocating a file-controlled size.
    if (l < 0 || fseek(modelfl, l, SEEK_CUR) != 0)
        throw Exception(CV_StsError, "invalid custom field length", __FUNCTION__, __FILE__, __LINE__);

    int totwords = 0;
    int totdoc = 0, sv_num = 0; // read and ignored
    double linearbias = 0;
    if (!readALTItems(modelfl, &totwords, 1) ||
        !readALTItems(modelfl, &totdoc, 1) ||
        !readALTItems(modelfl, &sv_num, 1) ||
        !readALTItems(modelfl, &linearbias, 1))
        throw Exception(CV_StsError, "truncated model header", __FUNCTION__, __FILE__, __LINE__);

    if (totwords < 0)
        throw Exception(CV_StsError, "invalid feature count", __FUNCTION__, __FILE__, __LINE__);

    if (kernel_type != 0) /* only the linear kernel is supported */
        throw Exception(CV_StsError, "unsupported kernel type", __FUNCTION__, __FILE__, __LINE__);

    /* linear kernel: the file stores totwords + 1 weights */
    const size_t nweights = (size_t)totwords + 1;
    std::vector<double> linearwt(nweights);
    if (!readALTItems(modelfl, &linearwt[0], nweights))
        throw Exception(CV_StsError, "truncated weight vector", __FUNCTION__, __FILE__, __LINE__);

    std::vector<float> detector;
    detector.reserve(nweights);
    for (int i = 0; i < totwords; i++)
        detector.push_back((float)linearwt[i]);
    // The detector stores the bias with the opposite sign, after the weights.
    detector.push_back((float)-linearbias);
    setSVMDetector(detector);
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册