提交 5539e85a 编写于 作者: Y yao

use perf test replace performance sample

上级 55c9a7c8
......@@ -7,12 +7,13 @@
// copy or use the software.
//
//
// Intel License Agreement
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
......@@ -21,12 +22,12 @@
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
......@@ -41,129 +42,118 @@
#include "precomp.hpp"
#ifdef HAVE_OPENCL
using namespace std;
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
// Prints a blank line followed by one line naming the operating system family
// and pointer width the binary was compiled for (for example "OS: Linux 64").
// The choice is made entirely at compile time from predefined macros; on a
// platform matching none of the branches only the blank line is printed.
void print_info()
{
    printf("\n");
#if defined _WIN32
# if defined _WIN64
    puts("OS: Windows 64");
# else
    puts("OS: Windows 32");
# endif
// The bare `linux` macro is a legacy name that compilers drop in strict ISO
// mode (e.g. -std=c++11), so the reserved spellings are checked as well.
#elif defined __linux__ || defined __linux || defined linux
# if defined _LP64
    puts("OS: Linux 64");
# else
    puts("OS: Linux 32");
# endif
#elif defined __APPLE__
# if defined _LP64
    puts("OS: Apple 64");
# else
    puts("OS: Apple 32");
# endif
#endif
}
std::string workdir;
int main(int argc, char **argv)
int main(int argc, const char *argv[])
{
TS::ptr()->init("ocl");
InitGoogleTest(&argc, argv);
const char *keys =
"{ h | help | false | print help message }"
"{ w | workdir | ../../../samples/c/| set working directory }"
vector<ocl::Info> oclinfo;
int num_devices = getDevice(oclinfo);
"{ t | type | gpu | set device type:cpu or gpu}"
if (num_devices < 1)
{
cerr << "no device found\n";
return -1;
}
"{ p | platform | 0 | set platform id }"
int devidx = 0;
"{ d | device | 0 | set device id }";
for (size_t i = 0; i < oclinfo.size(); i++)
{
for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++)
{
printf("device %d: %s\n", devidx++, oclinfo[i].DeviceName[j].c_str());
}
}
redirectError(cvErrorCallback);
const char *keys =
"{ h | help | false | print help message }"
"{ f | filter | | filter for test }"
"{ w | workdir | | set working directory }"
"{ l | list | false | show all tests }"
"{ d | device | 0 | device id }"
"{ i | iters | 10 | iteration count }"
"{ m | warmup | 1 | gpu warm up iteration count}"
"{ t | xtop | 1.1 | xfactor top boundary}"
"{ b | xbottom | 0.9 | xfactor bottom boundary}"
"{ v | verify | false | only run gpu once to verify if problems occur}";
CommandLineParser cmd(argc, argv, keys);
if (cmd.get<bool>("help"))
{
cout << "Avaible options besides goole test option:" << endl;
cout << "Avaible options:" << endl;
cmd.printParams();
return 0;
}
workdir = cmd.get<string>("workdir");
string type = cmd.get<string>("type");
unsigned int pid = cmd.get<unsigned int>("platform");
int device = cmd.get<int>("device");
print_info();
// int flag = CVCL_DEVICE_TYPE_GPU;
// if(type == "cpu")
// {
// flag = CVCL_DEVICE_TYPE_CPU;
// }
std::vector<cv::ocl::Info> oclinfo;
int devnums = getDevice(oclinfo);
if(devnums <= device || device < 0)
if (device < 0 || device >= num_devices)
{
std::cout << "device invalid\n";
cerr << "Invalid device ID" << endl;
return -1;
}
if (cmd.get<bool>("verify"))
{
TestSystem::instance().setNumIters(1);
TestSystem::instance().setGPUWarmupIters(0);
TestSystem::instance().setCPUIters(0);
}
if(pid >= oclinfo.size())
devidx = 0;
for (size_t i = 0; i < oclinfo.size(); i++)
{
for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++, devidx++)
{
if (device == devidx)
{
ocl::setDevice(oclinfo[i], (int)j);
TestSystem::instance().setRecordName(oclinfo[i].DeviceName[j]);
printf("\nuse %d: %s\n", devidx, oclinfo[i].DeviceName[j].c_str());
goto END_DEV;
}
}
}
std::cout << "platform invalid\n";
END_DEV:
return -1;
string filter = cmd.get<string>("filter");
string workdir = cmd.get<string>("workdir");
bool list = cmd.get<bool>("list");
int iters = cmd.get<int>("iters");
int wu_iters = cmd.get<int>("warmup");
double x_top = cmd.get<double>("xtop");
double x_bottom = cmd.get<double>("xbottom");
}
TestSystem::instance().setTopThreshold(x_top);
TestSystem::instance().setBottomThreshold(x_bottom);
if(pid != 0 || device != 0)
if (!filter.empty())
{
TestSystem::instance().setTestFilter(filter);
}
if (!workdir.empty())
{
if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\')
{
workdir += '/';
}
setDevice(oclinfo[pid], device);
TestSystem::instance().setWorkingDir(workdir);
}
if (list)
{
TestSystem::instance().setListMode(true);
}
cout << "Device type:" << type << endl << "Device name:" << oclinfo[pid].DeviceName[device] << endl;
setBinpath(CLBINPATH);
return RUN_ALL_TESTS();
}
TestSystem::instance().setNumIters(iters);
TestSystem::instance().setGPUWarmupIters(wu_iters);
#else // DON'T HAVE_OPENCL
TestSystem::instance().run();
// Fallback entry point: reports that the build has no OpenCL support and
// exits with status 0.
int main()
{
    // puts() appends the trailing newline itself.
    puts("OpenCV was built without OpenCL support");
    return 0;
}
\ No newline at end of file
#endif // HAVE_OPENCL
此差异已折叠。
......@@ -44,79 +44,77 @@
//M*/
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
PARAM_TEST_CASE(Blend, MatType, int)
///////////// blend ////////////////////////
template <typename T>
void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold)
{
int type;
int channels;
std::vector<cv::ocl::Info> oclinfo;
result_gold.create(img1.size(), img1.type());
int cn = img1.channels();
virtual void SetUp()
for (int y = 0; y < img1.rows; ++y)
{
const float *weights1_row = weights1.ptr<float>(y);
const float *weights2_row = weights2.ptr<float>(y);
const T *img1_row = img1.ptr<T>(y);
const T *img2_row = img2.ptr<T>(y);
T *result_gold_row = result_gold.ptr<T>(y);
type = GET_PARAM(0);
channels = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
//cv::ocl::setBinpath(CLBINPATH);
for (int x = 0; x < img1.cols * cn; ++x)
{
float w1 = weights1_row[x / cn];
float w2 = weights2_row[x / cn];
result_gold_row[x] = static_cast<T>((img1_row[x] * w1 + img2_row[x] * w2) / (w1 + w2 + 1e-5f));
}
};
TEST_P(Blend, Performance)
}
}
TEST(blend)
{
cv::Size size(MWIDTH, MHEIGHT);
cv::Mat img1_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
cv::Mat img2_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
cv::ocl::oclMat gimg1(size, CV_MAKETYPE(type, channels)), gimg2(size, CV_MAKETYPE(type, channels)), gweights1(size, CV_32F), gweights2(size, CV_32F);
cv::ocl::oclMat gdst(size, CV_MAKETYPE(type, channels));
Mat src1, src2, weights1, weights2, dst;
ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst;
int all_type[] = {CV_8UC1, CV_8UC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
double totalgputick_all = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for (int j = 0; j < LOOP_TIMES + 1; j ++) //LOOP_TIMES=100
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
t1 = (double)cvGetTickCount();
cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host);
cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host);
cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1);
cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights1);
t2 = (double)cvGetTickCount();
cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst);
t2 = (double)cvGetTickCount() - t2;
cv::Mat m;
gdst.download(m);
t1 = (double)cvGetTickCount() - t1;
if (j == 0)
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
continue;
SUBTEST << size << 'x' << size << "; " << type_name[j] << " and CV_32FC1";
gen(src1, size, size, all_type[j], 0, 256);
gen(src2, size, size, all_type[j], 0, 256);
gen(weights1, size, size, CV_32FC1, 0, 1);
gen(weights2, size, size, CV_32FC1, 0, 1);
blendLinearGold<uchar>(src1, src2, weights1, weights2, dst);
CPU_ON;
blendLinearGold<uchar>(src1, src2, weights1, weights2, dst);
CPU_OFF;
d_src1.upload(src1);
d_src2.upload(src2);
d_weights1.upload(weights1);
d_weights2.upload(weights2);
WARMUP_ON;
ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
WARMUP_OFF;
GPU_ON;
ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
d_src2.upload(src2);
d_weights1.upload(weights1);
d_weights2.upload(weights2);
ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
d_dst.download(dst);
GPU_FULL_OFF;
}
}
totalgputick_all = t1 + totalgputick_all;
totalgputick_kernel = t2 + totalgputick_kernel;
};
cout << "average gpu total runtime is " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfering is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
\ No newline at end of file
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine(
Values(CV_8U, CV_32F), Values(1, 4)));
#endif
\ No newline at end of file
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
//////////////////// BruteForceMatch /////////////////
// Benchmarks brute-force descriptor matching with the L2 norm. For every size
// from Min_Size up to Max_Size (multiplied by Multiple each step) it runs
// match, knnMatch (k = 2) and radiusMatch on the CPU BFMatcher and on the
// OpenCL BruteForceMatcher_OCL_base.
// NOTE(review): SUBTEST, CPU_ON/CPU_OFF, WARMUP_ON/WARMUP_OFF, GPU_ON/GPU_OFF,
// GPU_FULL_ON/GPU_FULL_OFF and gen() are defined outside this view; they
// presumably label each sub-test and time the statements they bracket -- confirm.
TEST(BruteForceMatcher)
{
    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
    {
        // Init CPU matcher
        int desc_len = 64;

        BFMatcher matcher(NORM_L2);

        // Query and train descriptor sets, both generated as CV_32F by gen().
        Mat query;
        gen(query, size, desc_len, CV_32F, 0, 1);

        Mat train;
        gen(train, size, desc_len, CV_32F, 0, 1);

        // Output
        vector< vector<DMatch> > matches(2);

        // Init GPU matcher
        ocl::BruteForceMatcher_OCL_base d_matcher(ocl::BruteForceMatcher_OCL_base::L2Dist);

        ocl::oclMat d_query(query);
        ocl::oclMat d_train(train);

        ocl::oclMat d_trainIdx, d_distance, d_allDist, d_nMatches;

        // ---- match: best single match per query descriptor ----
        SUBTEST << size << "; match";

        // Called once before the CPU_ON/CPU_OFF bracket, then again inside it.
        matcher.match(query, train, matches[0]);

        CPU_ON;
        matcher.match(query, train, matches[0]);
        CPU_OFF;

        WARMUP_ON;
        d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
        WARMUP_OFF;

        GPU_ON;
        d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
        GPU_OFF;

        // Full path: re-upload the inputs and use the overload that fills a
        // host-side vector of matches.
        GPU_FULL_ON;
        d_query.upload(query);
        d_train.upload(train);
        d_matcher.match(d_query, d_train, matches[0]);
        GPU_FULL_OFF;

        // ---- knnMatch: two nearest neighbours per query descriptor ----
        SUBTEST << size << "; knnMatch";

        matcher.knnMatch(query, train, matches, 2);

        CPU_ON;
        matcher.knnMatch(query, train, matches, 2);
        CPU_OFF;

        WARMUP_ON;
        d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
        WARMUP_OFF;

        GPU_ON;
        d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
        GPU_OFF;

        GPU_FULL_ON;
        d_query.upload(query);
        d_train.upload(train);
        d_matcher.knnMatch(d_query, d_train, matches, 2);
        GPU_FULL_OFF;

        // ---- radiusMatch: all matches within max_distance ----
        SUBTEST << size << "; radiusMatch";

        float max_distance = 2.0f;

        matcher.radiusMatch(query, train, matches, max_distance);

        CPU_ON;
        matcher.radiusMatch(query, train, matches, max_distance);
        CPU_OFF;

        // Drop the index buffer left over from the knn run before reusing it.
        d_trainIdx.release();

        WARMUP_ON;
        d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
        WARMUP_OFF;

        GPU_ON;
        d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
        GPU_OFF;

        GPU_FULL_ON;
        d_query.upload(query);
        d_train.upload(train);
        d_matcher.radiusMatch(d_query, d_train, matches, max_distance);
        GPU_FULL_OFF;
    }
}
\ No newline at end of file
......@@ -42,112 +42,42 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
#ifndef MWC_TEST_UTILITY
#define MWC_TEST_UTILITY
// Param class
#ifndef IMPLEMENT_PARAM_CLASS
#define IMPLEMENT_PARAM_CLASS(name, type) \
class name \
{ \
public: \
name ( type arg = type ()) : val_(arg) {} \
operator type () const {return val_;} \
private: \
type val_; \
}; \
inline void PrintTo( name param, std::ostream* os) \
{ \
*os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
}
IMPLEMENT_PARAM_CLASS(Channels, int)
#endif // IMPLEMENT_PARAM_CLASS
#endif // MWC_TEST_UTILITY
////////////////////////////////////////////////////////
// Canny1
extern std::string workdir;
IMPLEMENT_PARAM_CLASS(AppertureSize, int);
IMPLEMENT_PARAM_CLASS(L2gradient, bool);
PARAM_TEST_CASE(Canny1, AppertureSize, L2gradient)
///////////// Canny ////////////////////////
TEST(Canny)
{
int apperture_size;
bool useL2gradient;
//std::vector<cv::ocl::Info> oclinfo;
Mat img = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE);
virtual void SetUp()
if (img.empty())
{
apperture_size = GET_PARAM(0);
useL2gradient = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
throw runtime_error("can't open aloeL.jpg");
}
};
TEST_P(Canny1, Performance)
{
cv::Mat img = readImage(workdir + "fruits.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
double low_thresh = 100.0;
double high_thresh = 150.0;
cv::Mat edges_gold;
cv::ocl::oclMat edges;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);//upload
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
edges.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
SUBTEST << img.cols << 'x' << img.rows << "; aloeL.jpg" << "; edges" << "; CV_8UC1";
if(j == 0)
continue;
Mat edges(img.size(), CV_8UC1);
totalgputick = t1 + totalgputick;
CPU_ON;
Canny(img, edges, 50.0, 100.0);
CPU_OFF;
totalgputick_kernel = t2 + totalgputick_kernel;
ocl::oclMat d_img(img);
ocl::oclMat d_edges;
ocl::CannyBuf d_buf;
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
WARMUP_ON;
ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
WARMUP_OFF;
GPU_ON;
ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
;
GPU_OFF;
GPU_FULL_ON;
d_img.upload(img);
ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
d_edges.download(edges);
GPU_FULL_OFF;
}
\ No newline at end of file
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny1, testing::Combine(
testing::Values(AppertureSize(3), AppertureSize(5)),
testing::Values(L2gradient(false), L2gradient(true))));
#endif //Have opencl
\ No newline at end of file
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
///////////// cvtColor////////////////////////
// Benchmarks the CV_RGBA2GRAY colour conversion on the CPU (cvtColor) and on
// the OpenCL device (ocl::cvtColor) for every square size from Min_Size up to
// Max_Size, multiplied by Multiple each step.
// NOTE(review): SUBTEST, the *_ON/*_OFF macros and gen() are defined outside
// this view; they presumably label the sub-test and time the statements they
// bracket -- confirm.
TEST(cvtColor)
{
    Mat src;
    Mat dst;
    ocl::oclMat d_src;
    ocl::oclMat d_dst;

    // Input matrix types under test and their printable names (kept in step).
    int mat_types[] = {CV_8UC4};
    std::string mat_type_names[] = {"CV_8UC4"};
    const size_t type_count = sizeof(mat_types) / sizeof(int);

    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
    {
        for (size_t t = 0; t < type_count; t++)
        {
            gen(src, size, size, mat_types[t], 0, 256);
            SUBTEST << size << "x" << size << "; " << mat_type_names[t] << " ; CV_RGBA2GRAY";

            // Called once before the CPU_ON/CPU_OFF bracket, then again inside it.
            cvtColor(src, dst, CV_RGBA2GRAY, 4);

            CPU_ON;
            cvtColor(src, dst, CV_RGBA2GRAY, 4);
            CPU_OFF;

            d_src.upload(src);

            WARMUP_ON;
            ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4);
            WARMUP_OFF;

            GPU_ON;
            ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4);
            GPU_OFF;

            // Full path: upload, convert and download inside one bracket.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4);
            d_dst.download(dst);
            GPU_FULL_OFF;
        }
    }
}
\ No newline at end of file
......@@ -15,8 +15,7 @@
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai fangfang@multicorewareinc.com
//
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
......@@ -31,7 +30,7 @@
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
......@@ -43,78 +42,47 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
///////////////////////////////////////////////////////////////////////////////
/// ColumnSum
#ifdef HAVE_OPENCL
////////////////////////////////////////////////////////////////////////
// ColumnSum
PARAM_TEST_CASE(ColumnSum)
///////////// columnSum////////////////////////
TEST(columnSum)
{
cv::Mat src;
//std::vector<cv::ocl::Info> oclinfo;
Mat src, dst;
ocl::oclMat d_src, d_dst;
virtual void SetUp()
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
};
SUBTEST << size << 'x' << size << "; CV_32FC1";
TEST_F(ColumnSum, Performance)
{
cv::Size size(MWIDTH, MHEIGHT);
cv::Mat src = randomMat(size, CV_32FC1);
cv::ocl::oclMat d_dst;
gen(src, size, size, CV_32FC1, 0, 256);
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
CPU_ON;
dst.create(src.size(), src.type());
for(int j = 0; j < LOOP_TIMES + 1; j ++)
for (int i = 1; i < src.rows; ++i)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat d_src(src);
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::columnSum(d_src, d_dst);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
d_dst.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
for (int j = 0; j < src.cols; ++j)
{
dst.at<float>(i, j) = src.at<float>(i, j) += src.at<float>(i - 1, j);
}
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
CPU_OFF;
d_src.upload(src);
WARMUP_ON;
ocl::columnSum(d_src, d_dst);
WARMUP_OFF;
GPU_ON;
ocl::columnSum(d_src, d_dst);
;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::columnSum(d_src, d_dst);
d_dst.download(dst);
GPU_FULL_OFF;
}
}
\ No newline at end of file
#endif
\ No newline at end of file
......@@ -15,7 +15,7 @@
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfangbai, fangfang@multicorewareinc.com
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
......@@ -42,85 +42,48 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace std;
#ifdef HAVE_CLAMDFFT
////////////////////////////////////////////////////////////////////////////
// Dft
PARAM_TEST_CASE(Dft, cv::Size, bool)
{
cv::Size dft_size;
bool dft_rows;
vector<cv::ocl::Info> info;
virtual void SetUp()
{
dft_size = GET_PARAM(0);
dft_rows = GET_PARAM(1);
cv::ocl::getDevice(info);
}
};
TEST_P(Dft, C2C)
///////////// dft ////////////////////////
TEST(dft)
{
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
int flags = 0;
flags |= dft_rows ? cv::DFT_ROWS : 0;
cv::ocl::oclMat d_b;
Mat src, dst;
ocl::oclMat d_src, d_dst;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
int all_type[] = {CV_32FC1, CV_32FC2};
std::string type_name[] = {"CV_32FC1", "CV_32FC2"};
for(int j = 0; j < LOOP_TIMES + 1; j ++)
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; complex-to-complex";
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ga = cv::ocl::oclMat(a); //upload
gen(src, size, size, all_type[j], Scalar::all(0), Scalar::all(1));
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::dft(ga, d_b, a.size(), flags);
t2 = (double)cvGetTickCount() - t2;//kernel
dft(src, dst);
cv::Mat cpu_dst;
d_b.download (cpu_dst);//download
CPU_ON;
dft(src, dst);
CPU_OFF;
t1 = (double)cvGetTickCount() - t1;//gpu end1
d_src.upload(src);
if(j == 0)
continue;
WARMUP_ON;
ocl::dft(d_src, d_dst, Size(size, size));
WARMUP_OFF;
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
GPU_ON;
ocl::dft(d_src, d_dst, Size(size, size));
;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::dft(d_src, d_dst, Size(size, size));
d_dst.download(dst);
GPU_FULL_OFF;
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
TEST_P(Dft, R2CthenC2R)
{
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
int flags = 0;
//flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
cv::ocl::oclMat d_b, d_c;
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
}
}
\ No newline at end of file
//INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
// testing::Values(cv::Size(1280, 1024), cv::Size(1920, 1080),cv::Size(1800, 1500)),
// testing::Values(false, true)));
#endif // HAVE_CLAMDFFT
此差异已折叠。
......@@ -16,6 +16,7 @@
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
......@@ -41,73 +42,47 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace std;
#ifdef HAVE_CLAMDBLAS
////////////////////////////////////////////////////////////////////////////
// GEMM
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
{
int type;
cv::Size mat_size;
int flags;
vector<cv::ocl::Info> info;
virtual void SetUp()
{
type = GET_PARAM(0);
mat_size = GET_PARAM(1);
flags = GET_PARAM(2);
cv::ocl::getDevice(info);
}
};
TEST_P(Gemm, Performance)
///////////// gemm ////////////////////////
TEST(gemm)
{
cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
cv::ocl::oclMat ocl_dst;
Mat src1, src2, src3, dst;
ocl::oclMat d_src1, d_src2, d_src3, d_dst;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for(int j = 0; j < LOOP_TIMES + 1; j ++)
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload
cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload
cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::gemm(ga, gb, 1.0, gc, 1.0, ocl_dst, flags);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
ocl_dst.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
SUBTEST << size << 'x' << size;
gen(src1, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
gen(src2, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
gen(src3, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
gemm(src1, src2, 1.0, src3, 1.0, dst);
CPU_ON;
gemm(src1, src2, 1.0, src3, 1.0, dst);
CPU_OFF;
d_src1.upload(src1);
d_src2.upload(src2);
d_src3.upload(src3);
WARMUP_ON;
ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
WARMUP_OFF;
GPU_ON;
ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
;
GPU_OFF;
GPU_FULL_ON;
d_src1.upload(src1);
d_src2.upload(src2);
d_src3.upload(src3);
ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
d_dst.download(dst);
GPU_FULL_OFF;
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
\ No newline at end of file
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)(cv::GEMM_1_T + cv::GEMM_2_T))));
#endif
\ No newline at end of file
......@@ -10,12 +10,12 @@
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
......@@ -30,7 +30,7 @@
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
......@@ -42,133 +42,97 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/objdetect/objdetect.hpp"
#include "precomp.hpp"
#ifdef HAVE_OPENCL
///////////// Haar ////////////////////////
namespace cv
{
namespace ocl
{
using namespace cvtest;
using namespace testing;
using namespace std;
using namespace cv;
extern std::string workdir;
struct getRect
{
Rect operator ()(const CvAvgComp &e) const
Rect operator()(const CvAvgComp &e) const
{
return e.rect;
}
};
PARAM_TEST_CASE(HaarTestBase, int, int)
class CascadeClassifier_GPU : public OclCascadeClassifier
{
//std::vector<cv::ocl::Info> oclinfo;
cv::ocl::OclCascadeClassifier cascade, nestedCascade;
cv::CascadeClassifier cpucascade, cpunestedCascade;
// Mat img;
double scale;
int index;
virtual void SetUp()
{
scale = 1.0;
index = 0;
string cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml";
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
public:
void detectMultiScale(oclMat &image,
CV_OUT std::vector<cv::Rect>& faces,
double scaleFactor = 1.1,
int minNeighbors = 3, int flags = 0,
Size minSize = Size(),
Size maxSize = Size())
{
cout << "ERROR: Could not load classifier cascade" << endl;
return;
}
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums>0);
////if you want to use undefault device, set it here
////setDevice(oclinfo[0]);
//cv::ocl::setBinpath("E:\\");
(void)maxSize;
MemStorage storage(cvCreateMemStorage(0));
//CvMat img=image;
CvSeq *objs = oclHaarDetectObjects(image, storage, scaleFactor, minNeighbors, flags, minSize);
vector<CvAvgComp> vecAvgComp;
Seq<CvAvgComp>(objs).copyTo(vecAvgComp);
faces.resize(vecAvgComp.size());
std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect());
}
};
////////////////////////////////faceDetect/////////////////////////////////////////////////
struct Haar : HaarTestBase {};
};
TEST_F(Haar, FaceDetect)
}
}
TEST(Haar)
{
string imgName = workdir + "lena.jpg";
Mat img = imread( imgName, 1 );
Mat img = imread(abspath("basketball1.png"), CV_LOAD_IMAGE_GRAYSCALE);
if(img.empty())
if (img.empty())
{
std::cout << imgName << std::endl;
return ;
throw runtime_error("can't open basketball1.png");
}
//int i = 0;
double t = 0;
vector<Rect> faces, oclfaces;
// const static Scalar colors[] = { CV_RGB(0, 0, 255),
// CV_RGB(0, 128, 255),
// CV_RGB(0, 255, 255),
// CV_RGB(0, 255, 0),
// CV_RGB(255, 128, 0),
// CV_RGB(255, 255, 0),
// CV_RGB(255, 0, 0),
// CV_RGB(255, 0, 255)
// } ;
Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
MemStorage storage(cvCreateMemStorage(0));
cvtColor( img, gray, CV_BGR2GRAY );
resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
equalizeHist( smallImg, smallImg );
CascadeClassifier faceCascadeCPU;
t = (double)cvGetTickCount();
for(int k = 0; k < LOOP_TIMES; k++)
if (!faceCascadeCPU.load(abspath("haarcascade_frontalface_alt.xml")))
{
cpucascade.detectMultiScale( smallImg, faces, 1.1,
3, 0
| CV_HAAR_SCALE_IMAGE
, Size(30, 30), Size(0, 0) );
throw runtime_error("can't load haarcascade_frontalface_alt.xml");
}
t = (double)cvGetTickCount() - t ;
printf( "cpudetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) );
cv::ocl::oclMat image;
CvSeq *_objects=NULL;
t = (double)cvGetTickCount();
for(int k = 0; k < LOOP_TIMES; k++)
vector<Rect> faces;
SUBTEST << img.cols << "x" << img.rows << "; scale image";
CPU_ON;
faceCascadeCPU.detectMultiScale(img, faces,
1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
CPU_OFF;
ocl::CascadeClassifier_GPU faceCascade;
if (!faceCascade.load(abspath("haarcascade_frontalface_alt.xml")))
{
image.upload(smallImg);
_objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
3, 0
| CV_HAAR_SCALE_IMAGE
, Size(30, 30), Size(0, 0) );
throw runtime_error("can't load haarcascade_frontalface_alt.xml");
}
t = (double)cvGetTickCount() - t ;
printf( "ocldetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) );
vector<CvAvgComp> vecAvgComp;
Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
oclfaces.resize(vecAvgComp.size());
std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
//for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
//{
// Mat smallImgROI;
// Point center;
// Scalar color = colors[i%8];
// int radius;
// center.x = cvRound((r->x + r->width*0.5)*scale);
// center.y = cvRound((r->y + r->height*0.5)*scale);
// radius = cvRound((r->width + r->height)*0.25*scale);
// circle( img, center, radius, color, 3, 8, 0 );
//}
//namedWindow("result");
//imshow("result",img);
//waitKey(0);
//destroyAllWindows();
ocl::oclMat d_img(img);
faces.clear();
WARMUP_ON;
faceCascade.detectMultiScale(d_img, faces,
1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
WARMUP_OFF;
faces.clear();
GPU_ON;
faceCascade.detectMultiScale(d_img, faces,
1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
;
GPU_OFF;
GPU_FULL_ON;
d_img.upload(img);
faceCascade.detectMultiScale(d_img, faces,
1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
GPU_FULL_OFF;
}
\ No newline at end of file
#endif // HAVE_OPENCL
......@@ -15,7 +15,7 @@
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
......@@ -42,125 +42,47 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
extern std::string workdir;
#ifndef MWC_TEST_UTILITY
#define MWC_TEST_UTILITY
// Param class
#ifndef IMPLEMENT_PARAM_CLASS
#define IMPLEMENT_PARAM_CLASS(name, type) \
class name \
{ \
public: \
name ( type arg = type ()) : val_(arg) {} \
operator type () const {return val_;} \
private: \
type val_; \
}; \
inline void PrintTo( name param, std::ostream* os) \
{ \
*os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
}
#endif // IMPLEMENT_PARAM_CLASS
#endif // MWC_TEST_UTILITY
IMPLEMENT_PARAM_CLASS(WinSizw48, bool);
PARAM_TEST_CASE(HOG, WinSizw48, bool)
{
bool is48;
vector<float> detector;
virtual void SetUp()
{
is48 = GET_PARAM(0);
if(is48)
{
detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96();
}
else
{
detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128();
}
}
};
TEST_P(HOG, Performance)
///////////// HOG////////////////////////
TEST(HOG)
{
cv::Mat img = readImage(workdir + "lena.jpg", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
// define HOG related arguments
float scale = 1.05f;
//int nlevels = 13;
int gr_threshold = 8;
float hit_threshold = 1.4f;
//bool hit_threshold_auto = true;
int win_width = is48 ? 48 : 64;
int win_stride_width = 8;
int win_stride_height = 8;
bool gamma_corr = true;
Size win_size(win_width, win_width * 2); //(64, 128) or (48, 96)
Size win_stride(win_stride_width, win_stride_height);
cv::ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
cv::ocl::HOGDescriptor::DEFAULT_NLEVELS);
gpu_hog.setSVMDetector(detector);
double totalgputick = 0;
double totalgputick_kernel = 0;
Mat src = imread(abspath("road.png"), cv::IMREAD_GRAYSCALE);
double t1 = 0;
double t2 = 0;
for(int j = 0; j < LOOP_TIMES + 1; j ++)
if (src.empty())
{
t1 = (double)cvGetTickCount();//gpu start1
ocl::oclMat d_src(img);//upload
t2 = (double)cvGetTickCount(); //kernel
throw runtime_error("can't open road.png");
}
vector<Rect> found;
gpu_hog.detectMultiScale(d_src, found, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::HOGDescriptor hog;
hog.setSVMDetector(hog.getDefaultPeopleDetector());
std::vector<cv::Rect> found_locations;
// no download time for HOG
SUBTEST << 768 << 'x' << 576 << "; road.png";
t1 = (double)cvGetTickCount() - t1;//gpu end1
hog.detectMultiScale(src, found_locations);
if(j == 0)
continue;
CPU_ON;
hog.detectMultiScale(src, found_locations);
CPU_OFF;
totalgputick = t1 + totalgputick;
cv::ocl::HOGDescriptor ocl_hog;
ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector());
ocl::oclMat d_src;
d_src.upload(src);
totalgputick_kernel = t2 + totalgputick_kernel;
WARMUP_ON;
ocl_hog.detectMultiScale(d_src, found_locations);
WARMUP_OFF;
}
GPU_ON;
ocl_hog.detectMultiScale(d_src, found_locations);
;
GPU_OFF;
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
GPU_FULL_ON;
d_src.upload(src);
ocl_hog.detectMultiScale(d_src, found_locations);
GPU_FULL_OFF;
}
\ No newline at end of file
INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, HOG, testing::Combine(testing::Values(WinSizw48(false), WinSizw48(true)), testing::Values(false)));
#endif //Have opencl
\ No newline at end of file
此差异已折叠。
......@@ -42,191 +42,105 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
#ifndef MWC_TEST_UTILITY
#define MWC_TEST_UTILITY
//////// Utility
#ifndef DIFFERENT_SIZES
#else
#undef DIFFERENT_SIZES
#endif
#define DIFFERENT_SIZES testing::Values(cv::Size(256, 256), cv::Size(3000, 3000))
// Param class
#ifndef IMPLEMENT_PARAM_CLASS
#define IMPLEMENT_PARAM_CLASS(name, type) \
class name \
{ \
public: \
name ( type arg = type ()) : val_(arg) {} \
operator type () const {return val_;} \
private: \
type val_; \
}; \
inline void PrintTo( name param, std::ostream* os) \
{ \
*os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
}
IMPLEMENT_PARAM_CLASS(Channels, int)
#endif // IMPLEMENT_PARAM_CLASS
#endif // MWC_TEST_UTILITY
////////////////////////////////////////////////////////////////////////////////
// MatchTemplate
#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF_NORMED))
IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size);
const char *TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"};
PARAM_TEST_CASE(MatchTemplate, cv::Size, TemplateSize, Channels, TemplateMethod)
{
cv::Size size;
cv::Size templ_size;
int cn;
int method;
//vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
size = GET_PARAM(0);
templ_size = GET_PARAM(1);
cn = GET_PARAM(2);
method = GET_PARAM(3);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
};
struct MatchTemplate8U : MatchTemplate {};
TEST_P(MatchTemplate8U, Performance)
/////////// matchTemplate ////////////////////////
//void InitMatchTemplate()
//{
// Mat src; gen(src, 500, 500, CV_32F, 0, 1);
// Mat templ; gen(templ, 500, 500, CV_32F, 0, 1);
// ocl::oclMat d_src(src), d_templ(templ), d_dst;
// ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
//}
TEST(matchTemplate)
{
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl;
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl;
std::cout << "Channels: " << cn << std::endl;
cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn));
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn));
cv::Mat dst_gold;
cv::ocl::oclMat dst;
//InitMatchTemplate();
Mat src, templ, dst;
int templ_size = 5;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for(int j = 0; j < LOOP_TIMES + 1; j ++)
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
int all_type[] = {CV_32FC1, CV_32FC4};
std::string type_name[] = {"CV_32FC1", "CV_32FC4"};
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload
cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
dst.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end1
if(j == 0)
continue;
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
for(templ_size = 5; templ_size <= 5; templ_size *= 5)
{
gen(src, size, size, all_type[j], 0, 1);
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
SUBTEST << src.cols << 'x' << src.rows << "; " << type_name[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR";
gen(templ, templ_size, templ_size, all_type[j], 0, 1);
}
matchTemplate(src, templ, dst, CV_TM_CCORR);
CPU_ON;
matchTemplate(src, templ, dst, CV_TM_CCORR);
CPU_OFF;
struct MatchTemplate32F : MatchTemplate {};
TEST_P(MatchTemplate32F, Performance)
{
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl;
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl;
std::cout << "Channels: " << cn << std::endl;
cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn));
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
ocl::oclMat d_src(src), d_templ, d_dst;
cv::Mat dst_gold;
cv::ocl::oclMat dst;
d_templ.upload(templ);
WARMUP_ON;
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
WARMUP_OFF;
GPU_ON;
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
d_templ.upload(templ);
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
d_dst.download(dst);
GPU_FULL_OFF;
}
}
double totalgputick = 0;
double totalgputick_kernel = 0;
int all_type_8U[] = {CV_8UC1};
std::string type_name_8U[] = {"CV_8UC1"};
double t1 = 0;
double t2 = 0;
for(int j = 0; j < LOOP_TIMES; j ++)
for (size_t j = 0; j < sizeof(all_type_8U) / sizeof(int); j++)
{
for(templ_size = 5; templ_size <= 5; templ_size *= 5)
{
SUBTEST << src.cols << 'x' << src.rows << "; " << type_name_8U[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR_NORMED";
t1 = (double)cvGetTickCount();//gpu start1
gen(src, size, size, all_type_8U[j], 0, 255);
cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload
cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload
gen(templ, templ_size, templ_size, all_type_8U[j], 0, 255);
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
t2 = (double)cvGetTickCount() - t2;//kernel
matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED);
cv::Mat cpu_dst;
dst.download (cpu_dst);//download
CPU_ON;
matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED);
CPU_OFF;
t1 = (double)cvGetTickCount() - t1;//gpu end1
ocl::oclMat d_src(src);
ocl::oclMat d_templ(templ), d_dst;
totalgputick = t1 + totalgputick;
WARMUP_ON;
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
WARMUP_OFF;
totalgputick_kernel = t2 + totalgputick_kernel;
GPU_ON;
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
d_templ.upload(templ);
ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
d_dst.download(dst);
GPU_FULL_OFF;
}
}
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
\ No newline at end of file
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
testing::Combine(
testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT), cv::Size(1800, 1500)),
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
testing::Values(Channels(1), Channels(4)/*, Channels(3)*/),
ALL_TEMPLATE_METHODS
)
);
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT), cv::Size(1800, 1500)),
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
testing::Values(Channels(1), Channels(4) /*, Channels(3)*/),
testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
#endif //HAVE_OPENCL
\ No newline at end of file
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
///////////// norm////////////////////////
// Performance test for the NORM_INF norm of the difference between two
// CV_8UC1 matrices: cv::norm on the CPU versus ocl::norm on the OpenCL device.
//
// For every square size from Min_Size to Max_Size (multiplied by Multiple on
// each step) the same pair of generated matrices is measured three ways:
//   * CPU_ON / CPU_OFF           - the CPU two-array norm,
//   * GPU_ON / GPU_OFF           - the device norm on already-uploaded data,
//   * GPU_FULL_ON / GPU_FULL_OFF - the device norm including the upload of
//                                  both inputs.
// NOTE(review): SUBTEST, gen, the *_ON / *_OFF macros and the Min_Size /
// Max_Size / Multiple constants are not defined in this file; presumably they
// come from precomp.hpp - confirm their exact timing semantics there.
TEST(norm)
{
    Mat src, buf;
    ocl::oclMat d_src, d_buf;

    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
    {
        SUBTEST << size << 'x' << size << "; CV_8UC1; NORM_INF";

        gen(src, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
        gen(buf, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));

        // Call made outside the measured CPU region (presumably a warm-up).
        // The two-array overload is used so the CPU side computes the same
        // quantity as ocl::norm(d_src, d_buf, ...) below.
        norm(src, buf, NORM_INF);

        CPU_ON;
        norm(src, buf, NORM_INF);
        CPU_OFF;

        d_src.upload(src);
        d_buf.upload(buf);

        WARMUP_ON;
        ocl::norm(d_src, d_buf, NORM_INF);
        WARMUP_OFF;

        GPU_ON;
        ocl::norm(d_src, d_buf, NORM_INF);
        GPU_OFF;

        // The "full" measurement re-uploads both operands, since the device
        // norm reads d_src and d_buf.
        GPU_FULL_ON;
        d_src.upload(src);
        d_buf.upload(buf);
        ocl::norm(d_src, d_buf, NORM_INF);
        GPU_FULL_OFF;
    }
}
\ No newline at end of file
///////////////////////////////////////////////////////////////////////////////////////
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
......@@ -15,7 +15,7 @@
// Third party copyrights are property of their respective owners.
//
// @Authors
// fangfang bai, fangfang@multicorewareinc.com
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
......@@ -30,7 +30,7 @@
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
......@@ -42,96 +42,46 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
PARAM_TEST_CASE(PyrDown, MatType, int)
///////////// pyrDown //////////////////////
TEST(pyrDown)
{
int type;
int channels;
//src mat
cv::Mat mat1;
cv::Mat dst;
//std::vector<cv::ocl::Info> oclinfo;
//ocl dst mat for testing
Mat src, dst;
int all_type[] = {CV_8UC1, CV_8UC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
cv::ocl::oclMat gmat1;
cv::ocl::oclMat gdst;
virtual void SetUp()
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
{
type = GET_PARAM(0);
channels = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
};
#define VARNAME(A) string(#A);
////////////////////////////////PyrDown/////////////////////////////////////////////////
TEST_P(PyrDown, Mat)
{
cv::Size size(MWIDTH, MHEIGHT);
cv::RNG &rng = TS::ptr()->get_rng();
mat1 = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
cv::ocl::oclMat gdst;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for (int j = 0; j < LOOP_TIMES + 1; j ++)
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
SUBTEST << size << 'x' << size << "; " << type_name[j] ;
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat gmat1(mat1);
gen(src, size, size, all_type[j], 0, 256);
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::pyrDown(gmat1, gdst);
t2 = (double)cvGetTickCount() - t2;//kernel
pyrDown(src, dst);
cv::Mat cpu_dst;
gdst.download(cpu_dst);
CPU_ON;
pyrDown(src, dst);
CPU_OFF;
t1 = (double)cvGetTickCount() - t1;//gpu end1
if (j == 0)
{
continue;
}
ocl::oclMat d_src(src);
ocl::oclMat d_dst;
totalgputick = t1 + totalgputick;
WARMUP_ON;
ocl::pyrDown(d_src, d_dst);
WARMUP_OFF;
totalgputick_kernel = t2 + totalgputick_kernel;
GPU_ON;
ocl::pyrDown(d_src, d_dst);
;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::pyrDown(d_src, d_dst);
d_dst.download(dst);
GPU_FULL_OFF;
}
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
\ No newline at end of file
//********test****************
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, Combine(
Values(CV_8U, CV_32F), Values(1, 4)));
#endif // HAVE_OPENCL
......@@ -7,12 +7,16 @@
// copy or use the software.
//
//
// Intel License Agreement
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
......@@ -21,12 +25,12 @@
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// and/or other oclMaterials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
......@@ -38,83 +42,102 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#ifndef __OPENCV_TEST_INTERPOLATION_HPP__
#define __OPENCV_TEST_INTERPOLATION_HPP__
template <typename T> T readVal(const cv::Mat &src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
///////////// PyrLKOpticalFlow ////////////////////////
TEST(PyrLKOpticalFlow)
{
if (border_type == cv::BORDER_CONSTANT)
return (y >= 0 && y < src.rows && x >= 0 && x < src.cols) ? src.at<T>(y, x * src.channels() + c) : cv::saturate_cast<T>(borderVal.val[c]);
return src.at<T>(cv::borderInterpolate(y, src.rows, border_type), cv::borderInterpolate(x, src.cols, border_type) * src.channels() + c);
}
std::string images1[] = {"rubberwhale1.png", "aloeL.jpg"};
std::string images2[] = {"rubberwhale2.png", "aloeR.jpg"};
template <typename T> struct NearestInterpolator
{
static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
for (size_t i = 0; i < sizeof(images1) / sizeof(std::string); i++)
{
return readVal<T>(src, cvFloor(y), cvFloor(x), c, border_type, borderVal);
}
};
Mat frame0 = imread(abspath(images1[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE);
template <typename T> struct LinearInterpolator
{
static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
if (frame0.empty())
{
x -= 0.5f;
y -= 0.5f;
int x1 = cvFloor(x);
int y1 = cvFloor(y);
int x2 = x1 + 1;
int y2 = y1 + 1;
float res = 0;
std::string errstr = "can't open " + images1[i];
throw runtime_error(errstr);
}
res += readVal<T>(src, y1, x1, c, border_type, borderVal) * ((x2 - x) * (y2 - y));
res += readVal<T>(src, y1, x2, c, border_type, borderVal) * ((x - x1) * (y2 - y));
res += readVal<T>(src, y2, x1, c, border_type, borderVal) * ((x2 - x) * (y - y1));
res += readVal<T>(src, y2, x2, c, border_type, borderVal) * ((x - x1) * (y - y1));
Mat frame1 = imread(abspath(images2[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE);
return cv::saturate_cast<T>(res);
if (frame1.empty())
{
std::string errstr = "can't open " + images2[i];
throw runtime_error(errstr);
}
};
template <typename T> struct CubicInterpolator
{
static float getValue(float p[4], float x)
Mat gray_frame;
if (i == 0)
{
return p[1] + 0.5 * x * (p[2] - p[0] + x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * (3.0 * (p[1] - p[2]) + p[3] - p[0])));
cvtColor(frame0, gray_frame, COLOR_BGR2GRAY);
}
static float getValue(float p[4][4], float x, float y)
for (int points = Min_Size; points <= Max_Size; points *= Multiple)
{
float arr[4];
if (i == 0)
SUBTEST << frame0.cols << "x" << frame0.rows << "; color; " << points << " points";
else
SUBTEST << frame0.cols << "x" << frame0.rows << "; gray; " << points << " points";
Mat nextPts_cpu;
Mat status_cpu;
arr[0] = getValue(p[0], x);
arr[1] = getValue(p[1], x);
arr[2] = getValue(p[2], x);
arr[3] = getValue(p[3], x);
vector<Point2f> pts;
goodFeaturesToTrack(i == 0 ? gray_frame : frame0, pts, points, 0.01, 0.0);
return getValue(arr, y);
}
vector<Point2f> nextPts;
vector<unsigned char> status;
static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
vector<float> err;
calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
CPU_ON;
calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
CPU_OFF;
ocl::PyrLKOpticalFlow d_pyrLK;
ocl::oclMat d_frame0(frame0);
ocl::oclMat d_frame1(frame1);
ocl::oclMat d_pts;
Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void *)&pts[0]);
d_pts.upload(pts_mat);
ocl::oclMat d_nextPts;
ocl::oclMat d_status;
ocl::oclMat d_err;
WARMUP_ON;
d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
WARMUP_OFF;
GPU_ON;
d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
;
GPU_OFF;
GPU_FULL_ON;
d_frame0.upload(frame0);
d_frame1.upload(frame1);
d_pts.upload(pts_mat);
d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
if (!d_nextPts.empty())
{
int ix = cvRound(x);
int iy = cvRound(y);
d_nextPts.download(nextPts_cpu);
}
float vals[4][4] =
if (!d_status.empty())
{
{readVal<T>(src, iy - 2, ix - 2, c, border_type, borderVal), readVal<T>(src, iy - 2, ix - 1, c, border_type, borderVal), readVal<T>(src, iy - 2, ix, c, border_type, borderVal), readVal<T>(src, iy - 2, ix + 1, c, border_type, borderVal)},
{readVal<T>(src, iy - 1, ix - 2, c, border_type, borderVal), readVal<T>(src, iy - 1, ix - 1, c, border_type, borderVal), readVal<T>(src, iy - 1, ix, c, border_type, borderVal), readVal<T>(src, iy - 1, ix + 1, c, border_type, borderVal)},
{readVal<T>(src, iy , ix - 2, c, border_type, borderVal), readVal<T>(src, iy , ix - 1, c, border_type, borderVal), readVal<T>(src, iy , ix, c, border_type, borderVal), readVal<T>(src, iy , ix + 1, c, border_type, borderVal)},
{readVal<T>(src, iy + 1, ix - 2, c, border_type, borderVal), readVal<T>(src, iy + 1, ix - 1, c, border_type, borderVal), readVal<T>(src, iy + 1, ix, c, border_type, borderVal), readVal<T>(src, iy + 1, ix + 1, c, border_type, borderVal)},
};
d_status.download(status_cpu);
}
return cv::saturate_cast<T>(getValue(vals, (x - ix + 2.0) / 4.0, (y - iy + 2.0) / 4.0));
GPU_FULL_OFF;
}
};
#endif // __OPENCV_TEST_INTERPOLATION_HPP__
}
}
......@@ -15,7 +15,7 @@
// Third party copyrights are property of their respective owners.
//
// @Authors
// fangfang bai fangfang@multicorewareinc.com
// Fangfang Bai, fangfang@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
......@@ -30,7 +30,7 @@
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
......@@ -42,81 +42,46 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/core/core.hpp"
#include "precomp.hpp"
#include <iomanip>
#ifdef HAVE_OPENCL
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
PARAM_TEST_CASE(PyrUp, MatType, int)
///////////// pyrUp ////////////////////////
TEST(pyrUp)
{
int type;
int channels;
//std::vector<cv::ocl::Info> oclinfo;
Mat src, dst;
int all_type[] = {CV_8UC1, CV_8UC4};
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
virtual void SetUp()
for (int size = 500; size <= 2000; size *= 2)
{
type = GET_PARAM(0);
channels = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
}
};
TEST_P(PyrUp, Performance)
{
cv::Size size(MWIDTH, MHEIGHT);
cv::Mat src = randomMat(size, CV_MAKETYPE(type, channels));
cv::Mat dst_gold;
cv::ocl::oclMat dst;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for (int j = 0; j < LOOP_TIMES + 1; j ++)
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
{
t1 = (double)cvGetTickCount();//gpu start1
SUBTEST << size << 'x' << size << "; " << type_name[j] ;
cv::ocl::oclMat srcMat = cv::ocl::oclMat(src);//upload
gen(src, size, size, all_type[j], 0, 256);
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::pyrUp(srcMat, dst);
t2 = (double)cvGetTickCount() - t2;//kernel
pyrUp(src, dst);
cv::Mat cpu_dst;
dst.download(cpu_dst); //download
CPU_ON;
pyrUp(src, dst);
CPU_OFF;
t1 = (double)cvGetTickCount() - t1;//gpu end1
ocl::oclMat d_src(src);
ocl::oclMat d_dst;
if (j == 0)
{
continue;
}
totalgputick = t1 + totalgputick;
WARMUP_ON;
ocl::pyrUp(d_src, d_dst);
WARMUP_OFF;
totalgputick_kernel = t2 + totalgputick_kernel;
GPU_ON;
ocl::pyrUp(d_src, d_dst);
;
GPU_OFF;
GPU_FULL_ON;
d_src.upload(src);
ocl::pyrUp(d_src, d_dst);
d_dst.download(dst);
GPU_FULL_OFF;
}
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
\ No newline at end of file
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, Combine(
Values(CV_8U, CV_32F), Values(1, 4)));
#endif // HAVE_OPENCL
\ No newline at end of file
......@@ -7,12 +7,13 @@
// copy or use the software.
//
//
// Intel License Agreement
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
......@@ -21,12 +22,12 @@
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
......@@ -41,4 +42,321 @@
#include "precomp.hpp"
// This program tests most of the functions in the ocl module and writes x-factor metrics to .csv files.
// All images needed by this test are in the samples/gpu folder.
// For the haar template, haarcascade_frontalface_alt.xml should be in the working directory.
void TestSystem::run()
{
if (is_list_mode_)
{
for (vector<Runnable *>::iterator it = tests_.begin(); it != tests_.end(); ++it)
{
cout << (*it)->name() << endl;
}
return;
}
// Run test initializers
for (vector<Runnable *>::iterator it = inits_.begin(); it != inits_.end(); ++it)
{
if ((*it)->name().find(test_filter_, 0) != string::npos)
{
(*it)->run();
}
}
printHeading();
writeHeading();
// Run tests
for (vector<Runnable *>::iterator it = tests_.begin(); it != tests_.end(); ++it)
{
try
{
if ((*it)->name().find(test_filter_, 0) != string::npos)
{
cout << endl << (*it)->name() << ":\n";
setCurrentTest((*it)->name());
//fprintf(record_,"%s\n",(*it)->name().c_str());
(*it)->run();
finishCurrentSubtest();
}
}
catch (const Exception &)
{
// Message is printed via callback
resetCurrentSubtest();
}
catch (const runtime_error &e)
{
printError(e.what());
resetCurrentSubtest();
}
}
printSummary();
writeSummary();
}
// Closes the current subtest: converts the accumulated tick counts to
// milliseconds, derives the CPU/GPU speedups, updates the running totals and
// the faster/equal/slower counters, reports the metrics on the console and in
// the CSV record, and finally resets the per-subtest state.
//
// Nothing is reported when the subtest is flagged as empty.
void TestSystem::finishCurrentSubtest()
{
    if (cur_subtest_is_empty_)
        // There is no need to print subtest statistics
    {
        return;
    }

    double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
    double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
    double gpu_full_time = gpu_full_elapsed_ / getTickFrequency() * 1000.0;

    // The GPU tick counts are clamped to at least 1.0 so that a missing or
    // zero measurement cannot cause a division by zero.
    double speedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_);
    speedup_total_ += speedup;

    double fullspeedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_full_elapsed_);
    speedup_full_total_ += fullspeedup;

    // Classify the kernel-only speedup against the [bottom_, top_] band.
    if (speedup > top_)
    {
        speedup_faster_count_++;
    }
    else if (speedup < bottom_)
    {
        speedup_slower_count_++;
    }
    else
    {
        speedup_equal_count_++;
    }

    // Same classification for the speedup that includes data transfers.
    if (fullspeedup > top_)
    {
        speedup_full_faster_count_++;
    }
    else if (fullspeedup < bottom_)
    {
        speedup_full_slower_count_++;
    }
    else
    {
        speedup_full_equal_count_++;
    }

    // compute min, max and standard deviation of the GPU samples
    std::sort(gpu_times_.begin(), gpu_times_.end());

    // front()/back() on an empty vector is undefined behaviour; a subtest
    // that recorded no GPU sample reports 0 for both extremes instead.
    double gpu_min = 0;
    double gpu_max = 0;

    if (!gpu_times_.empty())
    {
        gpu_min = gpu_times_.front() / getTickFrequency() * 1000.0;
        gpu_max = gpu_times_.back() / getTickFrequency() * 1000.0;
    }

    double deviation = 0;

    if (gpu_times_.size() > 1)
    {
        double sum = 0;

        for (size_t i = 0; i < gpu_times_.size(); i++)
        {
            // Deviation is measured against gpu_elapsed_
            // (presumably the mean of gpu_times_ - confirm where it is set).
            int64 diff = gpu_times_[i] - static_cast<int64>(gpu_elapsed_);
            double diff_time = diff * 1000 / getTickFrequency();
            sum += diff_time * diff_time;
        }

        deviation = std::sqrt(sum / gpu_times_.size());
    }

    printMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup);
    writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation);

    num_subtests_called_++;
    resetCurrentSubtest();
}
// Returns the arithmetic mean of the given tick samples.
//
// An empty sample set yields 0.0 rather than the NaN that 0/0 would produce,
// so callers never propagate NaN into the reported timings.
double TestSystem::meanTime(const vector<int64> &samples)
{
    if (samples.empty())
    {
        return 0.0;
    }

    // The 0. initial value makes accumulate() sum in double precision.
    double sum = accumulate(samples.begin(), samples.end(), 0.);
    return sum / samples.size();
}
// Prints the left-aligned column titles of the console report.
void TestSystem::printHeading()
{
    cout << endl;

    // Left-align the fixed-width columns while the header is written.
    cout.setf(ios_base::left);

    cout << TAB;
    cout << setw(10) << "CPU, ms";
    cout << setw(10) << "GPU, ms";
    cout << setw(14) << "SPEEDUP";
    cout << setw(14) << "GPUTOTAL, ms";
    cout << setw(14) << "TOTALSPEEDUP";
    cout << "DESCRIPTION\n";

    cout.unsetf(ios_base::left);
}
void TestSystem::writeHeading()
{
if (!record_)
{
recordname_ += "_OCL.csv";
record_ = fopen(recordname_.c_str(), "w");
}
fprintf(record_, "NAME,DESCRIPTION,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n");
fflush(record_);
}
// Prints the end-of-run summary to the console: the average speedup and the
// exceeded/passed/failed counts and percentages, first for the kernel-only
// GPU timing and then for the GPUTOTAL timing.
void TestSystem::printSummary()
{
    // Denominator for every average/percentage; at least 1 so that a run
    // without subtests does not divide by zero.
    const int subtests = std::max(1, num_subtests_called_);

    const float faster_rate = (float)speedup_faster_count_ / subtests * 100;
    const float equal_rate = (float)speedup_equal_count_ / subtests * 100;
    const float slower_rate = (float)speedup_slower_count_ / subtests * 100;

    const float full_faster_rate = (float)speedup_full_faster_count_ / subtests * 100;
    const float full_equal_rate = (float)speedup_full_equal_count_ / subtests * 100;
    const float full_slower_rate = (float)speedup_full_slower_count_ / subtests * 100;

    // Fixed notation with three decimals for all floating-point values below.
    cout.setf(ios_base::fixed);
    cout << setprecision(3);

    // --- kernel-only GPU timing ---
    cout << "\naverage GPU speedup: x" << speedup_total_ / subtests << endl;

    cout << "\nGPU exceeded: " << speedup_faster_count_;
    cout << "\nGPU passed: " << speedup_equal_count_;
    cout << "\nGPU failed: " << speedup_slower_count_ << endl;

    cout << "\nGPU exceeded rate: " << faster_rate << "%";
    cout << "\nGPU passed rate: " << equal_rate << "%";
    cout << "\nGPU failed rate: " << slower_rate << "%" << endl;

    // --- GPU timing including upload/download ---
    cout << "\naverage GPUTOTAL speedup: x" << speedup_full_total_ / subtests << endl;

    cout << "\nGPUTOTAL exceeded: " << speedup_full_faster_count_;
    cout << "\nGPUTOTAL passed: " << speedup_full_equal_count_;
    cout << "\nGPUTOTAL failed: " << speedup_full_slower_count_ << endl;

    cout << "\nGPUTOTAL exceeded rate: " << full_faster_rate << "%";
    cout << "\nGPUTOTAL passed rate: " << full_equal_rate << "%";
    cout << "\nGPUTOTAL failed rate: " << full_slower_rate << "%" << endl;

    cout.unsetf(ios_base::fixed);
}
void TestSystem::printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup)
{
cout << TAB << setiosflags(ios_base::left);
stringstream stream;
stream << cpu_time;
cout << setw(10) << stream.str();
stream.str("");
stream << gpu_time;
cout << setw(10) << stream.str();
stream.str("");
stream << "x" << setprecision(3) << speedup;
cout << setw(14) << stream.str();
stream.str("");
stream << gpu_full_time;
cout << setw(14) << stream.str();
stream.str("");
stream << "x" << setprecision(3) << fullspeedup;
cout << setw(14) << stream.str();
cout << cur_subtest_description_.str();
cout << resetiosflags(ios_base::left) << endl;
}
void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev)
{
if (!record_)
{
recordname_ += ".csv";
record_ = fopen(recordname_.c_str(), "w");
}
fprintf(record_, "%s,%s,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n", itname_changed_ ? itname_.c_str() : "",
cur_subtest_description_.str().c_str(),
cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup,
gpu_min, gpu_max, std_dev);
if (itname_changed_)
{
itname_changed_ = false;
}
fflush(record_);
}
void TestSystem::writeSummary()
{
if (!record_)
{
recordname_ += ".csv";
record_ = fopen(recordname_.c_str(), "w");
}
fprintf(record_, "\nAverage GPU speedup: %.3f\n"
"exceeded: %d (%.3f%%)\n"
"passed: %d (%.3f%%)\n"
"failed: %d (%.3f%%)\n"
"\nAverage GPUTOTAL speedup: %.3f\n"
"exceeded: %d (%.3f%%)\n"
"passed: %d (%.3f%%)\n"
"failed: %d (%.3f%%)\n",
speedup_total_ / std::max(1, num_subtests_called_),
speedup_faster_count_, (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100,
speedup_equal_count_, (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100,
speedup_slower_count_, (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100,
speedup_full_total_ / std::max(1, num_subtests_called_),
speedup_full_faster_count_, (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100,
speedup_full_equal_count_, (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100,
speedup_full_slower_count_, (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
);
fflush(record_);
}
// Reports an error for the current subtest on the console.
// Messages equal to "CL_INVALID_BUFFER_SIZE" are suppressed.
void TestSystem::printError(const std::string &msg)
{
    if (msg == "CL_INVALID_BUFFER_SIZE")
        return;

    cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
}
// Allocates `mat` as a rows x cols matrix of the given type and fills it with
// uniformly distributed values bounded by `low` and `high`.
// The generator is always constructed with the constant 0.
void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high)
{
    RNG fixed_seed_rng(0);

    mat.create(rows, cols, type);
    fixed_seed_rng.fill(mat, RNG::UNIFORM, low, high);
}
// Prefixes `relpath` with the working directory of the test system.
// The two parts are concatenated as-is; no separator is inserted.
string abspath(const string &relpath)
{
    string full_path = TestSystem::instance().workingDir();
    full_path += relpath;
    return full_path;
}
// Error callback: forwards the error message to the test system's error
// printer and returns 0.
// Only err_msg is used. The remaining arguments (status, function name, file
// name, line, user data) are ignored and therefore left unnamed.
int CV_CDECL cvErrorCallback(int, const char *,
                             const char *err_msg, const char *,
                             int, void *)
{
    TestSystem &test_system = TestSystem::instance();
    test_system.printError(err_msg);
    return 0;
}
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册