未验证 提交 d8a27de9 编写于 作者: B BUG1989 提交者: GitHub

add cpu affinity option (#491)

上级 0620c3b0
......@@ -62,7 +62,7 @@ option(TENGINE_OPENMP "openmp support" ON)
option(TENGINE_BUILD_BENCHMARK "build benchmark" ON)
option(TENGINE_BUILD_EXAMPLES "build examples" ON)
option(TENGINE_BUILD_TESTS "build tests" OFF)
option(TENGINE_BUILD_CPP_API "build C++ API" ON)
option(TENGINE_BUILD_CPP_API "build C++ API" OFF)
option(TENGINE_DEBUG_DATA "extract data for every layer" OFF)
option(TENGINE_DEBUG_TIME "print time information for every layer" OFF)
option(TENGINE_DEBUG_MEM_STAT "print memory status for library" OFF)
......
......@@ -33,6 +33,7 @@
/* Initial values for the benchmark's run-time settings; main() seeds its
 * local variables from these before it parses the command line. */
#define DEFAULT_LOOP_COUNT 1
#define DEFAULT_THREAD_COUNT 1
#define DEFAULT_CLUSTER TENGINE_CLUSTER_ALL
/* CPU affinity bitmask, one bit per CPU index: 255 == 0xFF, i.e. bits 0-7 set.
 * Can be overridden with the -a option. */
#define DEFAULT_CPU_AFFINITY 255
/* Global loop count, starts at the default above. */
int loop_counts = DEFAULT_LOOP_COUNT;
......@@ -142,9 +143,10 @@ int main(int argc, char* argv[])
int select_num = -1;
int num_threads = DEFAULT_THREAD_COUNT;
int power = DEFAULT_CLUSTER;
int affinity = DEFAULT_CPU_AFFINITY;
int res;
while ((res = getopt(argc, argv, "r:t:p:s:h")) != -1)
while ((res = getopt(argc, argv, "r:t:p:s:a:h")) != -1)
{
switch (res)
{
......@@ -160,6 +162,9 @@ int main(int argc, char* argv[])
case 's':
select_num = atoi(optarg);
break;
case 'a':
affinity = atoi(optarg);
break;
case 'h':
show_usage();
return 0;
......@@ -171,6 +176,7 @@ int main(int argc, char* argv[])
fprintf(stderr, "loop_counts = %d\n", loop_counts);
fprintf(stderr, "num_threads = %d\n", num_threads);
fprintf(stderr, "power = %d\n", power);
fprintf(stderr, "affinity = %d\n", affinity);
/* inital tengine */
if (init_tengine() != 0)
......@@ -183,6 +189,7 @@ int main(int argc, char* argv[])
struct options opt;
opt.num_thread = num_threads;
opt.precision = TENGINE_MODE_FP32;
opt.affinity = affinity;
switch (power)
{
......
......@@ -355,6 +355,7 @@ bool tengine_predict(float * input_data, graph_t graph, const int input_dims[4],
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
tensor_t input_tensor = get_graph_input_tensor(graph, 0, 0);
if (input_tensor == NULL)
......
......@@ -40,15 +40,17 @@
/* Defaults used by main() when the corresponding option is not supplied. */
#define DEFAULT_MEAN3 122.679
#define DEFAULT_LOOP_COUNT 1
#define DEFAULT_THREAD_COUNT 1
/* CPU affinity bitmask, one bit per CPU index: 255 == 0xFF, i.e. bits 0-7 set.
 * Can be overridden with the -a option. */
#define DEFAULT_CPU_AFFINITY 255
int tengine_classify(const char* model_file, const char* image_file, int img_h, int img_w, const float* mean,
const float* scale, int loop_count, int num_thread)
const float* scale, int loop_count, int num_thread, int affinity)
{
/* set runtime options */
struct options opt;
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32;
opt.affinity = affinity;
/* inital tengine */
if (init_tengine() != 0)
......@@ -151,7 +153,7 @@ void show_usage()
fprintf(
stderr,
"[Usage]: [-h]\n [-m model_file] [-i image_file]\n [-g img_h,img_w] [-s scale[0],scale[1],scale[2]] [-w "
"mean[0],mean[1],mean[2]] [-r loop_count] [-t thread_count]\n");
"mean[0],mean[1],mean[2]] [-r loop_count] [-t thread_count] [-a cpu_affinity]\n");
fprintf(
stderr,
"\nmobilenet example: \n ./classification -m /path/to/mobilenet.tmfile -i /path/to/img.jpg -g 224,224 -s "
......@@ -162,6 +164,7 @@ int main(int argc, char* argv[])
{
int loop_count = DEFAULT_LOOP_COUNT;
int num_thread = DEFAULT_THREAD_COUNT;
int cpu_affinity = DEFAULT_CPU_AFFINITY;
char* model_file = NULL;
char* image_file = NULL;
float img_hw[2] = {0.f};
......@@ -171,7 +174,7 @@ int main(int argc, char* argv[])
float scale[3] = {0.f, 0.f, 0.f};
int res;
while ((res = getopt(argc, argv, "m:i:l:g:s:w:r:t:h")) != -1)
while ((res = getopt(argc, argv, "m:i:l:g:s:w:r:t:a:h")) != -1)
{
switch (res)
{
......@@ -198,6 +201,9 @@ int main(int argc, char* argv[])
case 't':
num_thread = atoi(optarg);
break;
case 'a':
cpu_affinity = atoi(optarg);
break;
case 'h':
show_usage();
return 0;
......@@ -252,7 +258,7 @@ int main(int argc, char* argv[])
fprintf(stderr, "Mean value not specified, use default %.1f, %.1f, %.1f\n", mean[0], mean[1], mean[2]);
}
if (tengine_classify(model_file, image_file, img_h, img_w, mean, scale, loop_count, num_thread) < 0)
if (tengine_classify(model_file, image_file, img_h, img_w, mean, scale, loop_count, num_thread, cpu_affinity) < 0)
return -1;
return 0;
......
......@@ -62,6 +62,7 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP16;
opt.affinity = 0;
/* inital tengine */
if (init_tengine() != 0)
......
......@@ -70,6 +70,7 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_INT8;
opt.affinity = 0;
/* inital tengine */
if (init_tengine() != 0)
......
......@@ -70,6 +70,7 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_UINT8;
opt.affinity = 0;
/* inital tengine */
if (init_tengine() != 0)
......
......@@ -49,6 +49,7 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */
if (init_tengine() != 0)
......
......@@ -216,6 +216,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */
if (init_tengine() != 0)
......
......@@ -110,6 +110,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */
init_tengine();
......
......@@ -119,6 +119,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_UINT8;
opt.affinity = 0;
/* inital tengine */
init_tengine();
......
......@@ -157,6 +157,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */
init_tengine();
......
......@@ -156,7 +156,8 @@ int main(int argc, char* argv[])
struct options opt;
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32;
opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */
init_tengine();
......
......@@ -173,6 +173,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_UINT8;
opt.affinity = 0;
// init tengine
if (init_tengine() < 0)
......
......@@ -176,6 +176,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */
init_tengine();
......
......@@ -452,7 +452,8 @@ int main(int argc, char* argv[])
struct options opt;
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32;
opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */
int ret = init_tengine();
......
......@@ -224,6 +224,7 @@ static int detect_yolact(const cv::Mat& bgr, std::vector<Object>& objects, const
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */
if (init_tengine() != 0)
......
......@@ -699,6 +699,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */
if (init_tengine() != 0)
......
......@@ -713,6 +713,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_UINT8;
opt.affinity = 0;
/* inital tengine */
if (init_tengine() != 0)
......
......@@ -380,7 +380,8 @@ int main(int argc, char* argv[])
struct options opt;
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32;
opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */
if (init_tengine() != 0)
......
......@@ -700,6 +700,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */
if (init_tengine() != 0)
......
......@@ -136,6 +136,7 @@ struct options
int num_thread;
int cluster;
int precision;
uint64_t affinity;
};
/* performance profiling records */
......
......@@ -37,18 +37,20 @@
* Author: lswang@openailab.com
*/
#include "cpu.h"
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include "tengine_c_api.h"
//#ifndef __ANDROID__
#ifndef _MSC_VER
#include <pthread.h>
#include <sys/syscall.h>
#include <sched.h>
#include <unistd.h>
#include <stdint.h>
//#endif
#endif
#if __APPLE__
#include "TargetConditionals.h"
......@@ -120,6 +122,7 @@ int init_cpu_count()
return core_count;
}
#ifndef _MSC_VER
static int get_max_freq_khz(int cpuid)
{
// first try, for all possible cpu
......@@ -215,24 +218,36 @@ static int set_sched_affinity(size_t thread_affinity_mask)
#define CPU_ZERO(cpusetp) memset((cpusetp), 0, sizeof(cpu_set_t))
// set affinity for thread
#if defined(__GLIBC__) || defined(__OHOS__)
#if (defined __GLIBC__) || (defined _OHOS_)
pid_t pid = syscall(SYS_gettid);
#else
#ifdef PI3
pid_t pid = getpid();
#else
#ifdef MACOS
uint64_t tid64;
pthread_threadid_np(NULL, &tid64);
pid_t pid = (pid_t)tid64;
#else
pid_t pid = gettid();
#endif
#endif
#endif
cpu_set_t mask;
CPU_ZERO(&mask);
for (int i = 0; i < ( int )sizeof(size_t) * 8; i++)
// for (int i = 0; i < ( int )sizeof(size_t) * 8; i++)
for (int i = 0; i < core_count; i++)
{
if (thread_affinity_mask & (1 << i))
CPU_SET(i, &mask);
}
#if MACOS
int syscallret = syscall(set_sched_affinity, pid, sizeof(mask), &mask);
#else
int syscallret = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask);
#endif
if (syscallret)
{
fprintf(stderr, "syscall error %d\n", syscallret);
......@@ -241,6 +256,7 @@ static int set_sched_affinity(size_t thread_affinity_mask)
return 0;
}
#endif
int init_cluster_mask()
{
......@@ -249,7 +265,7 @@ int init_cluster_mask()
affinity_mask_all_cluster = ((size_t)(1) << core_count) - 1;
//#ifdef __ANDROID__
#ifndef _MSC_VER
int max_freq_min_val = INT_MAX;
int max_freq_max_val = 0;
......@@ -285,10 +301,10 @@ int init_cluster_mask()
affinity_mask_medium_cluster |= (1 << i);
}
}
//#else
// // TODO implement me for other platforms
// affinity_mask_big_cluster = affinity_mask_all_cluster;
//#endif
#else
// TODO implement me for other platforms
affinity_mask_big_cluster = affinity_mask_all_cluster;
#endif
return 0;
}
......@@ -305,7 +321,7 @@ int get_mask_count(size_t mask)
{
int count = 0;
for (int i = 0; i < sizeof(size_t) * 8; i++)
for (int i = 0; i < core_count; i++)
if (mask & (1 << i))
count++;
......@@ -314,7 +330,7 @@ int get_mask_count(size_t mask)
int set_cpu_affine(size_t mask)
{
#ifdef __ANDROID__
#if defined __ANDROID__ || defined __linux__
int count = get_mask_count(mask);
#ifdef _OPENMP
......@@ -322,7 +338,7 @@ int set_cpu_affine(size_t mask)
omp_set_num_threads(count);
int status[sizeof(size_t) * 8] = {0};
#pragma omp parallel for num_threads(count)
for (int i = 0; i < count; i++)
{
status[i] = set_sched_affinity(mask);
......@@ -339,15 +355,15 @@ int set_cpu_affine(size_t mask)
return -1;
#endif
return 0;
#elif __APPLE_IOS__
#elif __APPLE_IOS__ || _MSC_VER
// thread affinity is not supported on iOS or when building with MSVC
( void )mask;
return -1;
#else
int status = set_sched_affinity(mask);
if (0 != status)
return -1;
if (0 != status) return -1;
return 0;
#endif
}
......
......@@ -1041,7 +1041,15 @@ int DLLEXPORT prerun_graph_multithread(graph_t graph, struct options opt)
}
ir_graph->status = GRAPH_STAT_READY;
set_cpu_affine(mask);
if (0 != opt.affinity && 0 != (opt.affinity & mask))
{
set_cpu_affine(opt.affinity);
}
else
{
set_cpu_affine(mask);
}
return 0;
}
......
......@@ -42,6 +42,7 @@ Net::Net()
opt.num_thread = 1;
opt.precision = TENGINE_MODE_FP32;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.affinity = 0;
}
Net::~Net()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册