未验证 提交 d8a27de9 编写于 作者: B BUG1989 提交者: GitHub

add cpu affinity option (#491)

上级 0620c3b0
...@@ -62,7 +62,7 @@ option(TENGINE_OPENMP "openmp support" ON) ...@@ -62,7 +62,7 @@ option(TENGINE_OPENMP "openmp support" ON)
option(TENGINE_BUILD_BENCHMARK "build benchmark" ON) option(TENGINE_BUILD_BENCHMARK "build benchmark" ON)
option(TENGINE_BUILD_EXAMPLES "build examples" ON) option(TENGINE_BUILD_EXAMPLES "build examples" ON)
option(TENGINE_BUILD_TESTS "build tests" OFF) option(TENGINE_BUILD_TESTS "build tests" OFF)
option(TENGINE_BUILD_CPP_API "build C++ API" ON) option(TENGINE_BUILD_CPP_API "build C++ API" OFF)
option(TENGINE_DEBUG_DATA "extract data for every layer" OFF) option(TENGINE_DEBUG_DATA "extract data for every layer" OFF)
option(TENGINE_DEBUG_TIME "print time information for every layer" OFF) option(TENGINE_DEBUG_TIME "print time information for every layer" OFF)
option(TENGINE_DEBUG_MEM_STAT "print memory status for library" OFF) option(TENGINE_DEBUG_MEM_STAT "print memory status for library" OFF)
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#define DEFAULT_LOOP_COUNT 1 #define DEFAULT_LOOP_COUNT 1
#define DEFAULT_THREAD_COUNT 1 #define DEFAULT_THREAD_COUNT 1
#define DEFAULT_CLUSTER TENGINE_CLUSTER_ALL #define DEFAULT_CLUSTER TENGINE_CLUSTER_ALL
#define DEFAULT_CPU_AFFINITY 255
int loop_counts = DEFAULT_LOOP_COUNT; int loop_counts = DEFAULT_LOOP_COUNT;
...@@ -142,9 +143,10 @@ int main(int argc, char* argv[]) ...@@ -142,9 +143,10 @@ int main(int argc, char* argv[])
int select_num = -1; int select_num = -1;
int num_threads = DEFAULT_THREAD_COUNT; int num_threads = DEFAULT_THREAD_COUNT;
int power = DEFAULT_CLUSTER; int power = DEFAULT_CLUSTER;
int affinity = DEFAULT_CPU_AFFINITY;
int res; int res;
while ((res = getopt(argc, argv, "r:t:p:s:h")) != -1) while ((res = getopt(argc, argv, "r:t:p:s:a:h")) != -1)
{ {
switch (res) switch (res)
{ {
...@@ -160,6 +162,9 @@ int main(int argc, char* argv[]) ...@@ -160,6 +162,9 @@ int main(int argc, char* argv[])
case 's': case 's':
select_num = atoi(optarg); select_num = atoi(optarg);
break; break;
case 'a':
affinity = atoi(optarg);
break;
case 'h': case 'h':
show_usage(); show_usage();
return 0; return 0;
...@@ -171,6 +176,7 @@ int main(int argc, char* argv[]) ...@@ -171,6 +176,7 @@ int main(int argc, char* argv[])
fprintf(stderr, "loop_counts = %d\n", loop_counts); fprintf(stderr, "loop_counts = %d\n", loop_counts);
fprintf(stderr, "num_threads = %d\n", num_threads); fprintf(stderr, "num_threads = %d\n", num_threads);
fprintf(stderr, "power = %d\n", power); fprintf(stderr, "power = %d\n", power);
fprintf(stderr, "affinity = %d\n", affinity);
/* inital tengine */ /* inital tengine */
if (init_tengine() != 0) if (init_tengine() != 0)
...@@ -183,6 +189,7 @@ int main(int argc, char* argv[]) ...@@ -183,6 +189,7 @@ int main(int argc, char* argv[])
struct options opt; struct options opt;
opt.num_thread = num_threads; opt.num_thread = num_threads;
opt.precision = TENGINE_MODE_FP32; opt.precision = TENGINE_MODE_FP32;
opt.affinity = affinity;
switch (power) switch (power)
{ {
......
...@@ -355,6 +355,7 @@ bool tengine_predict(float * input_data, graph_t graph, const int input_dims[4], ...@@ -355,6 +355,7 @@ bool tengine_predict(float * input_data, graph_t graph, const int input_dims[4],
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32; opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
tensor_t input_tensor = get_graph_input_tensor(graph, 0, 0); tensor_t input_tensor = get_graph_input_tensor(graph, 0, 0);
if (input_tensor == NULL) if (input_tensor == NULL)
......
...@@ -40,15 +40,17 @@ ...@@ -40,15 +40,17 @@
#define DEFAULT_MEAN3 122.679 #define DEFAULT_MEAN3 122.679
#define DEFAULT_LOOP_COUNT 1 #define DEFAULT_LOOP_COUNT 1
#define DEFAULT_THREAD_COUNT 1 #define DEFAULT_THREAD_COUNT 1
#define DEFAULT_CPU_AFFINITY 255
int tengine_classify(const char* model_file, const char* image_file, int img_h, int img_w, const float* mean, int tengine_classify(const char* model_file, const char* image_file, int img_h, int img_w, const float* mean,
const float* scale, int loop_count, int num_thread) const float* scale, int loop_count, int num_thread, int affinity)
{ {
/* set runtime options */ /* set runtime options */
struct options opt; struct options opt;
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32; opt.precision = TENGINE_MODE_FP32;
opt.affinity = affinity;
/* inital tengine */ /* inital tengine */
if (init_tengine() != 0) if (init_tengine() != 0)
...@@ -151,7 +153,7 @@ void show_usage() ...@@ -151,7 +153,7 @@ void show_usage()
fprintf( fprintf(
stderr, stderr,
"[Usage]: [-h]\n [-m model_file] [-i image_file]\n [-g img_h,img_w] [-s scale[0],scale[1],scale[2]] [-w " "[Usage]: [-h]\n [-m model_file] [-i image_file]\n [-g img_h,img_w] [-s scale[0],scale[1],scale[2]] [-w "
"mean[0],mean[1],mean[2]] [-r loop_count] [-t thread_count]\n"); "mean[0],mean[1],mean[2]] [-r loop_count] [-t thread_count] [-a cpu_affinity]\n");
fprintf( fprintf(
stderr, stderr,
"\nmobilenet example: \n ./classification -m /path/to/mobilenet.tmfile -i /path/to/img.jpg -g 224,224 -s " "\nmobilenet example: \n ./classification -m /path/to/mobilenet.tmfile -i /path/to/img.jpg -g 224,224 -s "
...@@ -162,6 +164,7 @@ int main(int argc, char* argv[]) ...@@ -162,6 +164,7 @@ int main(int argc, char* argv[])
{ {
int loop_count = DEFAULT_LOOP_COUNT; int loop_count = DEFAULT_LOOP_COUNT;
int num_thread = DEFAULT_THREAD_COUNT; int num_thread = DEFAULT_THREAD_COUNT;
int cpu_affinity = DEFAULT_CPU_AFFINITY;
char* model_file = NULL; char* model_file = NULL;
char* image_file = NULL; char* image_file = NULL;
float img_hw[2] = {0.f}; float img_hw[2] = {0.f};
...@@ -171,7 +174,7 @@ int main(int argc, char* argv[]) ...@@ -171,7 +174,7 @@ int main(int argc, char* argv[])
float scale[3] = {0.f, 0.f, 0.f}; float scale[3] = {0.f, 0.f, 0.f};
int res; int res;
while ((res = getopt(argc, argv, "m:i:l:g:s:w:r:t:h")) != -1) while ((res = getopt(argc, argv, "m:i:l:g:s:w:r:t:a:h")) != -1)
{ {
switch (res) switch (res)
{ {
...@@ -198,6 +201,9 @@ int main(int argc, char* argv[]) ...@@ -198,6 +201,9 @@ int main(int argc, char* argv[])
case 't': case 't':
num_thread = atoi(optarg); num_thread = atoi(optarg);
break; break;
case 'a':
cpu_affinity = atoi(optarg);
break;
case 'h': case 'h':
show_usage(); show_usage();
return 0; return 0;
...@@ -252,7 +258,7 @@ int main(int argc, char* argv[]) ...@@ -252,7 +258,7 @@ int main(int argc, char* argv[])
fprintf(stderr, "Mean value not specified, use default %.1f, %.1f, %.1f\n", mean[0], mean[1], mean[2]); fprintf(stderr, "Mean value not specified, use default %.1f, %.1f, %.1f\n", mean[0], mean[1], mean[2]);
} }
if (tengine_classify(model_file, image_file, img_h, img_w, mean, scale, loop_count, num_thread) < 0) if (tengine_classify(model_file, image_file, img_h, img_w, mean, scale, loop_count, num_thread, cpu_affinity) < 0)
return -1; return -1;
return 0; return 0;
......
...@@ -62,6 +62,7 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h, ...@@ -62,6 +62,7 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP16; opt.precision = TENGINE_MODE_FP16;
opt.affinity = 0;
/* inital tengine */ /* inital tengine */
if (init_tengine() != 0) if (init_tengine() != 0)
......
...@@ -70,6 +70,7 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h, ...@@ -70,6 +70,7 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_INT8; opt.precision = TENGINE_MODE_INT8;
opt.affinity = 0;
/* inital tengine */ /* inital tengine */
if (init_tengine() != 0) if (init_tengine() != 0)
......
...@@ -70,6 +70,7 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h, ...@@ -70,6 +70,7 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_UINT8; opt.precision = TENGINE_MODE_UINT8;
opt.affinity = 0;
/* inital tengine */ /* inital tengine */
if (init_tengine() != 0) if (init_tengine() != 0)
......
...@@ -49,6 +49,7 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h, ...@@ -49,6 +49,7 @@ int tengine_classify(const char* model_file, const char* image_file, int img_h,
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32; opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */ /* inital tengine */
if (init_tengine() != 0) if (init_tengine() != 0)
......
...@@ -216,6 +216,7 @@ int main(int argc, char* argv[]) ...@@ -216,6 +216,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32; opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */ /* inital tengine */
if (init_tengine() != 0) if (init_tengine() != 0)
......
...@@ -110,6 +110,7 @@ int main(int argc, char* argv[]) ...@@ -110,6 +110,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32; opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */ /* inital tengine */
init_tengine(); init_tengine();
......
...@@ -119,6 +119,7 @@ int main(int argc, char* argv[]) ...@@ -119,6 +119,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_UINT8; opt.precision = TENGINE_MODE_UINT8;
opt.affinity = 0;
/* inital tengine */ /* inital tengine */
init_tengine(); init_tengine();
......
...@@ -157,6 +157,7 @@ int main(int argc, char* argv[]) ...@@ -157,6 +157,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32; opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */ /* inital tengine */
init_tengine(); init_tengine();
......
...@@ -156,7 +156,8 @@ int main(int argc, char* argv[]) ...@@ -156,7 +156,8 @@ int main(int argc, char* argv[])
struct options opt; struct options opt;
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32; opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */ /* inital tengine */
init_tengine(); init_tengine();
......
...@@ -173,6 +173,7 @@ int main(int argc, char* argv[]) ...@@ -173,6 +173,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_UINT8; opt.precision = TENGINE_MODE_UINT8;
opt.affinity = 0;
// init tengine // init tengine
if (init_tengine() < 0) if (init_tengine() < 0)
......
...@@ -176,6 +176,7 @@ int main(int argc, char* argv[]) ...@@ -176,6 +176,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32; opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */ /* inital tengine */
init_tengine(); init_tengine();
......
...@@ -452,7 +452,8 @@ int main(int argc, char* argv[]) ...@@ -452,7 +452,8 @@ int main(int argc, char* argv[])
struct options opt; struct options opt;
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32; opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */ /* inital tengine */
int ret = init_tengine(); int ret = init_tengine();
......
...@@ -224,6 +224,7 @@ static int detect_yolact(const cv::Mat& bgr, std::vector<Object>& objects, const ...@@ -224,6 +224,7 @@ static int detect_yolact(const cv::Mat& bgr, std::vector<Object>& objects, const
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32; opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */ /* inital tengine */
if (init_tengine() != 0) if (init_tengine() != 0)
......
...@@ -699,6 +699,7 @@ int main(int argc, char* argv[]) ...@@ -699,6 +699,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32; opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */ /* inital tengine */
if (init_tengine() != 0) if (init_tengine() != 0)
......
...@@ -713,6 +713,7 @@ int main(int argc, char* argv[]) ...@@ -713,6 +713,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_UINT8; opt.precision = TENGINE_MODE_UINT8;
opt.affinity = 0;
/* inital tengine */ /* inital tengine */
if (init_tengine() != 0) if (init_tengine() != 0)
......
...@@ -380,7 +380,8 @@ int main(int argc, char* argv[]) ...@@ -380,7 +380,8 @@ int main(int argc, char* argv[])
struct options opt; struct options opt;
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32; opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */ /* inital tengine */
if (init_tengine() != 0) if (init_tengine() != 0)
......
...@@ -700,6 +700,7 @@ int main(int argc, char* argv[]) ...@@ -700,6 +700,7 @@ int main(int argc, char* argv[])
opt.num_thread = num_thread; opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32; opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;
/* inital tengine */ /* inital tengine */
if (init_tengine() != 0) if (init_tengine() != 0)
......
...@@ -136,6 +136,7 @@ struct options ...@@ -136,6 +136,7 @@ struct options
int num_thread; int num_thread;
int cluster; int cluster;
int precision; int precision;
uint64_t affinity;
}; };
/* performance profiling records */ /* performance profiling records */
......
...@@ -37,18 +37,20 @@ ...@@ -37,18 +37,20 @@
* Author: lswang@openailab.com * Author: lswang@openailab.com
*/ */
#include "cpu.h"
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <limits.h> #include <limits.h>
#include "tengine_c_api.h" #include "tengine_c_api.h"
//#ifndef __ANDROID__ #ifndef _MSC_VER
#include <pthread.h>
#include <sys/syscall.h> #include <sys/syscall.h>
#include <sched.h> #include <sched.h>
#include <unistd.h> #include <unistd.h>
#include <stdint.h> #include <stdint.h>
//#endif #endif
#if __APPLE__ #if __APPLE__
#include "TargetConditionals.h" #include "TargetConditionals.h"
...@@ -120,6 +122,7 @@ int init_cpu_count() ...@@ -120,6 +122,7 @@ int init_cpu_count()
return core_count; return core_count;
} }
#ifndef _MSC_VER
static int get_max_freq_khz(int cpuid) static int get_max_freq_khz(int cpuid)
{ {
// first try, for all possible cpu // first try, for all possible cpu
...@@ -215,24 +218,36 @@ static int set_sched_affinity(size_t thread_affinity_mask) ...@@ -215,24 +218,36 @@ static int set_sched_affinity(size_t thread_affinity_mask)
#define CPU_ZERO(cpusetp) memset((cpusetp), 0, sizeof(cpu_set_t)) #define CPU_ZERO(cpusetp) memset((cpusetp), 0, sizeof(cpu_set_t))
// set affinity for thread // set affinity for thread
#if defined(__GLIBC__) || defined(__OHOS__) #if (defined __GLIBC__) || (defined _OHOS_)
pid_t pid = syscall(SYS_gettid); pid_t pid = syscall(SYS_gettid);
#else #else
#ifdef PI3 #ifdef PI3
pid_t pid = getpid(); pid_t pid = getpid();
#else #else
#ifdef MACOS
uint64_t tid64;
pthread_threadid_np(NULL, &tid64);
pid_t pid = (pid_t)tid64;
#else
pid_t pid = gettid(); pid_t pid = gettid();
#endif #endif
#endif
#endif #endif
cpu_set_t mask; cpu_set_t mask;
CPU_ZERO(&mask); CPU_ZERO(&mask);
for (int i = 0; i < ( int )sizeof(size_t) * 8; i++) // for (int i = 0; i < ( int )sizeof(size_t) * 8; i++)
for (int i = 0; i < core_count; i++)
{ {
if (thread_affinity_mask & (1 << i)) if (thread_affinity_mask & (1 << i))
CPU_SET(i, &mask); CPU_SET(i, &mask);
} }
#if MACOS
int syscallret = syscall(set_sched_affinity, pid, sizeof(mask), &mask);
#else
int syscallret = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask); int syscallret = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask);
#endif
if (syscallret) if (syscallret)
{ {
fprintf(stderr, "syscall error %d\n", syscallret); fprintf(stderr, "syscall error %d\n", syscallret);
...@@ -241,6 +256,7 @@ static int set_sched_affinity(size_t thread_affinity_mask) ...@@ -241,6 +256,7 @@ static int set_sched_affinity(size_t thread_affinity_mask)
return 0; return 0;
} }
#endif
int init_cluster_mask() int init_cluster_mask()
{ {
...@@ -249,7 +265,7 @@ int init_cluster_mask() ...@@ -249,7 +265,7 @@ int init_cluster_mask()
affinity_mask_all_cluster = ((size_t)(1) << core_count) - 1; affinity_mask_all_cluster = ((size_t)(1) << core_count) - 1;
//#ifdef __ANDROID__ #ifndef _MSC_VER
int max_freq_min_val = INT_MAX; int max_freq_min_val = INT_MAX;
int max_freq_max_val = 0; int max_freq_max_val = 0;
...@@ -285,10 +301,10 @@ int init_cluster_mask() ...@@ -285,10 +301,10 @@ int init_cluster_mask()
affinity_mask_medium_cluster |= (1 << i); affinity_mask_medium_cluster |= (1 << i);
} }
} }
//#else #else
// // TODO implement me for other platforms // TODO implement me for other platforms
// affinity_mask_big_cluster = affinity_mask_all_cluster; affinity_mask_big_cluster = affinity_mask_all_cluster;
//#endif #endif
return 0; return 0;
} }
...@@ -305,7 +321,7 @@ int get_mask_count(size_t mask) ...@@ -305,7 +321,7 @@ int get_mask_count(size_t mask)
{ {
int count = 0; int count = 0;
for (int i = 0; i < sizeof(size_t) * 8; i++) for (int i = 0; i < core_count; i++)
if (mask & (1 << i)) if (mask & (1 << i))
count++; count++;
...@@ -314,7 +330,7 @@ int get_mask_count(size_t mask) ...@@ -314,7 +330,7 @@ int get_mask_count(size_t mask)
int set_cpu_affine(size_t mask) int set_cpu_affine(size_t mask)
{ {
#ifdef __ANDROID__ #if defined __ANDROID__ || defined __linux__
int count = get_mask_count(mask); int count = get_mask_count(mask);
#ifdef _OPENMP #ifdef _OPENMP
...@@ -322,7 +338,7 @@ int set_cpu_affine(size_t mask) ...@@ -322,7 +338,7 @@ int set_cpu_affine(size_t mask)
omp_set_num_threads(count); omp_set_num_threads(count);
int status[sizeof(size_t) * 8] = {0}; int status[sizeof(size_t) * 8] = {0};
#pragma omp parallel for num_threads(count)
for (int i = 0; i < count; i++) for (int i = 0; i < count; i++)
{ {
status[i] = set_sched_affinity(mask); status[i] = set_sched_affinity(mask);
...@@ -339,15 +355,15 @@ int set_cpu_affine(size_t mask) ...@@ -339,15 +355,15 @@ int set_cpu_affine(size_t mask)
return -1; return -1;
#endif #endif
return 0; #elif __APPLE_IOS__ || _MSC_VER
#elif __APPLE_IOS__
// thread affinity not supported on ios // thread affinity not supported on ios
( void )mask; ( void )mask;
return -1; return -1;
#else #else
int status = set_sched_affinity(mask); int status = set_sched_affinity(mask);
if (0 != status) if (0 != status) return -1;
return -1;
return 0;
#endif #endif
} }
......
...@@ -1041,7 +1041,15 @@ int DLLEXPORT prerun_graph_multithread(graph_t graph, struct options opt) ...@@ -1041,7 +1041,15 @@ int DLLEXPORT prerun_graph_multithread(graph_t graph, struct options opt)
} }
ir_graph->status = GRAPH_STAT_READY; ir_graph->status = GRAPH_STAT_READY;
set_cpu_affine(mask);
if (0 != opt.affinity && 0 != (opt.affinity & mask))
{
set_cpu_affine(opt.affinity);
}
else
{
set_cpu_affine(mask);
}
return 0; return 0;
} }
......
...@@ -42,6 +42,7 @@ Net::Net() ...@@ -42,6 +42,7 @@ Net::Net()
opt.num_thread = 1; opt.num_thread = 1;
opt.precision = TENGINE_MODE_FP32; opt.precision = TENGINE_MODE_FP32;
opt.cluster = TENGINE_CLUSTER_ALL; opt.cluster = TENGINE_CLUSTER_ALL;
opt.affinity = 0;
} }
Net::~Net() Net::~Net()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册