提交 e9b0cee2 编写于 作者: R russelltao

add cpu cache

上级 87568ac1
#include "stdio.h"
#include <stdlib.h>
#include <time.h>
#include <algorithm>
#include <iostream>
#include <fstream>
#include <unistd.h>
using namespace std;
#define TESTN 128*1024*1024L
// Converts the interval between two clock() readings into milliseconds.
//
// t1 is the earlier reading and t2 the later one. The tick difference is
// computed in double precision, scaled by CLOCKS_PER_SEC, and the fractional
// part of the millisecond value is dropped by the conversion to long.
long timediff(clock_t t1, clock_t t2) {
    const double ticks = static_cast<double>(t2) - t1;
    const double millis = ticks / CLOCKS_PER_SEC * 1000;
    return static_cast<long>(millis);
}
int main(int argc, char** argv) {
int mode = 1;
int ch;
while((ch = getopt(argc, argv, "fsg")) != -1) {
switch(ch)
{
case 's':
mode = 1;
break;
case 'f':
mode = 2;
break;
case 'g':
mode = 3;
break;
}
}
unsigned char* arr = new unsigned char[TESTN];
if (3 == mode) {
for (long i = 0; i < TESTN; i++) arr[i] = rand() % 256;
ofstream ofs;
ofs.open("rand.array", ios::out | ios::binary);
ofs.write((const char*)arr, TESTN);
ofs.close();
sort(arr,arr+TESTN);
ofs.open("sort.array", ios::out | ios::binary);
ofs.write((const char*)arr, TESTN);
ofs.close();
} else {
const char* fname;
if ( 1 == mode) {
fname = "rand.array";
}else if (2 == mode) {
fname = "sort.array";
}
ifstream ifs;
ifs.open(fname);
ifs.read((char *)arr, TESTN);
clock_t start,end;
start =clock();
for(long i = 0; i < TESTN; i++) {
if (arr[i] < 128) arr[i] = 0;
}
end =clock();
cout<<" "<<timediff(start,end)<<endl;
}
}
#include "stdio.h"
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include <iostream>
#include <unistd.h>
#include <pthread.h>
#include <sched.h>
using namespace std;
#define TESTN 16*1024L
bool setaffinity = false;
// Thread entry point for the CPU-migration benchmark.
//
// args points to a long owned by the thread's creator. On entry it holds the
// index of this thread, which is used as the CPU number to pin to when the
// global setaffinity flag is set. On exit it is overwritten with the elapsed
// wall-clock time of the workload in milliseconds. Always returns NULL.
void* loopcalc(void* args) {
    long* slot = static_cast<long*>(args);
    if (setaffinity) {
        cpu_set_t mask;  // set of CPUs the thread is allowed to run on
        CPU_ZERO(&mask); // start from an empty set
        // Read the index as the long it was stored as; reading it through an
        // int* only happens to work on little-endian targets.
        CPU_SET(static_cast<int>(*slot), &mask); // allow only the CPU matching the index
        // pid 0 applies the mask to the calling thread.
        if (sched_setaffinity(0, sizeof(mask), &mask) == -1) {
            cout << "warning: could not set CPU affinity, continuing...\n";
        }
    }

    timeval tStart, tEnd;
    gettimeofday(&tStart, 0);
    unsigned char* arr = new unsigned char[TESTN];
    for (long i = 0; i < TESTN; i++) arr[i] = rand() % 256;
    for (int j = 1; j < 16*1024; j++) {
        for (long i = 0; i < TESTN; i++) arr[i] += 1;
    }
    gettimeofday(&tEnd, 0);
    // Released after the second timestamp so the measured region is unchanged.
    delete[] arr;

    *slot = (1000000LL * (tEnd.tv_sec - tStart.tv_sec) + (tEnd.tv_usec - tStart.tv_usec)) / 1000;
    // A function returning void* must return a value; falling off the end is
    // undefined behaviour.
    return NULL;
}
int main(int argc, char** argv) {
int threadnum = 2;
int ch;
while((ch = getopt(argc, argv, "t:fs")) != -1) {
switch(ch)
{
case 't':
threadnum = atoi(optarg);
break;
case 'f':
setaffinity = true;
break;
case 's':
setaffinity = false;
break;
}
}
pthread_t* id = new pthread_t[threadnum];
long* timecost = new long[threadnum];
for(int i = 0; i < threadnum; i++) {
timecost[i] = i;
int ret=pthread_create(&id[i],NULL,loopcalc,&timecost[i]);
if(ret!=0){
cout<<"Create pthread error!\n";
exit (1);
}
}
long costsum = 0;
for(int i = 0; i < threadnum; i++) {
pthread_join(id[i],NULL);
costsum += timecost[i];
}
cout<<"costsum: "<<costsum<<", avg: "<<costsum/threadnum<<endl;
}
#include <time.h>
#include <iostream>
#include <unistd.h>
#include <stdlib.h>
using namespace std;
// Returns the time between two clock() samples in milliseconds.
//
// t1 is the starting sample and t2 the ending one. The subtraction and the
// scaling by CLOCKS_PER_SEC are done in double precision; converting the
// result to long discards any fraction of a millisecond.
long timediff(clock_t t1, clock_t t2) {
    const double tickDelta = static_cast<double>(t2) - t1;
    const double elapsedMs = tickDelta / CLOCKS_PER_SEC * 1000;
    return static_cast<long>(elapsedMs);
}
int main(int argc, char** argv) {
int step = 1,ch;
bool slowMode = true;
long TESTN = 1024*1024*1024*8L;
while((ch = getopt(argc, argv, "s:")) != -1) {
switch(ch)
{
case 's':
step = atoi(optarg);
break;
}
}
char* arr = new char[TESTN];
clock_t start, end;
long total = TESTN/1024,cnt = 0;
long i = 0;
start =clock();
while (++cnt < total) {
arr[i] = 0;
i += step;
}
end =clock();
cout<<timediff(start,end)<<",access count:"<<cnt<<endl;
}
* C++程序traverse_2d_array.cpp
** 编译程序
*** 安装编译依赖的软件
如Linux中需要安装g++编译器:CentOS中可用yum install gcc-c++安装,Ubuntu中可用apt-get install g++安装
*** 编译程序
g++ traverse_2d_array.cpp -o traverse_2d_array
*** 运行验证
**** 使用array[i][j]遍历数组
./traverse_2d_array -f
**** 使用array[j][i]遍历数组
./traverse_2d_array -s
*** 使用perf验证缓存命中率
**** 使用array[i][j]遍历数组
perf stat -e cache-references,cache-misses,instructions,cycles,L1-dcache-load-misses,L1-dcache-loads ./traverse_2d_array -f
**** 使用array[j][i]遍历数组
perf stat -e cache-references,cache-misses,instructions,cycles,L1-dcache-load-misses,L1-dcache-loads ./traverse_2d_array -s
* python程序traverse_2d_array.py
\ No newline at end of file
#include <time.h>
#include <iostream>
#include <unistd.h>
#include <stdlib.h>
using namespace std;
// Measures the span between two clock() values in milliseconds.
//
// t1 is the value taken before the measured work and t2 the value taken
// after it. The computation runs in double precision and the result is
// truncated when converted to long.
long timediff(clock_t t1, clock_t t2) {
    const double span = static_cast<double>(t2) - t1;
    const double spanMs = span / CLOCKS_PER_SEC * 1000;
    return static_cast<long>(spanMs);
}
#define TESTN 2048
// 2-D array traversal benchmark driver.
//
// Options:
//   -f  traverse as arr[i][j] (inner loop walks the last index)
//   -s  traverse as arr[j][i] (inner loop walks the first index); default
//   -n  accepted by the option string together with an argument, but there is
//       no handler for it, so it has no effect
//
// Prints which access pattern is used, then the elapsed CPU time in
// milliseconds for writing zero to every element of a TESTN x TESTN array.
int main(int argc, char** argv) {
    int ch;
    bool slowMode = true;
    while ((ch = getopt(argc, argv, "fsn:")) != -1) {
        switch (ch) {
        case 's':
            slowMode = true;
            break;
        case 'f':
            slowMode = false;
            break;
        }
    }
    cout << (slowMode ? "arr[j][i]" : "arr[i][j]") << endl;

    // Static storage keeps the TESTN*TESTN-byte array off the stack, where an
    // automatic array of this size can exceed a small default stack limit.
    static char arr[TESTN][TESTN];
    clock_t start, end;
    if (!slowMode) {
        start = clock();
        for (int i = 0; i < TESTN; i++) {
            for (int j = 0; j < TESTN; j++) {
                arr[i][j] = 0;
            }
        }
        end = clock();
    } else {
        start = clock();
        for (int i = 0; i < TESTN; i++) {
            for (int j = 0; j < TESTN; j++) {
                arr[j][i] = 0;
            }
        }
        end = clock();
    }
    cout << timediff(start, end) << endl;
    return 0;
}
# Times two traversals of a TESTN x TESTN list of lists and prints the elapsed
# wall-clock seconds of each: first with the inner loop walking the second
# index (arr[i][j]), then with the inner loop walking the first index
# (arr[j][i]).
import time

TESTN = 10240
arr = [[0 for col in range(TESTN)] for row in range(TESTN)]

# Pass 1: arr[i][j]
t1 = time.time()
for i in range(TESTN):
    for j in range(TESTN):
        arr[i][j] = 1
t2 = time.time()
# print() with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3.
print(t2 - t1)

# Pass 2: arr[j][i]
t1 = time.time()
for i in range(TESTN):
    for j in range(TESTN):
        arr[j][i] = 1
t2 = time.time()
print(t2 - t1)
perf stat -e cache-references,cache-misses,instructions,cycles,L1-dcache-load-misses,L1-dcache-loads,L1-icache-load-misses,branch-load-misses,branch-loads ./branch -f
g++ branch.cpp -o branch
g++ cpu_migrate.cpp -o cpu_migrate -lpthread
《极客时间:分布式高性能程序优化30讲》示例代码
1-cpu_cache目录:第1讲 CPU缓存:怎样写代码能够提升缓存命中率?
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册