提交 a87d65d0 编写于 作者: R russelltao

update readme

上级 5f2763dd
......@@ -20,6 +20,7 @@
#### 遍历有序数组
`./branch_predict -f`
消耗时间(毫秒):350
### c. 使用perf验证缓存命中率
#### 遍历随机数组
`perf stat -e cache-references,cache-misses,instructions,cycles,L1-dcache-load-misses,L1-dcache-loads ./branch_predict -s`
```
......
......@@ -12,6 +12,7 @@ using namespace std;
long timediff(clock_t t1, clock_t t2) {
    // clock() does not report wall-clock time: it counts the CPU clock ticks
    // consumed by the process since it started, so the difference between two
    // samples is CPU time, expressed in units of 1/CLOCKS_PER_SEC seconds.
    const double ticks = static_cast<double>(t2) - t1;
    // Convert ticks -> seconds -> milliseconds (same evaluation order as a
    // single expression `ticks / CLOCKS_PER_SEC * 1000`).
    const double millis = ticks / CLOCKS_PER_SEC * 1000;
    // The fractional part of a millisecond is discarded (truncation toward zero).
    return static_cast<long>(millis);
}
......@@ -22,12 +23,15 @@ int main(int argc, char** argv) {
while((ch = getopt(argc, argv, "fsg")) != -1) {
switch(ch)
{
//遍历随机数组
case 's':
mode = 1;
break;
//遍历有序数组
case 'f':
mode = 2;
break;
//生成数组至文件中,不影响遍历过程时的perf统计
case 'g':
mode = 3;
break;
......@@ -37,12 +41,16 @@ int main(int argc, char** argv) {
unsigned char* arr = new unsigned char[TESTN];
if (3 == mode) {
//构造随机数组
for (long i = 0; i < TESTN; i++) arr[i] = rand() % 256;
ofstream ofs;
//随机数组写入文件
ofs.open("rand.array", ios::out | ios::binary);
ofs.write((const char*)arr, TESTN);
ofs.close();
//数组排序
sort(arr,arr+TESTN);
//有序数组写入文件
ofs.open("sort.array", ios::out | ios::binary);
ofs.write((const char*)arr, TESTN);
ofs.close();
......@@ -57,9 +65,11 @@ int main(int argc, char** argv) {
ifs.open(fname);
ifs.read((char *)arr, TESTN);
//使用clock比取系统时间能够更准确的看到消耗了多少CPU资源
clock_t start,end;
start =clock();
for(long i = 0; i < TESTN; i++) {
//条件分支预测在这里发生作用
if (arr[i] < 128) arr[i] = 0;
}
end =clock();
......
......@@ -9,95 +9,57 @@
#### 安装编译依赖的软件
如Linux中需要安装C++编译器(g++),CentOS中可用`yum install gcc-c++`安装,Ubuntu中可用`apt-get install g++`安装
#### 编译程序
`g++ traverse_2d_array.cpp -o traverse_2d_array`
`g++ cpu_migrate.cpp -o cpu_migrate -lpthread`
* 注意,多线程依赖pthread库,编译时需要链接
### b. 运行验证
#### 使用array[i][j]遍历数组
`./traverse_2d_array -f`
消耗时间(毫秒):10
#### 使用array[j][i]遍历数组
`./traverse_2d_array -s`
消耗时间(毫秒):70
#### 使用14个(共28个CPU核心)并发线程测试,不绑定CPU
`./cpu_migrate -t 14 -s`
平均每线程消耗时间(毫秒):1083
#### 使用14个(共28个CPU核心)并发线程测试,绑定CPU
`./cpu_migrate -t 14 -f`
平均每线程消耗时间(毫秒):926
### c. 使用perf验证缓存命中率
#### 使用array[i][j]遍历数组
`perf stat -e cache-references,cache-misses,instructions,cycles,L1-dcache-load-misses,L1-dcache-loads ./traverse_2d_array -f`
#### 使用14个(共28个CPU核心)并发线程测试,不绑定CPU
`perf stat -e cpu-migrations,cache-references,cache-misses,instructions,cycles,L1-dcache-load-misses,L1-dcache-loads,L1-icache-load-misses,branch-load-misses,branch-loads ./cpu_migrate -t 14 -s`
* 输出结果:
```
Performance counter stats for './traverse_2d_array -f':
Performance counter stats for './cpu_migrate -t 14 -s':
147,927 cache-references (80.14%)
13,215 cache-misses # 8.933 % of all cache refs (65.49%)
54,454,827 instructions # 1.43 insn per cycle (85.11%)
38,197,267 cycles (85.09%)
161,503 L1-dcache-load-misses # 0.90% of all L1-dcache hits (85.09%)
18,035,307 L1-dcache-loads (84.19%)
10 cpu-migrations
8,193,825 cache-references (44.40%)
175,792 cache-misses # 2.145 % of all cache refs (44.34%)
45,480,238,906 instructions # 1.30 insn per cycle (55.47%)
35,111,144,560 cycles (55.47%)
11,997,428 L1-dcache-load-misses # 0.05% of all L1-dcache hits (55.57%)
26,407,960,253 L1-dcache-loads (55.60%)
2,459,766 L1-icache-load-misses (55.66%)
2,136,304 branch-load-misses (44.53%)
3,825,848,726 branch-loads (44.43%)
0.020651344 seconds time elapsed
1.251076337 seconds time elapsed
0.018625000 seconds user
0.002069000 seconds sys
14.630618000 seconds user
0.459616000 seconds sys
```
#### 使用array[j][i]遍历数组
`perf stat -e cache-references,cache-misses,instructions,cycles,L1-dcache-load-misses,L1-dcache-loads ./traverse_2d_array -s`
#### 使用14个(共28个CPU核心)并发线程测试,绑定CPU
`perf stat -e cpu-migrations,cache-references,cache-misses,instructions,cycles,L1-dcache-load-misses,L1-dcache-loads,L1-icache-load-misses,branch-load-misses,branch-loads ./cpu_migrate -t 14 -f`
* 输出结果:
```
Performance counter stats for './traverse_2d_array -s':
Performance counter stats for './cpu_migrate -t 14 -f':
4,341,186 cache-references (83.01%)
13,974 cache-misses # 0.322 % of all cache refs (66.03%)
55,245,646 instructions # 0.25 insn per cycle (83.01%)
218,787,967 cycles (83.00%)
4,308,394 L1-dcache-load-misses # 23.79% of all L1-dcache hits (83.86%)
18,112,753 L1-dcache-loads (84.10%)
14 cpu-migrations
4,983,541 cache-references (44.42%)
1,611,627 cache-misses # 32.339 % of all cache refs (44.34%)
45,523,818,723 instructions # 1.52 insn per cycle (55.43%)
29,972,627,158 cycles (55.46%)
5,812,831 L1-dcache-load-misses # 0.02% of all L1-dcache hits (55.53%)
26,388,005,477 L1-dcache-loads (55.58%)
1,262,533 L1-icache-load-misses (55.66%)
1,363,376 branch-load-misses (44.54%)
3,828,570,015 branch-loads (44.47%)
0.082950118 seconds time elapsed
0.948650967 seconds time elapsed
0.079066000 seconds user
0.003953000 seconds sys
12.489932000 seconds user
0.456253000 seconds sys
```
## 3. Java程序
### a. 编译程序
`javac traverse_2d_array.java`
### b.运行验证
#### 使用array[i][j]遍历数组
`java traverse_2d_array -f`
消耗时间(毫秒):20
#### 使用array[j][i]遍历数组
`java traverse_2d_array -s`
消耗时间(毫秒):100
### c. 使用perf验证缓存命中率
#### 使用array[i][j]遍历数组
`perf stat -e cache-references,cache-misses,instructions,cycles,L1-dcache-load-misses,L1-dcache-loads java traverse_2d_array -f`
* 输出结果:
```
Performance counter stats for 'java traverse_2d_array -f':
6,379,138 cache-references (80.62%)
866,578 cache-misses # 13.585 % of all cache refs (68.93%)
459,726,039 instructions # 1.51 insn per cycle (85.22%)
303,673,757 cycles (85.69%)
5,270,707 L1-dcache-load-misses # 3.96% of all L1-dcache hits (81.64%)
133,211,743 L1-dcache-loads (83.13%)
0.126089887 seconds time elapsed
0.122353000 seconds user
0.047877000 seconds sys
```
#### 使用array[j][i]遍历数组
`perf stat -e cache-references,cache-misses,instructions,cycles,L1-dcache-load-misses,L1-dcache-loads java traverse_2d_array -s`
* 输出结果:
```
Performance counter stats for 'java traverse_2d_array -s':
42,441,956 cache-references (80.21%)
872,336 cache-misses # 2.055 % of all cache refs (66.61%)
386,326,280 instructions # 0.71 insn per cycle (84.29%)
544,411,061 cycles (85.01%)
38,884,991 L1-dcache-load-misses # 32.48% of all L1-dcache hits (85.24%)
119,711,464 L1-dcache-loads (82.94%)
0.192838747 seconds time elapsed
0.200693000 seconds user
0.052919000 seconds sys
```
\ No newline at end of file
......@@ -18,26 +18,31 @@ void* loopcalc(void* args) {
if (setaffinity) {
cpu_set_t mask; //CPU核的集合
cpu_set_t get; //获取在集合中的CPU
//获取线程的序列号
int *thread_num = (int *)args;
CPU_ZERO(&mask); //置空
CPU_SET(*thread_num,&mask); //设置亲和力值
if (sched_setaffinity(0, sizeof(mask), &mask) == -1)//设置线程CPU亲和力
//将当前线程绑定至特定CPU
CPU_ZERO(&mask);
CPU_SET(*thread_num,&mask);
if (sched_setaffinity(0, sizeof(mask), &mask) == -1)
{
cout<<"warning: could not set CPU affinity, continuing...\n";
}
}
timeval tStart,tEnd;
//这里不再使用clock,因为clock表示的进程所占用过的CPU周期,它将所有CPU都计入了,不适合示例中的统计
gettimeofday(&tStart, 0);
//这个循环中由于反复访问有限的数组,CPU缓存命中率非常高
unsigned char* arr = new unsigned char[TESTN];
for (long i = 0; i < TESTN; i++) arr[i] = rand() % 256;
for (int j = 1; j < 16*1024; j++) {
for (int j = 1; j < TESTN; j++) {
for (long i = 0; i < TESTN; i++) arr[i] += 1;
}
gettimeofday(&tEnd, 0);
//将消耗时间传出到timecost数组中对应的元素上
*(long*)args = (1000000LL * (tEnd.tv_sec-tStart.tv_sec) + (tEnd.tv_usec-tStart.tv_usec))/1000;
}
......@@ -47,12 +52,15 @@ int main(int argc, char** argv) {
while((ch = getopt(argc, argv, "t:fs")) != -1) {
switch(ch)
{
//设置测试的并发线程数,注意不要超过机器上的CPU核数
case 't':
threadnum = atoi(optarg);
break;
//将线程绑定至特定CPU上
case 'f':
setaffinity = true;
break;
//不绑定CPU
case 's':
setaffinity = false;
break;
......@@ -60,8 +68,10 @@ int main(int argc, char** argv) {
}
pthread_t* id = new pthread_t[threadnum];
//统计每个线程计算所需要的时间
long* timecost = new long[threadnum];
for(int i = 0; i < threadnum; i++) {
//最初timecost用于传递线程号,用于绑定CPU
timecost[i] = i;
int ret=pthread_create(&id[i],NULL,loopcalc,&timecost[i]);
......@@ -72,10 +82,12 @@ int main(int argc, char** argv) {
}
long costsum = 0;
//等待所有线程结束
for(int i = 0; i < threadnum; i++) {
pthread_join(id[i],NULL);
costsum += timecost[i];
}
//比较平均每线程所用时间
cout<<"costsum: "<<costsum<<", avg: "<<costsum/threadnum<<endl;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册