提交 1c9f956b 编写于 作者: Y Yan Wang

Remove unnecessary operations and calculations in loop body.

Signed-off-by: Yan Wang <yan.wang@linux.intel.com>
上级 be5c9103
......@@ -210,6 +210,8 @@ __kernel void BruteForceMatch_Match(
}
#else
__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE_ODD * BLOCK_SIZE;
const int s_query_i = mad24(BLOCK_SIZE_ODD, lidy, lidx);
const int s_train_i = mad24(BLOCK_SIZE_ODD, lidx, lidy);
#endif
float myBestDistance = MAX_FLOAT;
......@@ -242,13 +244,15 @@ __kernel void BruteForceMatch_Match(
{
const int loadx = mad24(i, BLOCK_SIZE, lidx);
//load query and train into local memory
s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = 0;
s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = 0;
if (loadx < query_cols)
{
s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = query_vec[loadx];
s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = train_vec[loadx];
s_query[s_query_i] = query_vec[loadx];
s_train[s_train_i] = train_vec[loadx];
}
else
{
s_query[s_query_i] = 0;
s_train[s_train_i] = 0;
}
barrier(CLK_LOCAL_MEM_FENCE);
......@@ -337,18 +341,22 @@ __kernel void BruteForceMatch_RadiusMatch(
__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE_ODD * BLOCK_SIZE;
result_type result = 0;
const int s_query_i = mad24(BLOCK_SIZE_ODD, lidy, lidx);
const int s_train_i = mad24(BLOCK_SIZE_ODD, lidx, lidy);
for (int i = 0 ; i < (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE ; ++i)
{
//load a BLOCK_SIZE * BLOCK_SIZE block into local train.
const int loadx = mad24(BLOCK_SIZE, i, lidx);
s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = 0;
s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = 0;
if (loadx < query_cols)
{
s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = query_vec[loadx];
s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = train_vec[loadx];
s_query[s_query_i] = query_vec[loadx];
s_train[s_train_i] = train_vec[loadx];
}
else
{
s_query[s_query_i] = 0;
s_train[s_train_i] = 0;
}
//synchronize to make sure each elem for reduceIteration in share memory is written already.
......@@ -405,6 +413,8 @@ __kernel void BruteForceMatch_knnMatch(
}
#else
__local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE_ODD * BLOCK_SIZE;
const int s_query_i = mad24(BLOCK_SIZE_ODD, lidy, lidx);
const int s_train_i = mad24(BLOCK_SIZE_ODD, lidx, lidy);
#endif
float myBestDistance1 = MAX_FLOAT;
......@@ -438,13 +448,15 @@ __kernel void BruteForceMatch_knnMatch(
{
const int loadx = mad24(BLOCK_SIZE, i, lidx);
//load query and train into local memory
s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = 0;
s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = 0;
if (loadx < query_cols)
{
s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = query_vec[loadx];
s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = train_vec[loadx];
s_query[s_query_i] = query_vec[loadx];
s_train[s_train_i] = train_vec[loadx];
}
else
{
s_query[s_query_i] = 0;
s_train[s_train_i] = 0;
}
barrier(CLK_LOCAL_MEM_FENCE);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册