提交 9efc54d6 编写于 作者: 赵奇可

Fix inefficient code and delete unused code

上级 f0521e5e
#include <common.h> #include <common.h>
const int kTableSize = (1 << 10); inline float coeff_even(float i) {
float x = i / TABLE_SIZE;
inline float ComputeCoeffs(int i) { return (1.25f * x - 2.25f) * x * x + 1.0f;
const float A = -0.75;
float x = (i / 2) * 1.0 / kTableSize;
if (i % 2 == 0){
float coeff = ((A + 2) * x - (A + 3)) * x * x + 1;
return coeff;
}
else {
x += 1.0;
float coeff = ((A * x - 5 * A) * x + 8 * A) * x - 4 * A;
return coeff;
}
} }
#define BOUND(val, limit) min(limit - 1, max(0, val)) inline float coeff_odd(float i) {
float x = i / TABLE_SIZE + 1.0f;
return ((-0.75f * x + 3.75f) * x - 6.0f) * x + 3.0f;
}
__kernel void resize_bicubic_nocache(KERNEL_ERROR_PARAMS __kernel void resize_bicubic_nocache(KERNEL_ERROR_PARAMS
GLOBAL_WORK_GROUP_SIZE_DIM3 GLOBAL_WORK_GROUP_SIZE_DIM3
...@@ -27,7 +19,6 @@ __kernel void resize_bicubic_nocache(KERNEL_ERROR_PARAMS ...@@ -27,7 +19,6 @@ __kernel void resize_bicubic_nocache(KERNEL_ERROR_PARAMS
__private const int in_height, __private const int in_height,
__private const int in_width, __private const int in_width,
__private const int out_height) { __private const int out_height) {
const int ch_blk = get_global_id(0); const int ch_blk = get_global_id(0);
const int w = get_global_id(1); const int w = get_global_id(1);
const int hb = get_global_id(2); const int hb = get_global_id(2);
...@@ -53,72 +44,59 @@ __kernel void resize_bicubic_nocache(KERNEL_ERROR_PARAMS ...@@ -53,72 +44,59 @@ __kernel void resize_bicubic_nocache(KERNEL_ERROR_PARAMS
const int in_w_offset = mul24(ch_blk, in_width); const int in_w_offset = mul24(ch_blk, in_width);
const int in_h_offset = mul24(b, in_height); const int in_h_offset = mul24(b, in_height);
const int h_in_loc = height_scale * h; const int h_in_loc = (int)h_in;
const float h_delta = height_scale * h - h_in_loc; const float h_delta = h_in - h_in_loc;
const int h_offset = h_delta * kTableSize + 0.5; const int h_offset = h_delta * TABLE_SIZE + 0.5f;
const int w_in_loc = width_scale * w; const int w_in_loc = (int)w_in;
const float w_delta = width_scale * w - w_in_loc; const float w_delta = w_in - w_in_loc;
const int w_offset = w_delta * kTableSize + 0.5; const int w_offset = w_delta * TABLE_SIZE + 0.5f;
float4 y_weights = {ComputeCoeffs(h_offset * 2 + 1), const float h_offset_l = h_offset;
ComputeCoeffs(h_offset * 2), const float h_offset_r = TABLE_SIZE - h_offset_l;
ComputeCoeffs((kTableSize - h_offset) * 2), float4 y_weights = {coeff_odd(h_offset_l), coeff_even(h_offset_l),
ComputeCoeffs((kTableSize - h_offset) * 2 + 1)}; coeff_even(h_offset_r), coeff_odd(h_offset_r)};
int4 y_indices = {BOUND(h_in_loc - 1, in_height), int4 y_indices = {h_in_loc - 1, h_in_loc, h_in_loc + 1, h_in_loc + 2};
BOUND(h_in_loc, in_height), y_indices = min(max(y_indices, 0), in_height - 1);
BOUND(h_in_loc + 1, in_height),
BOUND(h_in_loc + 2, in_height)}; const float w_offset_l = w_offset;
float4 x_weights = {ComputeCoeffs(w_offset * 2 + 1), const float w_offset_r = TABLE_SIZE - w_offset_l;
ComputeCoeffs(w_offset * 2), float4 x_weights = {coeff_odd(w_offset_l), coeff_even(w_offset_l),
ComputeCoeffs((kTableSize - w_offset) * 2), coeff_even(w_offset_r), coeff_odd(w_offset_r)};
ComputeCoeffs((kTableSize - w_offset) * 2 + 1)}; int4 x_indices = {w_in_loc - 1, w_in_loc, w_in_loc + 1, w_in_loc + 2};
int4 x_indices = {BOUND(w_in_loc - 1, in_width), x_indices = min(max(x_indices, 0), in_width - 1);
BOUND(w_in_loc, in_width),
BOUND(w_in_loc + 1, in_width), float4 coeffs0 = 0, coeffs1 = 0, coeffs2 = 0, coeffs3 = 0;
BOUND(w_in_loc + 2, in_width)};
float4 coeffs0 = {0, 0, 0, 0};
float4 coeffs1 = {0, 0, 0, 0};
float4 coeffs2 = {0, 0, 0, 0};
float4 coeffs3 = {0, 0, 0, 0};
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
int y_index = y_indices.s0; int y_index = y_indices.s0;
if ( i == 1 ) { y_index = y_indices.s1; } if ( i == 1 ) { y_index = y_indices.s1; }
if ( i == 2 ) { y_index = y_indices.s2; } if ( i == 2 ) { y_index = y_indices.s2; }
if ( i == 3 ) { y_index = y_indices.s3; } if ( i == 3 ) { y_index = y_indices.s3; }
const int in_h_index = in_h_offset + y_index;
DATA_TYPE4 data0 = READ_IMAGET(input, SAMPLER, DATA_TYPE4 data0 = READ_IMAGET(input, SAMPLER,
(int2)(in_w_offset + x_indices.s0, in_h_offset + y_index)); (int2)(in_w_offset + x_indices.s0, in_h_index));
DATA_TYPE4 data1 = READ_IMAGET(input, SAMPLER, DATA_TYPE4 data1 = READ_IMAGET(input, SAMPLER,
(int2)(in_w_offset + x_indices.s1, in_h_offset + y_index)); (int2)(in_w_offset + x_indices.s1, in_h_index));
DATA_TYPE4 data2 = READ_IMAGET(input, SAMPLER, DATA_TYPE4 data2 = READ_IMAGET(input, SAMPLER,
(int2)(in_w_offset + x_indices.s2, in_h_offset + y_index)); (int2)(in_w_offset + x_indices.s2, in_h_index));
DATA_TYPE4 data3 = READ_IMAGET(input, SAMPLER, DATA_TYPE4 data3 = READ_IMAGET(input, SAMPLER,
(int2)(in_w_offset + x_indices.s3, in_h_offset + y_index)); (int2)(in_w_offset + x_indices.s3, in_h_index));
float4 xw0 = { x_weights.s0, x_weights.s0, x_weights.s0, x_weights.s0 }; float4 res = 0;
float4 xw1 = { x_weights.s1, x_weights.s1, x_weights.s1, x_weights.s1 }; res = mad(data0, x_weights.s0, res);
float4 xw2 = { x_weights.s2, x_weights.s2, x_weights.s2, x_weights.s2 }; res = mad(data1, x_weights.s1, res);
float4 xw3 = { x_weights.s3, x_weights.s3, x_weights.s3, x_weights.s3 }; res = mad(data2, x_weights.s2, res);
float4 res = { 0, 0, 0, 0 }; res = mad(data3, x_weights.s3, res);
res = mad(xw0, data0, res);
res = mad(xw1, data1, res);
res = mad(xw2, data2, res);
res = mad(xw3, data3, res);
if ( i == 0 ) { coeffs0 = res; } if ( i == 0 ) { coeffs0 = res; }
if ( i == 1 ) { coeffs1 = res; } if ( i == 1 ) { coeffs1 = res; }
if ( i == 2 ) { coeffs2 = res; } if ( i == 2 ) { coeffs2 = res; }
if ( i == 3 ) { coeffs3 = res; } if ( i == 3 ) { coeffs3 = res; }
} }
float4 yw0 = { y_weights.s0, y_weights.s0, y_weights.s0, y_weights.s0 }; DATA_TYPE4 outdata = 0;
float4 yw1 = { y_weights.s1, y_weights.s1, y_weights.s1, y_weights.s1 }; outdata = mad(coeffs0, y_weights.s0, outdata);
float4 yw2 = { y_weights.s2, y_weights.s2, y_weights.s2, y_weights.s2 }; outdata = mad(coeffs1, y_weights.s1, outdata);
float4 yw3 = { y_weights.s3, y_weights.s3, y_weights.s3, y_weights.s3 }; outdata = mad(coeffs2, y_weights.s2, outdata);
DATA_TYPE4 outdata = { 0, 0, 0, 0 }; outdata = mad(coeffs3, y_weights.s3, outdata);
outdata = mad(yw0, coeffs0, outdata);
outdata = mad(yw1, coeffs1, outdata);
outdata = mad(yw2, coeffs2, outdata);
outdata = mad(yw3, coeffs3, outdata);
const int out_w_offset = mul24(ch_blk, out_width); const int out_w_offset = mul24(ch_blk, out_width);
const int out_h_offset = mul24(b, out_height); const int out_h_offset = mul24(b, out_height);
......
...@@ -76,6 +76,7 @@ MaceStatus ResizeBicubicFunctor<DeviceType::GPU, T>::operator()( ...@@ -76,6 +76,7 @@ MaceStatus ResizeBicubicFunctor<DeviceType::GPU, T>::operator()(
auto dt = DataTypeToEnum<T>::value; auto dt = DataTypeToEnum<T>::value;
built_options.emplace("-DDATA_TYPE=" + DtToUpCompatibleCLDt(dt)); built_options.emplace("-DDATA_TYPE=" + DtToUpCompatibleCLDt(dt));
built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpCompatibleCLCMDDt(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpCompatibleCLCMDDt(dt));
built_options.emplace(MakeString("-DTABLE_SIZE=", kTableSize));
MACE_RETURN_IF_ERROR( MACE_RETURN_IF_ERROR(
runtime->BuildKernel("resize_bicubic", runtime->BuildKernel("resize_bicubic",
kernel_name, kernel_name,
......
...@@ -103,7 +103,6 @@ inline void ResizeImage(const float *images, ...@@ -103,7 +103,6 @@ inline void ResizeImage(const float *images,
std::array<index_t, 4> y_indices; std::array<index_t, 4> y_indices;
GetWeightsAndIndices(height_scale, y, in_height, &y_weights, GetWeightsAndIndices(height_scale, y, in_height, &y_weights,
&y_indices); &y_indices);
std::stringstream ss;
for (index_t x = 0; x < out_width; ++x) { for (index_t x = 0; x < out_width; ++x) {
std::array<float, 4> x_weights; std::array<float, 4> x_weights;
std::array<index_t, 4> x_indices; std::array<index_t, 4> x_indices;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册