提交 3604f69e 编写于 作者: 赵奇可

Fix code style; delete unused comments.

上级 d34eb9c2
......@@ -4,7 +4,6 @@
const int kTableSize = (1 << 10);
inline float ComputeCoeffs(int i) {
// const int kTableSize = (1 << 10);
const float A = -0.75;
float x = (i / 2) * 1.0 / kTableSize;
if (i % 2 == 0){
......@@ -18,39 +17,11 @@ inline float ComputeCoeffs(int i) {
}
}
//#define GET_COEFFS(coeffs_tab, i) coeffs_tab[i]
//float getCoeffs(const float *coeffs_tab, int i) {
// return coeffs_tab[i];
//}
// Clamp `val` into the valid index range [0, limit - 1].
// Macro arguments are parenthesized so that expression arguments
// (e.g. BOUND(h_in_loc - 1, in_height)) expand with the intended precedence.
#define BOUND(val, limit) min((limit) - 1, max(0, (val)))
//int Bound(int val, int limit) {
// return min(limit - 1, max(0, val));
//}
//float4 GetWeights(const float* coeffs_tab, float scale, int out_loc, int limit) {
// const int in_loc = scale * out_loc;
// const float delta = scale * out_loc - in_loc;
// const int offset = delta * kTableSize + 0.5; //lrintf not found in opencl;
// float4 weights = {getCoeffs(coeffs_tab, offset * 2 + 1),
// getCoeffs(coeffs_tab, offset * 2),
// getCoeffs(coeffs_tab, (kTableSize - offset) * 2),
// getCoeffs(coeffs_tab, (kTableSize - offset) * 2 + 1)};
// return weights;
//}
//
//int4 GetIndices(float scale, int out_loc, int limit) {
// const int in_loc = scale * out_loc;
// const float delta = scale * out_loc - in_loc;
// const int offset = delta * kTableSize + 0.5; //lrintf not found in opencl
// int4 indices = {Bound(in_loc - 1, limit), Bound(in_loc, limit),
// Bound(in_loc + 1, limit), Bound(in_loc + 2, limit)};
// return indices;
//}
__kernel void resize_bicubic_nocache(KERNEL_ERROR_PARAMS
GLOBAL_WORK_GROUP_SIZE_DIM3
__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
__read_only image2d_t input,
__write_only image2d_t output,
__private const float height_scale,
__private const float width_scale,
......@@ -83,27 +54,30 @@ __kernel void resize_bicubic_nocache(KERNEL_ERROR_PARAMS
const int in_w_offset = mul24(ch_blk, in_width);
const int in_h_offset = mul24(b, in_height);
//begin resize bicubic
const int h_in_loc = height_scale * h;
const float h_delta = height_scale * h - h_in_loc;
const int h_offset = h_delta * kTableSize + 0.5; //lrintf not found in opencl;
const int h_offset = h_delta * kTableSize + 0.5;
const int w_in_loc = width_scale * w;
const float w_delta = width_scale * w - w_in_loc;
const int w_offset = w_delta * kTableSize + 0.5; //lrintf not found in opencl;
const int w_offset = w_delta * kTableSize + 0.5;
float4 y_weights = {ComputeCoeffs(h_offset * 2 + 1),
ComputeCoeffs(h_offset * 2),
ComputeCoeffs((kTableSize - h_offset) * 2),
ComputeCoeffs((kTableSize - h_offset) * 2 + 1)};
int4 y_indices = {BOUND(h_in_loc - 1, in_height), BOUND(h_in_loc, in_height),
BOUND(h_in_loc + 1, in_height), BOUND(h_in_loc + 2, in_height)};
int4 y_indices = {BOUND(h_in_loc - 1, in_height),
BOUND(h_in_loc, in_height),
BOUND(h_in_loc + 1, in_height),
BOUND(h_in_loc + 2, in_height)};
float4 x_weights = {ComputeCoeffs(w_offset * 2 + 1),
ComputeCoeffs(w_offset * 2),
ComputeCoeffs((kTableSize - w_offset) * 2),
ComputeCoeffs((kTableSize - w_offset) * 2 + 1)};
int4 x_indices = {BOUND(w_in_loc - 1, in_width), BOUND(w_in_loc, in_width),
BOUND(w_in_loc + 1, in_width), BOUND(w_in_loc + 2, in_width)};
int4 x_indices = {BOUND(w_in_loc - 1, in_width),
BOUND(w_in_loc, in_width),
BOUND(w_in_loc + 1, in_width),
BOUND(w_in_loc + 2, in_width)};
float4 coeffs0 = {0, 0, 0, 0};
float4 coeffs1 = {0, 0, 0, 0};
......@@ -115,13 +89,13 @@ __kernel void resize_bicubic_nocache(KERNEL_ERROR_PARAMS
if ( i == 2 ) { y_index = y_indices.s2; }
if ( i == 3 ) { y_index = y_indices.s3; }
DATA_TYPE4 data0 = READ_IMAGET(input, SAMPLER,
(int2)(in_w_offset + x_indices.s0, in_h_offset + y_index));
(int2)(in_w_offset + x_indices.s0, in_h_offset + y_index));
DATA_TYPE4 data1 = READ_IMAGET(input, SAMPLER,
(int2)(in_w_offset + x_indices.s1, in_h_offset + y_index));
(int2)(in_w_offset + x_indices.s1, in_h_offset + y_index));
DATA_TYPE4 data2 = READ_IMAGET(input, SAMPLER,
(int2)(in_w_offset + x_indices.s2, in_h_offset + y_index));
(int2)(in_w_offset + x_indices.s2, in_h_offset + y_index));
DATA_TYPE4 data3 = READ_IMAGET(input, SAMPLER,
(int2)(in_w_offset + x_indices.s3, in_h_offset + y_index));
(int2)(in_w_offset + x_indices.s3, in_h_offset + y_index));
float4 xw0 = { x_weights.s0, x_weights.s0, x_weights.s0, x_weights.s0 };
float4 xw1 = { x_weights.s1, x_weights.s1, x_weights.s1, x_weights.s1 };
......@@ -150,7 +124,6 @@ __kernel void resize_bicubic_nocache(KERNEL_ERROR_PARAMS
const int out_h_offset = mul24(b, out_height);
WRITE_IMAGET(output, (int2)(out_w_offset + w, out_h_offset + h), outdata);
//end bicubic
}
......
......@@ -85,13 +85,7 @@ MaceStatus ResizeBicubicFunctor<DeviceType::GPU, T>::operator()(
kwg_size_ =
static_cast<uint32_t>(runtime->GetKernelMaxWorkGroupSize(kernel_));
}
// //create buffer
// cl::Buffer coeffs_buf(runtime->context(), CL_MEM_READ_WRITE, (kTableSize + 1 ) * 2 * sizeof(float));
// if (runtime->command_queue().enqueueWriteBuffer(coeffs_buf,CL_TRUE,0,(kTableSize + 1 ) * 2 * sizeof(float),GetCoeffsTable()) != CL_SUCCESS) {
// std::cout << "Failed to write memory;n";
// exit(1);
// }
// //end create buffer
if (!IsVecEqual(input_shape_, input->shape())) {
MACE_CHECK(out_height > 0 && out_width > 0);
std::vector<index_t> output_shape{batch, out_height, out_width, channels};
......@@ -116,7 +110,6 @@ MaceStatus ResizeBicubicFunctor<DeviceType::GPU, T>::operator()(
kernel_.setArg(idx++, static_cast<int32_t>(in_height));
kernel_.setArg(idx++, static_cast<int32_t>(in_width));
kernel_.setArg(idx++, static_cast<int32_t>(out_height));
// kernel_.setArg(idx++, coeffs_buf);
input_shape_ = input->shape();
}
......
......@@ -95,12 +95,6 @@ inline void ResizeImage(const float *images,
const float height_scale,
const float width_scale,
float *output) {
// std::stringstream tabss;
// for(int tabi = 0; tabi < 10; tabi++){
// tabss << GetCoeffsTable()[tabi] << " ";
// }
// LOG(WARNING) << tabss.str().c_str();
std::array<float, 4> coeff = {{0.0, 0.0, 0.0, 0.0}};
#pragma omp parallel for collapse(2)
for (index_t b = 0; b < batch_size; ++b) {
......@@ -119,22 +113,24 @@ inline void ResizeImage(const float *images,
for (index_t c = 0; c < channels; ++c) {
// Use a 4x4 patch to compute the interpolated output value at
// (b, y, x, c).
const float *channel_input_ptr = images + (b * channels + c) * in_height * in_width;
float *channel_output_ptr = output + (b * channels + c) * out_height * out_width;
const float *channel_input_ptr =
images + (b * channels + c) * in_height * in_width;
float *channel_output_ptr =
output + (b * channels + c) * out_height * out_width;
for (index_t i = 0; i < 4; ++i) {
const std::array<float, 4> values = {
{static_cast<float>(
channel_input_ptr[y_indices[i] * in_width + x_indices[0]]),
static_cast<float>(
channel_input_ptr[y_indices[i] * in_width + x_indices[1]]),
static_cast<float>(
channel_input_ptr[y_indices[i] * in_width + x_indices[2]]),
static_cast<float>(
channel_input_ptr[y_indices[i] * in_width + x_indices[3]])}};
{static_cast<float>(channel_input_ptr
[y_indices[i] * in_width + x_indices[0]]),
static_cast<float>(channel_input_ptr
[y_indices[i] * in_width + x_indices[1]]),
static_cast<float>(channel_input_ptr
[y_indices[i] * in_width + x_indices[2]]),
static_cast<float>(channel_input_ptr
[y_indices[i] * in_width + x_indices[3]])}};
coeff[i] = Interpolate1D(x_weights, values);
}
channel_output_ptr[y * out_width + x] = Interpolate1D(y_weights, coeff);
channel_output_ptr[y * out_width + x] =
Interpolate1D(y_weights, coeff);
}
}
}
......
......@@ -83,17 +83,17 @@ void ResizeBicubicBenchmark(int iters,
} // namespace
#define MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, TYPE, DEVICE) \
static void \
static void \
MACE_BM_RESIZE_BICUBIC_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_\
##DEVICE( \
int iters) { \
const int64_t macc = static_cast<int64_t>(iters) * N * C * H1 * W1 * 3; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H0 * W0; \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
##DEVICE( \
int iters) { \
const int64_t macc = static_cast<int64_t>(iters) * N * C * H1 * W1 * 3; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H0 * W0; \
mace::testing::MaccProcessed(macc); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
ResizeBicubicBenchmark<DEVICE, TYPE>(iters, N, C, H0, W0, H1, W1); \
} \
MACE_BENCHMARK( \
} \
MACE_BENCHMARK( \
MACE_BM_RESIZE_BICUBIC_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_\
##DEVICE)
......
......@@ -48,7 +48,6 @@ TEST_F(ResizeBicubicTest, CPUResizeBicubicWOAlignCorners) {
NHWC);
// Check
//TODO change expected data
auto expected = CreateTensor<float>({1, 1, 2, 3}, {0, 1, 2, 6, 7, 8});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
......@@ -63,7 +62,8 @@ TEST_F(ResizeBicubicTest, CPUResizeBicubicWOAlignCorners1) {
std::vector<float> input(48);
std::iota(begin(input), end(input), 0);
net.AddInputFromArray<DeviceType::CPU, float>("Input", {1, 4, 4, 3}, input);
net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW", NCHW);
net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
NCHW);
OpDefBuilder("ResizeBicubic", "ResizeBicubicTest")
.Input("InputNCHW")
......@@ -73,11 +73,14 @@ TEST_F(ResizeBicubicTest, CPUResizeBicubicWOAlignCorners1) {
// Run
net.RunOp();
net.TransformDataFormat<DeviceType::CPU, float>("OutputNCHW", NCHW, "Output", NHWC);
net.TransformDataFormat<DeviceType::CPU, float>("OutputNCHW", NCHW, "Output",
NHWC);
// Check
//TODO change expected data
auto expected = CreateTensor<float>({1, 2, 3, 3}, {0., 1., 2.,4.110297, 5.110297, 6.110297, 8.223037, 9.223036, 10.223037, 24., 25., 26., 28.110298, 29.1103, 30.110298, 32.223038, 33.223038, 34.223038});
auto expected = CreateTensor<float>({1, 2, 3, 3},
{0., 1., 2., 4.110297, 5.110297, 6.110297,
8.223037, 9.223036, 10.223037, 24., 25., 26.,
28.110298, 29.1103, 30.110298, 32.223038, 33.223038, 34.223038});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
}
......@@ -167,7 +170,6 @@ void TestRandomResizeBicubic() {
// Check
ExpectTensorNear<float>(expected, *net.GetOutput("DeviceOutput"), 1e-5,
1e-4);
}
}
} // namespace
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册