提交 43b96415 编写于 作者: 李滨

Merge branch 'half_pixel_centers' into 'master'

feat: support `half_pixel_centers` arg for `resize` type ops

See merge request deep-computing/mace!1281
...@@ -16,6 +16,7 @@ __kernel void resize_bicubic_nocache(OUT_OF_RANGE_PARAMS ...@@ -16,6 +16,7 @@ __kernel void resize_bicubic_nocache(OUT_OF_RANGE_PARAMS
__write_only image2d_t output, __write_only image2d_t output,
__private const float height_scale, __private const float height_scale,
__private const float width_scale, __private const float width_scale,
__private const int half_pixel_centers,
__private const int in_height, __private const int in_height,
__private const int in_width, __private const int in_width,
__private const int out_height) { __private const int out_height) {
...@@ -38,8 +39,10 @@ __kernel void resize_bicubic_nocache(OUT_OF_RANGE_PARAMS ...@@ -38,8 +39,10 @@ __kernel void resize_bicubic_nocache(OUT_OF_RANGE_PARAMS
const int b = hb / out_height; const int b = hb / out_height;
const int h = hb - mul24(b, out_height); const int h = hb - mul24(b, out_height);
const float h_in = h * height_scale; const float h_in = half_pixel_centers ?
const float w_in = w * width_scale; ((float)h + 0.5f) * height_scale - 0.5f : h * height_scale;
const float w_in = half_pixel_centers ?
((float)w + 0.5f) * width_scale - 0.5f : w * width_scale;
const int in_w_offset = mul24(ch_blk, in_width); const int in_w_offset = mul24(ch_blk, in_width);
const int in_h_offset = mul24(b, in_height); const int in_h_offset = mul24(b, in_height);
......
...@@ -6,6 +6,7 @@ __kernel void resize_bilinear_nocache(OUT_OF_RANGE_PARAMS ...@@ -6,6 +6,7 @@ __kernel void resize_bilinear_nocache(OUT_OF_RANGE_PARAMS
__write_only image2d_t output, __write_only image2d_t output,
__private const float height_scale, __private const float height_scale,
__private const float width_scale, __private const float width_scale,
__private const int half_pixel_centers,
__private const int in_height, __private const int in_height,
__private const int in_width, __private const int in_width,
__private const int out_height) { __private const int out_height) {
...@@ -26,8 +27,10 @@ __kernel void resize_bilinear_nocache(OUT_OF_RANGE_PARAMS ...@@ -26,8 +27,10 @@ __kernel void resize_bilinear_nocache(OUT_OF_RANGE_PARAMS
const int b = hb / out_height; const int b = hb / out_height;
const int h = hb - mul24(b, out_height); const int h = hb - mul24(b, out_height);
const float h_in = h * height_scale; const float h_in = half_pixel_centers ?
const float w_in = w * width_scale; ((float)h + 0.5f) * height_scale - 0.5f : h * height_scale;
const float w_in = half_pixel_centers ?
((float)w + 0.5f) * width_scale - 0.5f : w * width_scale;
const int h_lower = max(0, (int) floor(h_in)); const int h_lower = max(0, (int) floor(h_in));
const int h_upper = min(in_height - 1, h_lower + 1); const int h_upper = min(in_height - 1, h_lower + 1);
const int w_lower = max(0, (int) floor(w_in)); const int w_lower = max(0, (int) floor(w_in));
......
...@@ -7,6 +7,7 @@ __kernel void resize_nearest_neighbor_nocache( ...@@ -7,6 +7,7 @@ __kernel void resize_nearest_neighbor_nocache(
__write_only image2d_t output, __write_only image2d_t output,
__private const float height_scale, __private const float height_scale,
__private const float width_scale, __private const float width_scale,
__private const int half_pixel_centers,
__private const int in_height, __private const int in_height,
__private const int in_width, __private const int in_width,
__private const int out_height, __private const int out_height,
...@@ -27,10 +28,15 @@ __kernel void resize_nearest_neighbor_nocache( ...@@ -27,10 +28,15 @@ __kernel void resize_nearest_neighbor_nocache(
const int b = hb / out_height; const int b = hb / out_height;
const int h = hb - mul24(b, out_height); const int h = hb - mul24(b, out_height);
const int h_in = min((align_corner) ? (int) round(h * height_scale) : const float h_in_f = half_pixel_centers ?
(int) floor(h * height_scale), in_height - 1); ((float)h + 0.5f) * height_scale : h * height_scale;
const int w_in = min((align_corner) ? (int) round(w * width_scale) : const float w_in_f = half_pixel_centers ?
(int) floor(w * width_scale), in_width - 1); ((float)w + 0.5f) * width_scale : w * width_scale;
const int h_in = min((align_corner) ? (int) round(h_in_f) :
(int) floor(h_in_f), in_height - 1);
const int w_in = min((align_corner) ? (int) round(w_in_f) :
(int) floor(w_in_f), in_width - 1);
const int in_w_offset = mul24(ch_blk, in_width); const int in_w_offset = mul24(ch_blk, in_width);
const int in_h_offset = mul24(b, in_height); const int in_h_offset = mul24(b, in_height);
......
...@@ -85,6 +85,7 @@ MaceStatus ResizeBicubicKernel::Compute( ...@@ -85,6 +85,7 @@ MaceStatus ResizeBicubicKernel::Compute(
kernel_.setArg(idx++, *(output->opencl_image())); kernel_.setArg(idx++, *(output->opencl_image()));
kernel_.setArg(idx++, height_scale); kernel_.setArg(idx++, height_scale);
kernel_.setArg(idx++, width_scale); kernel_.setArg(idx++, width_scale);
kernel_.setArg(idx++, static_cast<int32_t>(half_pixel_centers_));
kernel_.setArg(idx++, static_cast<int32_t>(in_height)); kernel_.setArg(idx++, static_cast<int32_t>(in_height));
kernel_.setArg(idx++, static_cast<int32_t>(in_width)); kernel_.setArg(idx++, static_cast<int32_t>(in_width));
kernel_.setArg(idx++, static_cast<int32_t>(out_height)); kernel_.setArg(idx++, static_cast<int32_t>(out_height));
......
...@@ -64,9 +64,11 @@ inline std::vector<uint32_t> LocalWS(OpenCLRuntime *runtime, ...@@ -64,9 +64,11 @@ inline std::vector<uint32_t> LocalWS(OpenCLRuntime *runtime,
class ResizeBicubicKernel : public OpenCLResizeBicubicKernel { class ResizeBicubicKernel : public OpenCLResizeBicubicKernel {
public: public:
ResizeBicubicKernel(bool align_corners, ResizeBicubicKernel(bool align_corners,
bool half_pixel_centers,
const index_t out_height, const index_t out_height,
const index_t out_width) const index_t out_width)
: align_corners_(align_corners), : align_corners_(align_corners),
half_pixel_centers_(half_pixel_centers),
out_height_(out_height), out_height_(out_height),
out_width_(out_width) {} out_width_(out_width) {}
...@@ -77,6 +79,7 @@ class ResizeBicubicKernel : public OpenCLResizeBicubicKernel { ...@@ -77,6 +79,7 @@ class ResizeBicubicKernel : public OpenCLResizeBicubicKernel {
private: private:
bool align_corners_; bool align_corners_;
bool half_pixel_centers_;
index_t out_height_; index_t out_height_;
index_t out_width_; index_t out_width_;
cl::Kernel kernel_; cl::Kernel kernel_;
......
...@@ -85,6 +85,7 @@ MaceStatus ResizeBilinearKernel::Compute( ...@@ -85,6 +85,7 @@ MaceStatus ResizeBilinearKernel::Compute(
kernel_.setArg(idx++, *(output->opencl_image())); kernel_.setArg(idx++, *(output->opencl_image()));
kernel_.setArg(idx++, height_scale); kernel_.setArg(idx++, height_scale);
kernel_.setArg(idx++, width_scale); kernel_.setArg(idx++, width_scale);
kernel_.setArg(idx++, static_cast<int32_t>(half_pixel_centers_));
kernel_.setArg(idx++, static_cast<int32_t>(in_height)); kernel_.setArg(idx++, static_cast<int32_t>(in_height));
kernel_.setArg(idx++, static_cast<int32_t>(in_width)); kernel_.setArg(idx++, static_cast<int32_t>(in_width));
kernel_.setArg(idx++, static_cast<int32_t>(out_height)); kernel_.setArg(idx++, static_cast<int32_t>(out_height));
......
...@@ -66,8 +66,9 @@ inline std::vector<uint32_t> LocalWS(OpenCLRuntime *runtime, ...@@ -66,8 +66,9 @@ inline std::vector<uint32_t> LocalWS(OpenCLRuntime *runtime,
class ResizeBilinearKernel : public OpenCLResizeBilinearKernel { class ResizeBilinearKernel : public OpenCLResizeBilinearKernel {
public: public:
explicit ResizeBilinearKernel(bool align_corners) explicit ResizeBilinearKernel(bool align_corners, bool half_pixel_centers)
: align_corners_(align_corners) {} : align_corners_(align_corners),
half_pixel_centers_(half_pixel_centers) {}
MaceStatus Compute( MaceStatus Compute(
OpContext *context, OpContext *context,
...@@ -78,6 +79,7 @@ class ResizeBilinearKernel : public OpenCLResizeBilinearKernel { ...@@ -78,6 +79,7 @@ class ResizeBilinearKernel : public OpenCLResizeBilinearKernel {
private: private:
bool align_corners_; bool align_corners_;
bool half_pixel_centers_;
cl::Kernel kernel_; cl::Kernel kernel_;
uint32_t kwg_size_; uint32_t kwg_size_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
......
...@@ -82,6 +82,7 @@ MaceStatus ResizeNearestNeighborKernel::Compute( ...@@ -82,6 +82,7 @@ MaceStatus ResizeNearestNeighborKernel::Compute(
kernel_.setArg(idx++, *(output->opencl_image())); kernel_.setArg(idx++, *(output->opencl_image()));
kernel_.setArg(idx++, height_scale); kernel_.setArg(idx++, height_scale);
kernel_.setArg(idx++, width_scale); kernel_.setArg(idx++, width_scale);
kernel_.setArg(idx++, static_cast<int32_t>(half_pixel_centers_));
kernel_.setArg(idx++, static_cast<int32_t>(in_height)); kernel_.setArg(idx++, static_cast<int32_t>(in_height));
kernel_.setArg(idx++, static_cast<int32_t>(in_width)); kernel_.setArg(idx++, static_cast<int32_t>(in_width));
kernel_.setArg(idx++, static_cast<int32_t>(out_height)); kernel_.setArg(idx++, static_cast<int32_t>(out_height));
......
...@@ -66,8 +66,10 @@ inline std::vector<uint32_t> LocalWS(OpenCLRuntime *runtime, ...@@ -66,8 +66,10 @@ inline std::vector<uint32_t> LocalWS(OpenCLRuntime *runtime,
class ResizeNearestNeighborKernel : public OpenCLResizeNearestNeighborKernel { class ResizeNearestNeighborKernel : public OpenCLResizeNearestNeighborKernel {
public: public:
explicit ResizeNearestNeighborKernel(bool align_corners) explicit ResizeNearestNeighborKernel(bool align_corners,
: align_corners_(align_corners) {} bool half_pixel_centers)
: align_corners_(align_corners),
half_pixel_centers_(half_pixel_centers) {}
MaceStatus Compute( MaceStatus Compute(
OpContext *context, OpContext *context,
...@@ -78,6 +80,7 @@ class ResizeNearestNeighborKernel : public OpenCLResizeNearestNeighborKernel { ...@@ -78,6 +80,7 @@ class ResizeNearestNeighborKernel : public OpenCLResizeNearestNeighborKernel {
private: private:
bool align_corners_; bool align_corners_;
bool half_pixel_centers_;
cl::Kernel kernel_; cl::Kernel kernel_;
uint32_t kwg_size_; uint32_t kwg_size_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
......
...@@ -56,11 +56,15 @@ inline int64_t Bound(int64_t val, int64_t limit) { ...@@ -56,11 +56,15 @@ inline int64_t Bound(int64_t val, int64_t limit) {
return std::min<int64_t>(limit - 1ll, std::max<int64_t>(0ll, val)); return std::min<int64_t>(limit - 1ll, std::max<int64_t>(0ll, val));
} }
inline void GetWeightsAndIndices(float scale, int64_t out_loc, int64_t limit, inline void GetWeightsAndIndices(float scale, bool half_pixel_centers,
int64_t out_loc, int64_t limit,
std::vector<float> *weights, std::vector<float> *weights,
std::vector<int64_t> *indices) { std::vector<int64_t> *indices) {
auto in_loc = static_cast<int64_t>(scale * out_loc); const float in = half_pixel_centers ?
const float delta = scale * out_loc - in_loc; (static_cast<float>(out_loc) + 0.5f) * scale - 0.5f :
out_loc * scale;
auto in_loc = static_cast<int64_t>(in);
const float delta = in - in_loc;
const int64_t offset = lrintf(delta * common::utils::kTableSize); const int64_t offset = lrintf(delta * common::utils::kTableSize);
const float *coeffs_tab = GetCoeffsTable(); const float *coeffs_tab = GetCoeffsTable();
*weights = {coeffs_tab[offset * 2 + 1], *weights = {coeffs_tab[offset * 2 + 1],
...@@ -87,6 +91,7 @@ inline void ResizeImage(const OpContext *context, ...@@ -87,6 +91,7 @@ inline void ResizeImage(const OpContext *context,
const index_t channels, const index_t channels,
const float height_scale, const float height_scale,
const float width_scale, const float width_scale,
const bool half_pixel_centers,
float *output) { float *output) {
utils::ThreadPool utils::ThreadPool
&thread_pool = context->device()->cpu_runtime()->thread_pool(); &thread_pool = context->device()->cpu_runtime()->thread_pool();
...@@ -97,13 +102,13 @@ inline void ResizeImage(const OpContext *context, ...@@ -97,13 +102,13 @@ inline void ResizeImage(const OpContext *context,
for (index_t y = start1; y < end1; y += step1) { for (index_t y = start1; y < end1; y += step1) {
std::vector<float> y_weights; std::vector<float> y_weights;
std::vector<index_t> y_indices; std::vector<index_t> y_indices;
GetWeightsAndIndices(height_scale, y, in_height, &y_weights, GetWeightsAndIndices(height_scale, half_pixel_centers, y, in_height,
&y_indices); &y_weights, &y_indices);
for (index_t x = 0; x < out_width; ++x) { for (index_t x = 0; x < out_width; ++x) {
std::vector<float> x_weights; std::vector<float> x_weights;
std::vector<index_t> x_indices; std::vector<index_t> x_indices;
GetWeightsAndIndices(width_scale, x, in_width, &x_weights, GetWeightsAndIndices(width_scale, half_pixel_centers, x, in_width,
&x_indices); &x_weights, &x_indices);
for (index_t c = 0; c < channels; ++c) { for (index_t c = 0; c < channels; ++c) {
// Use a 4x4 patch to compute the interpolated output value at // Use a 4x4 patch to compute the interpolated output value at
...@@ -139,6 +144,8 @@ class ResizeBicubicOp<DeviceType::CPU, float> : public Operation { ...@@ -139,6 +144,8 @@ class ResizeBicubicOp<DeviceType::CPU, float> : public Operation {
explicit ResizeBicubicOp(OpConstructContext *context) explicit ResizeBicubicOp(OpConstructContext *context)
: Operation(context), : Operation(context),
align_corners_(Operation::GetOptionalArg<bool>("align_corners", false)), align_corners_(Operation::GetOptionalArg<bool>("align_corners", false)),
half_pixel_centers_(
Operation::GetOptionalArg<bool>("half_pixel_centers", false)),
size_(Operation::GetRepeatedArgs<index_t>("size", {-1, -1})) {} size_(Operation::GetRepeatedArgs<index_t>("size", {-1, -1})) {}
MaceStatus Run(OpContext *context) override { MaceStatus Run(OpContext *context) override {
...@@ -191,6 +198,7 @@ class ResizeBicubicOp<DeviceType::CPU, float> : public Operation { ...@@ -191,6 +198,7 @@ class ResizeBicubicOp<DeviceType::CPU, float> : public Operation {
channels, channels,
height_scale, height_scale,
width_scale, width_scale,
half_pixel_centers_,
output_data); output_data);
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
...@@ -198,6 +206,7 @@ class ResizeBicubicOp<DeviceType::CPU, float> : public Operation { ...@@ -198,6 +206,7 @@ class ResizeBicubicOp<DeviceType::CPU, float> : public Operation {
private: private:
bool align_corners_; bool align_corners_;
bool half_pixel_centers_;
std::vector<index_t> size_; std::vector<index_t> size_;
}; };
...@@ -209,12 +218,14 @@ class ResizeBicubicOp<DeviceType::GPU, float> : public Operation { ...@@ -209,12 +218,14 @@ class ResizeBicubicOp<DeviceType::GPU, float> : public Operation {
: Operation(context) { : Operation(context) {
bool align_corners = Operation::GetOptionalArg<bool>( bool align_corners = Operation::GetOptionalArg<bool>(
"align_corners", false); "align_corners", false);
bool half_pixel_centers = Operation::GetOptionalArg<bool>(
"half_pixel_centers", false);
std::vector<index_t> size = Operation::GetRepeatedArgs<index_t>( std::vector<index_t> size = Operation::GetRepeatedArgs<index_t>(
"size", {-1, -1}); "size", {-1, -1});
MACE_CHECK(size.size() == 2); MACE_CHECK(size.size() == 2);
if (context->GetOpMemoryType() == MemoryType::GPU_IMAGE) { if (context->GetOpMemoryType() == MemoryType::GPU_IMAGE) {
kernel_ = make_unique<opencl::image::ResizeBicubicKernel>( kernel_ = make_unique<opencl::image::ResizeBicubicKernel>(
align_corners, size[0], size[1]); align_corners, half_pixel_centers, size[0], size[1]);
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
} }
......
...@@ -40,11 +40,13 @@ inline void ComputeInterpolationWeights( ...@@ -40,11 +40,13 @@ inline void ComputeInterpolationWeights(
const index_t out_size, const index_t out_size,
const index_t in_size, const index_t in_size,
const float scale, const float scale,
bool half_pixel_centers,
CachedInterpolation *interpolation) { CachedInterpolation *interpolation) {
interpolation[out_size].lower = 0; interpolation[out_size].lower = 0;
interpolation[out_size].upper = 0; interpolation[out_size].upper = 0;
for (index_t i = out_size - 1; i >= 0; --i) { for (index_t i = out_size - 1; i >= 0; --i) {
const float in = i * scale; const float in = half_pixel_centers ?
(static_cast<float>(i) + 0.5f) * scale - 0.5f : i * scale;
interpolation[i].lower = static_cast<index_t>(in); interpolation[i].lower = static_cast<index_t>(in);
interpolation[i].upper = std::min(interpolation[i].lower + 1, in_size - 1); interpolation[i].upper = std::min(interpolation[i].lower + 1, in_size - 1);
interpolation[i].lerp = in - interpolation[i].lower; interpolation[i].lerp = in - interpolation[i].lower;
...@@ -183,7 +185,9 @@ class ResizeBilinearOp<DeviceType::CPU, T> : public Operation { ...@@ -183,7 +185,9 @@ class ResizeBilinearOp<DeviceType::CPU, T> : public Operation {
align_corners_(Operation::GetOptionalArg<bool>("align_corners", false)), align_corners_(Operation::GetOptionalArg<bool>("align_corners", false)),
size_(Operation::GetRepeatedArgs<index_t>("size", {-1, -1})), size_(Operation::GetRepeatedArgs<index_t>("size", {-1, -1})),
height_scale_(Operation::GetOptionalArg<float>("height_scale", 0)), height_scale_(Operation::GetOptionalArg<float>("height_scale", 0)),
width_scale_(Operation::GetOptionalArg<float>("width_scale", 0)) {} width_scale_(Operation::GetOptionalArg<float>("width_scale", 0)),
half_pixel_centers_(
Operation::GetOptionalArg<bool>("half_pixel_centers", false)) {}
MaceStatus Run(OpContext *context) override { MaceStatus Run(OpContext *context) override {
MACE_UNUSED(context); MACE_UNUSED(context);
...@@ -237,8 +241,10 @@ class ResizeBilinearOp<DeviceType::CPU, T> : public Operation { ...@@ -237,8 +241,10 @@ class ResizeBilinearOp<DeviceType::CPU, T> : public Operation {
std::vector<CachedInterpolation> xs(out_width + 1); std::vector<CachedInterpolation> xs(out_width + 1);
// Compute the cached interpolation weights on the x and y dimensions. // Compute the cached interpolation weights on the x and y dimensions.
ComputeInterpolationWeights(out_height, in_height, height_scale, ys.data()); ComputeInterpolationWeights(out_height, in_height, height_scale,
ComputeInterpolationWeights(out_width, in_width, width_scale, xs.data()); half_pixel_centers_, ys.data());
ComputeInterpolationWeights(out_width, in_width, width_scale,
half_pixel_centers_, xs.data());
ResizeImageNCHW(context, ResizeImageNCHW(context,
input_data, input_data,
...@@ -260,6 +266,7 @@ class ResizeBilinearOp<DeviceType::CPU, T> : public Operation { ...@@ -260,6 +266,7 @@ class ResizeBilinearOp<DeviceType::CPU, T> : public Operation {
std::vector<index_t> size_; std::vector<index_t> size_;
float height_scale_; float height_scale_;
float width_scale_; float width_scale_;
bool half_pixel_centers_;
}; };
#ifdef MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_QUANTIZE
...@@ -271,7 +278,9 @@ class ResizeBilinearOp<DeviceType::CPU, uint8_t> : public Operation { ...@@ -271,7 +278,9 @@ class ResizeBilinearOp<DeviceType::CPU, uint8_t> : public Operation {
align_corners_(Operation::GetOptionalArg<bool>("align_corners", false)), align_corners_(Operation::GetOptionalArg<bool>("align_corners", false)),
size_(Operation::GetRepeatedArgs<index_t>("size", {-1, -1})), size_(Operation::GetRepeatedArgs<index_t>("size", {-1, -1})),
height_scale_(Operation::GetOptionalArg<float>("height_scale", 0)), height_scale_(Operation::GetOptionalArg<float>("height_scale", 0)),
width_scale_(Operation::GetOptionalArg<float>("width_scale", 0)) {} width_scale_(Operation::GetOptionalArg<float>("width_scale", 0)),
half_pixel_centers_(
Operation::GetOptionalArg<bool>("half_pixel_centers", false)) {}
MaceStatus Run(OpContext *context) override { MaceStatus Run(OpContext *context) override {
MACE_UNUSED(context); MACE_UNUSED(context);
...@@ -325,8 +334,10 @@ class ResizeBilinearOp<DeviceType::CPU, uint8_t> : public Operation { ...@@ -325,8 +334,10 @@ class ResizeBilinearOp<DeviceType::CPU, uint8_t> : public Operation {
std::vector<CachedInterpolation> xs(out_width + 1); std::vector<CachedInterpolation> xs(out_width + 1);
// Compute the cached interpolation weights on the x and y dimensions. // Compute the cached interpolation weights on the x and y dimensions.
ComputeInterpolationWeights(out_height, in_height, height_scale, ys.data()); ComputeInterpolationWeights(out_height, in_height, height_scale,
ComputeInterpolationWeights(out_width, in_width, width_scale, xs.data()); half_pixel_centers_, ys.data());
ComputeInterpolationWeights(out_width, in_width, width_scale,
half_pixel_centers_, xs.data());
ResizeImageNHWC(context, ResizeImageNHWC(context,
input_data, input_data,
...@@ -348,6 +359,7 @@ class ResizeBilinearOp<DeviceType::CPU, uint8_t> : public Operation { ...@@ -348,6 +359,7 @@ class ResizeBilinearOp<DeviceType::CPU, uint8_t> : public Operation {
std::vector<index_t> size_; std::vector<index_t> size_;
float height_scale_; float height_scale_;
float width_scale_; float width_scale_;
bool half_pixel_centers_;
}; };
#endif // MACE_ENABLE_QUANTIZE #endif // MACE_ENABLE_QUANTIZE
...@@ -362,8 +374,11 @@ class ResizeBilinearOp<DeviceType::GPU, float> : public Operation { ...@@ -362,8 +374,11 @@ class ResizeBilinearOp<DeviceType::GPU, float> : public Operation {
width_scale_(Operation::GetOptionalArg<float>("width_scale", 0)) { width_scale_(Operation::GetOptionalArg<float>("width_scale", 0)) {
bool align_corners = Operation::GetOptionalArg<bool>( bool align_corners = Operation::GetOptionalArg<bool>(
"align_corners", false); "align_corners", false);
bool half_pixel_centers = Operation::GetOptionalArg<bool>(
"half_pixel_centers", false);
if (context->GetOpMemoryType() == MemoryType::GPU_IMAGE) { if (context->GetOpMemoryType() == MemoryType::GPU_IMAGE) {
kernel_ = make_unique<opencl::image::ResizeBilinearKernel>(align_corners); kernel_ = make_unique<opencl::image::ResizeBilinearKernel>(
align_corners, half_pixel_centers);
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
} }
......
...@@ -37,7 +37,8 @@ inline void ResizeImageNCHW(const OpContext *context, ...@@ -37,7 +37,8 @@ inline void ResizeImageNCHW(const OpContext *context,
const index_t channels, const index_t channels,
const float height_scale, const float height_scale,
const float width_scale, const float width_scale,
bool align_corners, const bool align_corners,
const bool half_pixel_centers,
T *output) { T *output) {
utils::ThreadPool utils::ThreadPool
&thread_pool = context->device()->cpu_runtime()->thread_pool(); &thread_pool = context->device()->cpu_runtime()->thread_pool();
...@@ -52,14 +53,20 @@ inline void ResizeImageNCHW(const OpContext *context, ...@@ -52,14 +53,20 @@ inline void ResizeImageNCHW(const OpContext *context,
T *channel_output_ptr = T *channel_output_ptr =
output + (b * channels + c) * out_height * out_width; output + (b * channels + c) * out_height * out_width;
for (index_t y = 0; y < out_height; ++y) { for (index_t y = 0; y < out_height; ++y) {
const float in_f_y = half_pixel_centers ?
(static_cast<float>(y) + 0.5f) * height_scale :
y * height_scale;
const index_t in_y = std::min( const index_t in_y = std::min(
(align_corners) ? static_cast<index_t>(roundf(y * height_scale)) (align_corners) ? static_cast<index_t>(roundf(in_f_y))
: static_cast<index_t>(floorf(y * height_scale)), : static_cast<index_t>(floorf(in_f_y)),
in_height - 1); in_height - 1);
for (int x = 0; x < out_width; ++x) { for (int x = 0; x < out_width; ++x) {
const float in_f_x = half_pixel_centers ?
(static_cast<float>(x) + 0.5f) * width_scale :
x * width_scale;
const index_t in_x = std::min( const index_t in_x = std::min(
(align_corners) ? static_cast<index_t>(roundf(x * width_scale)) (align_corners) ? static_cast<index_t>(roundf(in_f_x))
: static_cast<index_t>(floorf(x * width_scale)), : static_cast<index_t>(floorf(in_f_x)),
in_width - 1); in_width - 1);
channel_output_ptr[y * out_width + x] = channel_output_ptr[y * out_width + x] =
channel_input_ptr[in_y * in_width + in_x]; channel_input_ptr[in_y * in_width + in_x];
...@@ -79,6 +86,8 @@ class ResizeNearestNeighborOp<DeviceType::CPU, T> : public Operation { ...@@ -79,6 +86,8 @@ class ResizeNearestNeighborOp<DeviceType::CPU, T> : public Operation {
explicit ResizeNearestNeighborOp(OpConstructContext *context) explicit ResizeNearestNeighborOp(OpConstructContext *context)
: Operation(context), : Operation(context),
align_corners_(Operation::GetOptionalArg<bool>("align_corners", false)), align_corners_(Operation::GetOptionalArg<bool>("align_corners", false)),
half_pixel_centers_(
Operation::GetOptionalArg<bool>("half_pixel_centers", false)),
height_scale_(Operation::GetOptionalArg<float>("height_scale", 0)), height_scale_(Operation::GetOptionalArg<float>("height_scale", 0)),
width_scale_(Operation::GetOptionalArg<float>("width_scale", 0)) {} width_scale_(Operation::GetOptionalArg<float>("width_scale", 0)) {}
...@@ -144,12 +153,14 @@ class ResizeNearestNeighborOp<DeviceType::CPU, T> : public Operation { ...@@ -144,12 +153,14 @@ class ResizeNearestNeighborOp<DeviceType::CPU, T> : public Operation {
height_scale, height_scale,
width_scale, width_scale,
align_corners_, align_corners_,
half_pixel_centers_,
output_data); output_data);
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
private: private:
bool align_corners_; const bool align_corners_;
const bool half_pixel_centers_;
float height_scale_; float height_scale_;
float width_scale_; float width_scale_;
}; };
...@@ -164,9 +175,11 @@ class ResizeNearestNeighborOp<DeviceType::GPU, float> : public Operation { ...@@ -164,9 +175,11 @@ class ResizeNearestNeighborOp<DeviceType::GPU, float> : public Operation {
width_scale_(Operation::GetOptionalArg<float>("width_scale", 0)) { width_scale_(Operation::GetOptionalArg<float>("width_scale", 0)) {
bool align_corners = Operation::GetOptionalArg<bool>( bool align_corners = Operation::GetOptionalArg<bool>(
"align_corners", false); "align_corners", false);
bool half_pixel_centers = Operation::GetOptionalArg<bool>(
"half_pixel_centers", false);
if (context->GetOpMemoryType() == MemoryType::GPU_IMAGE) { if (context->GetOpMemoryType() == MemoryType::GPU_IMAGE) {
kernel_ = make_unique<opencl::image::ResizeNearestNeighborKernel>( kernel_ = make_unique<opencl::image::ResizeNearestNeighborKernel>(
align_corners); align_corners, half_pixel_centers);
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
} }
......
...@@ -29,6 +29,7 @@ namespace mace { ...@@ -29,6 +29,7 @@ namespace mace {
namespace {{tag}} { namespace {{tag}} {
extern const unsigned char *LoadModelData(); extern const unsigned char *LoadModelData();
extern int64_t GetModelSize();
extern const std::shared_ptr<NetDef> CreateNet(); extern const std::shared_ptr<NetDef> CreateNet();
...@@ -77,17 +78,16 @@ __attribute__((deprecated)) MaceStatus CreateMaceEngineFromCode( ...@@ -77,17 +78,16 @@ __attribute__((deprecated)) MaceStatus CreateMaceEngineFromCode(
{% if embed_model_data %} {% if embed_model_data %}
(void)model_data_file; (void)model_data_file;
const unsigned char * model_data; const unsigned char * model_data;
const int64_t model_size;
{% endif %} {% endif %}
MaceStatus status = MaceStatus::MACE_SUCCESS; MaceStatus status = MaceStatus::MACE_SUCCESS;
switch (model_name_map[model_name]) { switch (model_name_map[model_name]) {
{% for i in range(model_tags |length) %} {% for i in range(model_tags |length) %}
case {{ i }}: case {{ i }}: {
net_def = mace::{{model_tags[i]}}::CreateNet(); net_def = mace::{{model_tags[i]}}::CreateNet();
engine->reset(new mace::MaceEngine(config)); engine->reset(new mace::MaceEngine(config));
{% if embed_model_data %} {% if embed_model_data %}
model_data = mace::{{model_tags[i]}}::LoadModelData(); model_data = mace::{{model_tags[i]}}::LoadModelData();
model_size = mace::{{model_tags[i]}}::GetModelSize(); const int64_t model_size = mace::{{model_tags[i]}}::GetModelSize();
status = (*engine)->Init(net_def.get(), input_nodes, output_nodes, status = (*engine)->Init(net_def.get(), input_nodes, output_nodes,
model_data, model_size); model_data, model_size);
{% else %} {% else %}
...@@ -95,6 +95,7 @@ __attribute__((deprecated)) MaceStatus CreateMaceEngineFromCode( ...@@ -95,6 +95,7 @@ __attribute__((deprecated)) MaceStatus CreateMaceEngineFromCode(
model_data_file); model_data_file);
{% endif %} {% endif %}
break; break;
}
{% endfor %} {% endfor %}
default: default:
status = MaceStatus::MACE_INVALID_ARGS; status = MaceStatus::MACE_INVALID_ARGS;
...@@ -118,19 +119,19 @@ MaceStatus CreateMaceEngineFromCode( ...@@ -118,19 +119,19 @@ MaceStatus CreateMaceEngineFromCode(
std::shared_ptr<NetDef> net_def; std::shared_ptr<NetDef> net_def;
{% if embed_model_data %} {% if embed_model_data %}
const unsigned char * model_data; const unsigned char * model_data;
const int64_t model_size;
(void)model_weights_data; (void)model_weights_data;
(void)model_weights_data_size;
{% endif %} {% endif %}
MaceStatus status = MaceStatus::MACE_SUCCESS; MaceStatus status = MaceStatus::MACE_SUCCESS;
switch (model_name_map[model_name]) { switch (model_name_map[model_name]) {
{% for i in range(model_tags |length) %} {% for i in range(model_tags |length) %}
case {{ i }}: case {{ i }}: {
net_def = mace::{{model_tags[i]}}::CreateNet(); net_def = mace::{{model_tags[i]}}::CreateNet();
engine->reset(new mace::MaceEngine(config)); engine->reset(new mace::MaceEngine(config));
{% if embed_model_data %} {% if embed_model_data %}
model_data = mace::{{model_tags[i]}}::LoadModelData(); model_data = mace::{{model_tags[i]}}::LoadModelData();
model_size = mace::{{model_tags[i]}}::GetModelSize(); const int64_t model_size = mace::{{model_tags[i]}}::GetModelSize();
status = (*engine)->Init(net_def.get(), input_nodes, output_nodes, status = (*engine)->Init(net_def.get(), input_nodes, output_nodes,
model_data, model_size); model_data, model_size);
{% else %} {% else %}
...@@ -138,6 +139,7 @@ MaceStatus CreateMaceEngineFromCode( ...@@ -138,6 +139,7 @@ MaceStatus CreateMaceEngineFromCode(
model_weights_data, model_weights_data_size); model_weights_data, model_weights_data_size);
{% endif %} {% endif %}
break; break;
}
{% endfor %} {% endfor %}
default: default:
status = MaceStatus::MACE_INVALID_ARGS; status = MaceStatus::MACE_INVALID_ARGS;
......
...@@ -29,6 +29,7 @@ namespace mace { ...@@ -29,6 +29,7 @@ namespace mace {
namespace {{tag}} { namespace {{tag}} {
extern const unsigned char *LoadModelData(); extern const unsigned char *LoadModelData();
extern int64_t GetModelSize();
extern const std::shared_ptr<NetDef> CreateNet(); extern const std::shared_ptr<NetDef> CreateNet();
...@@ -77,17 +78,16 @@ __attribute__((deprecated)) MaceStatus CreateMaceEngineFromCode( ...@@ -77,17 +78,16 @@ __attribute__((deprecated)) MaceStatus CreateMaceEngineFromCode(
{% if embed_model_data %} {% if embed_model_data %}
(void)model_data_file; (void)model_data_file;
const unsigned char * model_data; const unsigned char * model_data;
const int64_t model_size;
{% endif %} {% endif %}
MaceStatus status = MaceStatus::MACE_SUCCESS; MaceStatus status = MaceStatus::MACE_SUCCESS;
switch (model_name_map[model_name]) { switch (model_name_map[model_name]) {
{% for i in range(model_tags |length) %} {% for i in range(model_tags |length) %}
case {{ i }}: case {{ i }}: {
net_def = mace::{{model_tags[i]}}::CreateNet(); net_def = mace::{{model_tags[i]}}::CreateNet();
engine->reset(new mace::MaceEngine(config)); engine->reset(new mace::MaceEngine(config));
{% if embed_model_data %} {% if embed_model_data %}
model_data = mace::{{model_tags[i]}}::LoadModelData(); model_data = mace::{{model_tags[i]}}::LoadModelData();
model_size = mace::{{model_tags[i]}}::GetModelSize(); const int64_t model_size = mace::{{model_tags[i]}}::GetModelSize();
status = (*engine)->Init(net_def.get(), input_nodes, output_nodes, status = (*engine)->Init(net_def.get(), input_nodes, output_nodes,
model_data, model_size); model_data, model_size);
{% else %} {% else %}
...@@ -95,6 +95,7 @@ __attribute__((deprecated)) MaceStatus CreateMaceEngineFromCode( ...@@ -95,6 +95,7 @@ __attribute__((deprecated)) MaceStatus CreateMaceEngineFromCode(
model_data_file); model_data_file);
{% endif %} {% endif %}
break; break;
}
{% endfor %} {% endfor %}
default: default:
status = MaceStatus::MACE_INVALID_ARGS; status = MaceStatus::MACE_INVALID_ARGS;
...@@ -118,19 +119,19 @@ MACE_API MaceStatus CreateMaceEngineFromCode( ...@@ -118,19 +119,19 @@ MACE_API MaceStatus CreateMaceEngineFromCode(
std::shared_ptr<NetDef> net_def; std::shared_ptr<NetDef> net_def;
{% if embed_model_data %} {% if embed_model_data %}
const unsigned char * model_data; const unsigned char * model_data;
const int64_t model_size;
(void)model_weights_data; (void)model_weights_data;
(void)model_weights_data_size;
{% endif %} {% endif %}
MaceStatus status = MaceStatus::MACE_SUCCESS; MaceStatus status = MaceStatus::MACE_SUCCESS;
switch (model_name_map[model_name]) { switch (model_name_map[model_name]) {
{% for i in range(model_tags |length) %} {% for i in range(model_tags |length) %}
case {{ i }}: case {{ i }}: {
net_def = mace::{{model_tags[i]}}::CreateNet(); net_def = mace::{{model_tags[i]}}::CreateNet();
engine->reset(new mace::MaceEngine(config)); engine->reset(new mace::MaceEngine(config));
{% if embed_model_data %} {% if embed_model_data %}
model_data = mace::{{model_tags[i]}}::LoadModelData(); model_data = mace::{{model_tags[i]}}::LoadModelData();
model_size = mace::{{model_tags[i]}}::GetModelSize(); const int64_t model_size = mace::{{model_tags[i]}}::GetModelSize();
status = (*engine)->Init(net_def.get(), input_nodes, output_nodes, status = (*engine)->Init(net_def.get(), input_nodes, output_nodes,
model_data, model_size); model_data, model_size);
{% else %} {% else %}
...@@ -138,6 +139,7 @@ MACE_API MaceStatus CreateMaceEngineFromCode( ...@@ -138,6 +139,7 @@ MACE_API MaceStatus CreateMaceEngineFromCode(
model_weights_data, model_weights_data_size); model_weights_data, model_weights_data_size);
{% endif %} {% endif %}
break; break;
}
{% endfor %} {% endfor %}
default: default:
status = MaceStatus::MACE_INVALID_ARGS; status = MaceStatus::MACE_INVALID_ARGS;
......
...@@ -225,6 +225,7 @@ class MaceKeyword(object): ...@@ -225,6 +225,7 @@ class MaceKeyword(object):
mace_align_corners_str = 'align_corners' mace_align_corners_str = 'align_corners'
mace_height_scale_str = 'height_scale' mace_height_scale_str = 'height_scale'
mace_width_scale_str = 'width_scale' mace_width_scale_str = 'width_scale'
mace_half_pixel_centers_str = 'half_pixel_centers'
mace_space_batch_block_shape_str = 'block_shape' mace_space_batch_block_shape_str = 'block_shape'
mace_space_depth_block_size_str = 'block_size' mace_space_depth_block_size_str = 'block_size'
mace_constant_value_str = 'constant_value' mace_constant_value_str = 'constant_value'
......
...@@ -202,8 +202,8 @@ class TensorflowConverter(base_converter.ConverterInterface): ...@@ -202,8 +202,8 @@ class TensorflowConverter(base_converter.ConverterInterface):
} }
pad_type = { pad_type = {
'CONSTANT': PadType.CONSTANT, 'CONSTANT': PadType.CONSTANT,
'REFLECT': PadType.REFLECT, 'REFLECT': PadType.REFLECT,
'SYMMETRIC': PadType.SYMMETRIC 'SYMMETRIC': PadType.SYMMETRIC
} }
...@@ -267,7 +267,8 @@ class TensorflowConverter(base_converter.ConverterInterface): ...@@ -267,7 +267,8 @@ class TensorflowConverter(base_converter.ConverterInterface):
TFOpType.Reshape.name: self.convert_reshape, TFOpType.Reshape.name: self.convert_reshape,
TFOpType.ResizeBicubic.name: self.convert_resize_bicubic, TFOpType.ResizeBicubic.name: self.convert_resize_bicubic,
TFOpType.ResizeBilinear.name: self.convert_resize_bilinear, TFOpType.ResizeBilinear.name: self.convert_resize_bilinear,
TFOpType.ResizeNearestNeighbor.name: self.convert_resize_nearest_neighbor, # noqa TFOpType.ResizeNearestNeighbor.name:
self.convert_resize_nearest_neighbor,
TFOpType.ReverseV2.name: self.convert_reverse, TFOpType.ReverseV2.name: self.convert_reverse,
TFOpType.Select.name: self.convert_select, TFOpType.Select.name: self.convert_select,
TFOpType.Shape.name: self.convert_shape, TFOpType.Shape.name: self.convert_shape,
...@@ -715,6 +716,19 @@ class TensorflowConverter(base_converter.ConverterInterface): ...@@ -715,6 +716,19 @@ class TensorflowConverter(base_converter.ConverterInterface):
op = self.convert_general_op(tf_op) op = self.convert_general_op(tf_op)
op.type = MaceOp.Softmax.name op.type = MaceOp.Softmax.name
def add_resize_args(self, op, tf_op):
    """Copy the resize attributes of `tf_op` onto `op` as MACE arguments.

    An `align_corners` argument is always appended. A `half_pixel_centers`
    argument is appended only when the attribute can be read from `tf_op`;
    a ValueError raised while doing so is swallowed and the argument is
    left out.

    :param op: MACE operator whose `arg` list receives the new arguments.
    :param tf_op: TensorFlow operation the attributes are read from.
    """
    corners = op.arg.add()
    corners.name = MaceKeyword.mace_align_corners_str
    corners.i = tf_op.get_attr(tf_align_corners)

    try:
        # Read the attribute before adding the arg, so a failed lookup
        # leaves no half-initialised argument behind on `op`.
        centers_value = tf_op.get_attr('half_pixel_centers')
        centers = op.arg.add()
        centers.name = MaceKeyword.mace_half_pixel_centers_str
        centers.i = centers_value
    except ValueError:
        # NOTE(review): presumably raised when the op does not define
        # `half_pixel_centers` (e.g. graphs from older TensorFlow) -
        # confirm against the TensorFlow version in use.
        return
def convert_resize_bicubic(self, tf_op): def convert_resize_bicubic(self, tf_op):
op = self.convert_general_op(tf_op) op = self.convert_general_op(tf_op)
op.type = MaceOp.ResizeBicubic.name op.type = MaceOp.ResizeBicubic.name
...@@ -725,9 +739,7 @@ class TensorflowConverter(base_converter.ConverterInterface): ...@@ -725,9 +739,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
size_value = tf_op.inputs[1].eval().astype(np.int32) size_value = tf_op.inputs[1].eval().astype(np.int32)
size_arg.ints.extend(size_value) size_arg.ints.extend(size_value)
self._skip_tensor.add(tf_op.inputs[1].name) self._skip_tensor.add(tf_op.inputs[1].name)
align_corners_arg = op.arg.add() self.add_resize_args(op, tf_op)
align_corners_arg.name = MaceKeyword.mace_align_corners_str
align_corners_arg.i = tf_op.get_attr(tf_align_corners)
def convert_resize_bilinear(self, tf_op): def convert_resize_bilinear(self, tf_op):
op = self.convert_general_op(tf_op) op = self.convert_general_op(tf_op)
...@@ -739,17 +751,12 @@ class TensorflowConverter(base_converter.ConverterInterface): ...@@ -739,17 +751,12 @@ class TensorflowConverter(base_converter.ConverterInterface):
size_value = tf_op.inputs[1].eval().astype(np.int32) size_value = tf_op.inputs[1].eval().astype(np.int32)
size_arg.ints.extend(size_value) size_arg.ints.extend(size_value)
self._skip_tensor.add(tf_op.inputs[1].name) self._skip_tensor.add(tf_op.inputs[1].name)
align_corners_arg = op.arg.add() self.add_resize_args(op, tf_op)
align_corners_arg.name = MaceKeyword.mace_align_corners_str
align_corners_arg.i = tf_op.get_attr(tf_align_corners)
def convert_resize_nearest_neighbor(self, tf_op): def convert_resize_nearest_neighbor(self, tf_op):
op = self.convert_general_op(tf_op) op = self.convert_general_op(tf_op)
op.type = MaceOp.ResizeNearestNeighbor.name op.type = MaceOp.ResizeNearestNeighbor.name
self.add_resize_args(op, tf_op)
align_corners_arg = op.arg.add()
align_corners_arg.name = MaceKeyword.mace_align_corners_str
align_corners_arg.i = tf_op.get_attr(tf_align_corners)
def convert_space_batch(self, tf_op): def convert_space_batch(self, tf_op):
op = self.convert_general_op(tf_op) op = self.convert_general_op(tf_op)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册