提交 934d3f26 编写于 作者: L lichao18

Add resize nearest neighbor

上级 54bbb62b
......@@ -24,6 +24,7 @@ Operator lists
"EMBEDDING_LOOKUP","Y",""
"EXPANDDIMS","Y","Only CPU and TensorFlow is supported."
"FILL","Y","Only CPU and TensorFlow is supported."
"FLATTEN","Y","Only Caffe is supported."
"FULLY_CONNECTED","Y",""
"GROUP_CONV_2D","","Caffe model with group count = channel count is supported."
"IDENTITY","Y","Only TensorFlow model is supported."
......@@ -35,13 +36,16 @@ Operator lists
"PAD","Y",""
"PSROI_ALIGN","Y",""
"PRELU","Y","Only Caffe model is supported"
"PRIOR_BOX","Y","Only Caffe model is supported"
"REDUCE_MEAN","Y","Only TensorFlow model is supported. For GPU only H + W axis reduce is supported."
"RELU","Y",""
"RELU1","Y",""
"RELU6","Y",""
"RELUX","Y",""
"RESHAPE","Y","Limited support: GPU only supports softmax-like usage, CPU only supports the usage which not change the storage format."
"RESIZE_BILINEAR","Y",""
"RESIZE_BICUBIC","Y","Only Tensorflow is supported"
"RESIZE_BILINEAR","Y","Only Tensorflow is supported"
"RESIZE_NEAREST_NEIGHBOR","Y","Only Tensorflow is supported"
"REVERSE","Y","Only CPU and Tensorflow is supported"
"RNN","",""
"RPN_PROPOSAL_LAYER","Y",""
......
#include <common.h>
__kernel void resize_nearest_neighbor_nocache(
OUT_OF_RANGE_PARAMS
GLOBAL_WORK_GROUP_SIZE_DIM3
__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
__write_only image2d_t output,
__private const float height_scale,
__private const float width_scale,
__private const int in_height,
__private const int in_width,
__private const int out_height,
__private const int align_corner) {
const int ch_blk = get_global_id(0);
const int w = get_global_id(1);
const int hb = get_global_id(2);
#ifndef NON_UNIFORM_WORK_GROUP
if (ch_blk >= global_size_dim0 || w >= global_size_dim1
|| hb >= global_size_dim2) {
return;
}
#endif
const int ch_blks = global_size_dim0;
const int out_width = global_size_dim1;
const int b = hb / out_height;
const int h = hb - mul24(b, out_height);
const int h_in = min((align_corner) ? (int) round(h * height_scale) :
(int) floor(h * height_scale), in_height - 1);
const int w_in = min((align_corner) ? (int) round(w * width_scale) :
(int) floor(w * width_scale), in_width - 1);
const int in_w_offset = mul24(ch_blk, in_width);
const int in_h_offset = mul24(b, in_height);
const int out_w_offset = mul24(ch_blk, out_width);
const int out_h_offset = mul24(b, out_height);
DATA_TYPE4 out = READ_IMAGET(input, SAMPLER, (int2)(in_w_offset + w_in,
in_h_offset + h_in));
WRITE_IMAGET(output, (int2)(out_w_offset + w, out_h_offset + h), out);
}
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_OPENCL_IMAGE_RESIZE_NEAREST_NEIGHBOR_H_
#define MACE_OPS_OPENCL_IMAGE_RESIZE_NEAREST_NEIGHBOR_H_
#include "mace/ops/opencl/resize_nearest_neighbor.h"
#include <algorithm>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/opencl/helper.h"
#include "mace/ops/resize_nearest_neighbor.h"
namespace mace {
namespace ops {
namespace opencl {
namespace image {
namespace resize_nearest_neighbor {
inline std::vector<uint32_t> LocalWS(OpenCLRuntime *runtime,
const uint32_t *gws,
const uint32_t kwg_size) {
std::vector<uint32_t> lws(4, 0);
if (kwg_size == 0) {
lws[0] = lws[1] = lws[2] = 1;
} else {
uint64_t
cache_size = runtime->device_global_mem_cache_size();
uint32_t base = std::max<uint32_t>(cache_size / kBaseGPUMemCacheSize, 1);
lws[1] = std::min<uint32_t>(gws[1], kwg_size);
if (lws[1] >= base) {
lws[0] = std::min<uint32_t>(gws[0], base);
} else {
lws[0] = gws[0] / 8;
if (lws[0] == 0) {
lws[0] = gws[0];
}
}
lws[0] = std::min<uint32_t>(lws[0], kwg_size / lws[1]);
const uint32_t lws_size = lws[0] * lws[1];
lws[2] = gws[2] / 8;
if (lws[2] == 0) {
lws[2] = gws[2];
}
lws[2] = std::max<uint32_t>(std::min<uint32_t>(lws[2], kwg_size / lws_size),
1);
}
return lws;
}
} // namespace resize_nearest_neighbor
template <typename T>
class ResizeNearestNeighborKernel : public OpenCLResizeNearestNeighborKernel {
public:
explicit ResizeNearestNeighborKernel(bool align_corners)
: align_corners_(align_corners) {}
MaceStatus Compute(
OpContext *context,
const Tensor *input,
const Tensor *size,
Tensor *output) override;
private:
bool align_corners_;
cl::Kernel kernel_;
uint32_t kwg_size_;
std::vector<index_t> input_shape_;
};
template <typename T>
MaceStatus ResizeNearestNeighborKernel<T>::Compute(
OpContext *context,
const Tensor *input,
const Tensor *size,
Tensor *output) {
const index_t batch = input->dim(0);
const index_t in_height = input->dim(1);
const index_t in_width = input->dim(2);
const index_t channels = input->dim(3);
Tensor::MappingGuard input_mapper(input);
Tensor::MappingGuard size_mapper(size);
Tensor::MappingGuard output_mapper(output);
const index_t out_height = size->data<int32_t>()[0];
const index_t out_width = size->data<int32_t>()[1];
const index_t channel_blocks = RoundUpDiv4(channels);
const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
static_cast<uint32_t>(out_width),
static_cast<uint32_t>(out_height * batch)};
auto runtime = context->device()->gpu_runtime()->opencl_runtime();
MACE_OUT_OF_RANGE_DEFINITION;
if (kernel_.get() == nullptr) {
std::set<std::string> built_options;
MACE_OUT_OF_RANGE_CONFIG;
MACE_NON_UNIFORM_WG_CONFIG;
std::string kernel_name = MACE_OBFUSCATE_SYMBOL(
"resize_nearest_neighbor_nocache");
built_options.emplace("-Dresize_nearest_neighbor_nocache=" + kernel_name);
auto dt = DataTypeToEnum<T>::value;
built_options.emplace("-DDATA_TYPE=" + DtToUpCompatibleCLDt(dt));
built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpCompatibleCLCMDDt(dt));
MACE_RETURN_IF_ERROR(
runtime->BuildKernel("resize_nearest_neighbor",
kernel_name,
built_options,
&kernel_));
kwg_size_ =
static_cast<uint32_t>(runtime->GetKernelMaxWorkGroupSize(kernel_));
}
MACE_OUT_OF_RANGE_INIT(kernel_);
if (!IsVecEqual(input_shape_, input->shape())) {
MACE_CHECK(out_height > 0 && out_width > 0);
std::vector<index_t> output_shape{batch, out_height, out_width, channels};
std::vector<size_t> output_image_shape;
OpenCLUtil::CalImage2DShape(output_shape, OpenCLBufferType::IN_OUT_CHANNEL,
&output_image_shape);
MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape));
float height_scale =
mace::ops::resize_nearest_neighbor::CalculateResizeScale(
in_height, out_height, align_corners_);
float width_scale =
mace::ops::resize_nearest_neighbor::CalculateResizeScale(
in_width, out_width, align_corners_);
uint32_t idx = 0;
MACE_OUT_OF_RANGE_SET_ARGS(kernel_);
MACE_SET_3D_GWS_ARGS(kernel_, gws);
kernel_.setArg(idx++, *(input->opencl_image()));
kernel_.setArg(idx++, *(output->opencl_image()));
kernel_.setArg(idx++, height_scale);
kernel_.setArg(idx++, width_scale);
kernel_.setArg(idx++, static_cast<int32_t>(in_height));
kernel_.setArg(idx++, static_cast<int32_t>(in_width));
kernel_.setArg(idx++, static_cast<int32_t>(out_height));
kernel_.setArg(idx++, static_cast<int32_t>(align_corners_));
input_shape_ = input->shape();
}
const std::vector<uint32_t>
lws = resize_nearest_neighbor::LocalWS(runtime, gws, kwg_size_);
std::string tuning_key =
Concat("resize_nearest_neighbor_opencl_kernel", output->dim(0),
output->dim(1), output->dim(2), output->dim(3));
MACE_RETURN_IF_ERROR(TuningOrRun3DKernel(runtime, kernel_, tuning_key,
gws, lws, context->future()));
MACE_OUT_OF_RANGE_VALIDATION;
return MaceStatus::MACE_SUCCESS;
}
} // namespace image
} // namespace opencl
} // namespace ops
} // namespace mace
#endif // MACE_OPS_OPENCL_IMAGE_RESIZE_NEAREST_NEIGHBOR_H_
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_OPENCL_RESIZE_NEAREST_NEIGHBOR_H_
#define MACE_OPS_OPENCL_RESIZE_NEAREST_NEIGHBOR_H_
#include "mace/core/types.h"
#include "mace/public/mace.h"
#include "mace/utils/utils.h"
namespace mace {
class OpContext;
class Tensor;
namespace ops {
class OpenCLResizeNearestNeighborKernel {
public:
virtual MaceStatus Compute(
OpContext *context,
const Tensor *input,
const Tensor *size,
Tensor *output) = 0;
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLResizeNearestNeighborKernel);
};
} // namespace ops
} // namespace mace
#endif // MACE_OPS_OPENCL_RESIZE_NEAREST_NEIGHBOR_H_
......@@ -49,6 +49,7 @@ extern void RegisterPriorBox(OpRegistryBase *op_registry);
extern void RegisterReshape(OpRegistryBase *op_registry);
extern void RegisterResizeBicubic(OpRegistryBase *op_registry);
extern void RegisterResizeBilinear(OpRegistryBase *op_registry);
extern void RegisterResizeNearestNeighbor(OpRegistryBase *op_registry);
extern void RegisterReverse(OpRegistryBase *op_registry);
extern void RegisterScalarMath(OpRegistryBase *op_registry);
extern void RegisterShape(OpRegistryBase *op_registry);
......@@ -108,6 +109,7 @@ OpRegistry::OpRegistry() : OpRegistryBase() {
ops::RegisterReshape(this);
ops::RegisterResizeBicubic(this);
ops::RegisterResizeBilinear(this);
ops::RegisterResizeNearestNeighbor(this);
ops::RegisterReverse(this);
ops::RegisterScalarMath(this);
ops::RegisterShape(this);
......
......@@ -25,12 +25,12 @@ namespace test {
namespace {
template <DeviceType D, typename T>
void ResizeBicubicBenchmark(int iters,
int batch,
int channels,
int input_height,
int input_width,
int output_height,
int output_width) {
int batch,
int channels,
int input_height,
int input_width,
int output_height,
int output_width) {
mace::testing::StopTiming();
OpsTestNet net;
......
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/resize_nearest_neighbor.h"
#include <algorithm>
#include <memory>
#include <vector>
#include "mace/core/operator.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/resize_nearest_neighbor.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
namespace ops {
template <typename T>
inline void ResizeImageNCHW(const T *images,
const index_t batch_size,
const index_t in_height,
const index_t in_width,
const index_t out_height,
const index_t out_width,
const index_t channels,
const float height_scale,
const float width_scale,
bool align_corners,
T *output) {
#pragma omp parallel for collapse(2) schedule(runtime)
for (index_t b = 0; b < batch_size; ++b) {
for (index_t c = 0; c < channels; ++c) {
const T
*channel_input_ptr =
images + (b * channels + c) * in_height * in_width;
T *channel_output_ptr =
output + (b * channels + c) * out_height * out_width;
for (index_t y = 0; y < out_height; ++y) {
const index_t in_y = std::min(
(align_corners) ? static_cast<index_t>(roundf(y * height_scale))
: static_cast<index_t>(floorf(y * height_scale)),
in_height - 1);
for (int x = 0; x < out_width; ++x) {
const index_t in_x = std::min(
(align_corners) ? static_cast<index_t>(roundf(x * width_scale))
: static_cast<index_t>(floorf(x * width_scale)),
in_width - 1);
channel_output_ptr[y * out_width + x] =
channel_input_ptr[in_y * in_width + in_x];
}
}
}
}
}
template <DeviceType D, typename T>
class ResizeNearestNeighborOp;
template <typename T>
class ResizeNearestNeighborOp<DeviceType::CPU, T> : public Operation {
public:
explicit ResizeNearestNeighborOp(OpConstructContext *context)
: Operation(context),
align_corners_(Operation::GetOptionalArg<bool>("align_corners",
false)) {}
MaceStatus Run(OpContext *context) override {
MACE_UNUSED(context);
const Tensor *input = this->Input(0);
const Tensor *size = this->Input(1);
Tensor *output = this->Output(0);
MACE_CHECK(input->dim_size() == 4 && size->dim_size() == 1,
"input must be 4-dimensional and size must be 1-dimensional. ",
input->dim_size(), size->dim_size());
const index_t batch = input->dim(0);
const index_t channels = input->dim(1);
const index_t in_height = input->dim(2);
const index_t in_width = input->dim(3);
const index_t out_height = size->data<int32_t>()[0];
const index_t out_width = size->data<int32_t>()[1];
MACE_CHECK(out_height > 0 && out_width > 0, out_height, out_width);
std::vector<index_t> out_shape{batch, channels, out_height, out_width};
MACE_RETURN_IF_ERROR(output->Resize(out_shape));
Tensor::MappingGuard input_mapper(input);
Tensor::MappingGuard size_mapper(size);
Tensor::MappingGuard output_mapper(output);
const T *input_data = input->data<T>();
T *output_data = output->mutable_data<T>();
if (out_height == in_height && out_width == in_width) {
std::copy(input_data,
input_data + batch * channels * in_height * in_width,
output_data);
return MaceStatus::MACE_SUCCESS;
}
float height_scale =
resize_nearest_neighbor::CalculateResizeScale(in_height,
out_height,
align_corners_);
float width_scale =
resize_nearest_neighbor::CalculateResizeScale(in_width,
out_width,
align_corners_);
ResizeImageNCHW(input_data,
batch,
in_height,
in_width,
out_height,
out_width,
channels,
height_scale,
width_scale,
align_corners_,
output_data);
return MaceStatus::MACE_SUCCESS;
}
private:
bool align_corners_;
};
#ifdef MACE_ENABLE_OPENCL
template <typename T>
class ResizeNearestNeighborOp<DeviceType::GPU, T> : public Operation {
public:
explicit ResizeNearestNeighborOp(OpConstructContext *context)
: Operation(context) {
bool align_corners = Operation::GetOptionalArg<bool>(
"align_corners", false);
if (context->device()->gpu_runtime()->UseImageMemory()) {
kernel_.reset(new opencl::image::ResizeNearestNeighborKernel<T>(
align_corners));
} else {
MACE_NOT_IMPLEMENTED;
}
}
MaceStatus Run(OpContext *context) override {
const Tensor *input = this->Input(0);
const Tensor *size = this->Input(1);
Tensor *output = this->Output(0);
MACE_CHECK(input->dim_size() == 4 && size->dim_size() == 1,
"input must be 4-dimensional and size must be 1-dimensional.",
input->dim_size(), size->dim_size());
return kernel_->Compute(context, input, size, output);
}
private:
std::unique_ptr<OpenCLResizeNearestNeighborKernel> kernel_;
};
#endif // MACE_ENABLE_OPENCL
void RegisterResizeNearestNeighbor(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "ResizeNearestNeighbor",
ResizeNearestNeighborOp, DeviceType::CPU, float);
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "ResizeNearestNeighbor",
ResizeNearestNeighborOp, DeviceType::GPU, float);
MACE_REGISTER_OP(op_registry, "ResizeNearestNeighbor",
ResizeNearestNeighborOp, DeviceType::GPU, half);
#endif // MACE_ENABLE_OPENCL
}
} // namespace ops
} // namespace mace
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_RESIZE_NEAREST_NEIGHBOR_H_
#define MACE_OPS_RESIZE_NEAREST_NEIGHBOR_H_
#include "mace/core/types.h"
namespace mace {
namespace ops {
namespace resize_nearest_neighbor {
inline float CalculateResizeScale(index_t in_size,
index_t out_size,
bool align_corners) {
return (align_corners && out_size > 1)
? (in_size - 1) / static_cast<float>(out_size - 1)
: in_size / static_cast<float>(out_size);
}
} // namespace resize_nearest_neighbor
} // namespace ops
} // namespace mace
#endif // MACE_OPS_RESIZE_NEAREST_NEIGHBOR_H_
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "mace/benchmark/statistics.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h"
namespace mace {
namespace ops {
namespace test {
namespace {
template <DeviceType D, typename T>
void ResizeNearestNeighborBenchmark(int iters,
int batch,
int channels,
int input_height,
int input_width,
int output_height,
int output_width) {
mace::testing::StopTiming();
OpsTestNet net;
// Add input data
std::vector<int32_t> size = {output_height, output_width};
if (D == DeviceType::CPU) {
net.AddRandomInput<D, float>("Input",
{batch, channels, input_height, input_width});
net.AddInputFromArray<D, int32_t>("Size", {2}, size);
} else if (D == DeviceType::GPU) {
net.AddRandomInput<D, float>("Input",
{batch, input_height, input_width, channels});
net.AddInputFromArray<D, int32_t>("Size", {2}, size);
} else {
MACE_NOT_IMPLEMENTED;
}
OpDefBuilder("ResizeNearestNeighbor", "ResizeNearestNeighborBenchmark")
.Input("Input")
.Input("Size")
.Output("Output")
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
// Warm-up
for (int i = 0; i < 5; ++i) {
net.RunOp(D);
}
mace::testing::StartTiming();
while (iters--) {
net.RunOp(D);
}
net.Sync();
}
} // namespace
#define MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, TYPE, \
DEVICE) \
static void \
MACE_BM_RESIZE_NEAREST_NEIGHBOR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_\
##TYPE##_##DEVICE( \
int iters) { \
const int64_t macs = static_cast<int64_t>(iters) * \
mace::benchmark::StatMACs("ResizeNearestNeighbor", \
{}, {N, H1, W1, C}); \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H0 * W0; \
mace::testing::MacsProcessed(macs); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
ResizeNearestNeighborBenchmark<DEVICE, TYPE>(iters, N, C, H0, W0, H1, W1);\
} \
MACE_BENCHMARK( \
MACE_BM_RESIZE_NEAREST_NEIGHBOR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_\
##TYPE##_##DEVICE)
#define MACE_BM_RESIZE_NEAREST_NEIGHBOR(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \
MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, float, GPU); \
MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, half, GPU);
MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 128, 120, 120, 480, 480);
MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 256, 7, 7, 15, 15);
MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 256, 15, 15, 30, 30);
MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 128, 30, 30, 60, 60);
MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 128, 240, 240, 480, 480);
MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 3, 4032, 3016, 480, 480);
MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 3, 480, 480, 4032, 3016);
} // namespace test
} // namespace ops
} // namespace mace
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vector>
#include "mace/ops/ops_test_util.h"
namespace mace {
namespace ops {
namespace test {
class ResizeNearestNeighborTest : public OpsTestBase {};
TEST_F(ResizeNearestNeighborTest, CPUResizeNearestNeighborWOAlignCorners) {
testing::internal::LogToStderr();
// Construct graph
OpsTestNet net;
// Add input data
std::vector<float> input(24);
std::iota(begin(input), end(input), 0);
std::vector<int32_t> size = {1, 2};
net.AddInputFromArray<DeviceType::CPU, float>("Input", {1, 2, 4, 3}, input);
net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
NCHW);
net.AddInputFromArray<DeviceType::CPU, int32_t>("Size", {2}, size);
OpDefBuilder("ResizeNearestNeighbor", "ResizeNearestNeighborTest")
.Input("InputNCHW")
.Input("Size")
.Output("OutputNCHW")
.AddIntsArg("size", {1, 2})
.Finalize(net.NewOperatorDef());
// Run
net.RunOp();
net.TransformDataFormat<DeviceType::CPU, float>("OutputNCHW", NCHW, "Output",
NHWC);
// Check
auto expected = net.CreateTensor<float>({1, 1, 2, 3}, {0, 1, 2, 6, 7, 8});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
}
TEST_F(ResizeNearestNeighborTest, ResizeNearestNeighborWAlignCorners) {
testing::internal::LogToStderr();
// Construct graph
OpsTestNet net;
// Add input data
std::vector<float> input(24);
std::iota(begin(input), end(input), 0);
std::vector<int32_t> size = {1, 2};
net.AddInputFromArray<DeviceType::CPU, float>("Input", {1, 2, 4, 3}, input);
net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
NCHW);
net.AddInputFromArray<DeviceType::CPU, int32_t>("Size", {2}, size);
OpDefBuilder("ResizeNearestNeighbor", "ResizeNearestNeighborTest")
.Input("InputNCHW")
.Input("Size")
.Output("OutputNCHW")
.AddIntArg("align_corners", 1)
.AddIntsArg("size", {1, 2})
.Finalize(net.NewOperatorDef());
// Run
net.RunOp();
net.TransformDataFormat<DeviceType::CPU, float>("OutputNCHW", NCHW, "Output",
NHWC);
// Check
auto expected = net.CreateTensor<float>({1, 1, 2, 3}, {0, 1, 2, 9, 10, 11});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
}
namespace {
template <DeviceType D>
void TestRandomResizeNearestNeighbor() {
testing::internal::LogToStderr();
static unsigned int seed = time(NULL);
for (int round = 0; round < 10; ++round) {
int batch = 1 + rand_r(&seed) % 5;
int channels = 1 + rand_r(&seed) % 100;
int in_height = 1 + rand_r(&seed) % 100;
int in_width = 1 + rand_r(&seed) % 100;
int align_corners = rand_r(&seed) % 1;
// Construct graph
OpsTestNet net;
// Add input data
std::vector<int32_t> size = {20, 40};
net.AddRandomInput<D, float>("Input",
{batch, in_height, in_width, channels});
net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
NCHW);
net.AddInputFromArray<D, int32_t>("Size",
{2}, size);
OpDefBuilder("ResizeNearestNeighbor", "ResizeNearestNeighborTest")
.Input("InputNCHW")
.Input("Size")
.Output("OutputNCHW")
.AddIntArg("align_corners", align_corners)
.Finalize(net.NewOperatorDef());
// Run on CPU
net.RunOp(DeviceType::CPU);
net.TransformDataFormat<DeviceType::CPU, float>("OutputNCHW", NCHW,
"Output", NHWC);
auto expected = net.CreateTensor<float>();
expected->Copy(*net.GetOutput("Output"));
if (D == DeviceType::GPU) {
OpDefBuilder("ResizeNearestNeighbor", "ResizeNearestNeighborTest")
.Input("Input")
.Input("Size")
.Output("Output")
.AddIntArg("align_corners", align_corners)
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(D);
}
// Check
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5,
1e-6);
}
}
} // namespace
TEST_F(ResizeNearestNeighborTest, RandomResizeNearestNeighbor) {
TestRandomResizeNearestNeighbor<DeviceType::CPU>();
}
} // namespace test
} // namespace ops
} // namespace mace
......@@ -130,6 +130,7 @@ MaceSupportedOps = [
'Reshape',
'ResizeBicubic',
'ResizeBilinear',
'ResizeNearestNeighbor',
'Reverse',
'ScalarMath',
'Split',
......
......@@ -91,6 +91,7 @@ TFSupportedOps = [
'Softmax',
'ResizeBicubic',
'ResizeBilinear',
'ResizeNearestNeighbor',
'Placeholder',
'SpaceToBatchND',
'BatchToSpaceND',
......@@ -239,6 +240,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
TFOpType.Softmax.name: self.convert_softmax,
TFOpType.ResizeBicubic.name: self.convert_resize_bicubic,
TFOpType.ResizeBilinear.name: self.convert_resize_bilinear,
TFOpType.ResizeNearestNeighbor.name: self.convert_resize_nearest_neighbor, # noqa
TFOpType.Placeholder.name: self.convert_nop,
TFOpType.SpaceToBatchND.name: self.convert_space_batch,
TFOpType.BatchToSpaceND.name: self.convert_space_batch,
......@@ -659,8 +661,15 @@ class TensorflowConverter(base_converter.ConverterInterface):
align_corners_arg.name = MaceKeyword.mace_align_corners_str
align_corners_arg.i = tf_op.get_attr(tf_align_corners)
def convert_space_batch(self, tf_op):
def convert_resize_nearest_neighbor(self, tf_op):
op = self.convert_general_op(tf_op)
op.type = MaceOp.ResizeNearestNeighbor.name
align_corners_arg = op.arg.add()
align_corners_arg.name = MaceKeyword.mace_align_corners_str
align_corners_arg.i = tf_op.get_attr(tf_align_corners)
def convert_space_batch(self, tf_op):
op = self.convert_general_op(tf_op)
del op.input[1:]
......
"""Repository rule for opencl encrypt kernel autoconfiguration, borrow from tensorflow
"""
def _opencl_encrypt_kernel_impl(repository_ctx):
repository_ctx.template(
"BUILD",
Label("//repository/opencl-kernel:BUILD.tpl"))
mace_root_path = str(repository_ctx.path(Label("@mace//:BUILD")))[:-len("BUILD")]
generated_files_path = repository_ctx.path("gen")
def _opencl_encrypt_kernel_impl(repository_ctx):
repository_ctx.template(
"BUILD",
Label("//repository/opencl-kernel:BUILD.tpl"),
)
ret = repository_ctx.execute(
["test", "-f", "%s/.git/logs/HEAD" % mace_root_path])
if ret.return_code == 0:
unused_var = repository_ctx.path(Label("//:.git/HEAD"))
ret = repository_ctx.execute(
["test", "-f", "%s/.git/refs/heads/master" % mace_root_path])
if ret.return_code == 0:
unused_var = repository_ctx.path(Label("//:.git/refs/heads/master"))
mace_root_path = str(repository_ctx.path(Label("@mace//:BUILD")))[:-len("BUILD")]
generated_files_path = repository_ctx.path("gen")
ret = repository_ctx.execute(
["test", "-f", "%s/mace/ops/opencl/cl/common.h" % mace_root_path])
if ret.return_code == 0:
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/activation.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/addn.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/batch_norm.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/batch_to_space.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/bias_add.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/buffer_to_image.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/buffer_transform.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/channel_shuffle.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/common.h"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/concat.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_1x1.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_1x1_buffer.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_3x3.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_buffer.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/crop.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/deconv_2d.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depthwise_deconv2d.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depth_to_space.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depthwise_conv2d.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depthwise_conv2d_buffer.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/eltwise.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/fully_connected.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/lstmcell.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/matmul.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pad.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling_buffer.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/reduce.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/resize_bicubic.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/resize_bilinear.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/split.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/softmax.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/softmax_buffer.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/space_to_batch.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/space_to_depth.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/sqrdiff_mean.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/winograd_transform.cl"))
ret = repository_ctx.execute(
["test", "-f", "%s/.git/logs/HEAD" % mace_root_path],
)
if ret.return_code == 0:
unused_var = repository_ctx.path(Label("//:.git/HEAD"))
ret = repository_ctx.execute(
["test", "-f", "%s/.git/refs/heads/master" % mace_root_path],
)
if ret.return_code == 0:
unused_var = repository_ctx.path(Label("//:.git/refs/heads/master"))
python_bin_path = repository_ctx.which("python")
ret = repository_ctx.execute(
["test", "-f", "%s/mace/ops/opencl/cl/common.h" % mace_root_path],
)
if ret.return_code == 0:
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/activation.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/addn.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/batch_norm.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/batch_to_space.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/bias_add.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/buffer_to_image.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/buffer_transform.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/channel_shuffle.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/common.h"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/concat.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_1x1.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_1x1_buffer.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_3x3.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/conv_2d_buffer.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/crop.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/deconv_2d.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depthwise_deconv2d.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depth_to_space.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depthwise_conv2d.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/depthwise_conv2d_buffer.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/eltwise.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/fully_connected.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/lstmcell.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/matmul.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pad.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling_buffer.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/reduce.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/resize_bicubic.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/resize_bilinear.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/resize_nearest_neighbor.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/split.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/softmax.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/softmax_buffer.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/space_to_batch.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/space_to_depth.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/sqrdiff_mean.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/winograd_transform.cl"))
repository_ctx.execute([
python_bin_path, '%s/mace/python/tools/encrypt_opencl_codegen.py' % mace_root_path,
'--cl_kernel_dir=%s/mace/ops/opencl/cl' % mace_root_path,
'--output_path=%s/encrypt_opencl_kernel' % generated_files_path
], quiet=False)
python_bin_path = repository_ctx.which("python")
repository_ctx.execute([
python_bin_path,
"%s/mace/python/tools/encrypt_opencl_codegen.py" % mace_root_path,
"--cl_kernel_dir=%s/mace/ops/opencl/cl" % mace_root_path,
"--output_path=%s/encrypt_opencl_kernel" % generated_files_path,
], quiet = False)
encrypt_opencl_kernel_repository = repository_rule(
implementation = _opencl_encrypt_kernel_impl,
......
......@@ -766,6 +766,7 @@ def validate_model(abi,
"--validation_threshold=%f" % validation_threshold,
"--input_data_type=%s" % ",".join(input_data_types),
"--backend=%s" % ",".join(backend),
"--log_file=%s" % log_file,
_fg=True)
six.print_("Validation done!\n")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册