提交 4888bd5f 编写于 作者: 叶剑武

Merge branch 'fix-one-hot' into 'master'

Remove GPU code of the OneHot op, since it only supports 1D input.

See merge request !1039
...@@ -79,12 +79,10 @@ MemoryBlock MemoryOptimizer::CreateMemoryBlock( ...@@ -79,12 +79,10 @@ MemoryBlock MemoryOptimizer::CreateMemoryBlock(
*op_def, "buffer_type", OpenCLBufferType::IN_OUT_CHANNEL)); *op_def, "buffer_type", OpenCLBufferType::IN_OUT_CHANNEL));
} }
std::vector<size_t> image_shape; std::vector<size_t> image_shape;
if (shape.size() == 1) { if (shape.size() == 2) {
shape = {shape[0], 1, 1, 1};
} else if (shape.size() == 2) {
shape = {shape[0], 1, 1, shape[1]}; shape = {shape[0], 1, 1, shape[1]};
} else { } else {
MACE_CHECK(shape.size() == 4) << "GPU only support 1D/2D/4D input"; MACE_CHECK(shape.size() == 4) << "GPU only support 2D/4D input";
} }
OpenCLUtil::CalImage2DShape(shape, buffer_type, &image_shape); OpenCLUtil::CalImage2DShape(shape, buffer_type, &image_shape);
block.set_x(image_shape[0]); block.set_x(image_shape[0]);
......
...@@ -29,22 +29,21 @@ class ConcatOpBase : public Operation { ...@@ -29,22 +29,21 @@ class ConcatOpBase : public Operation {
public: public:
explicit ConcatOpBase(OpConstructContext *context) explicit ConcatOpBase(OpConstructContext *context)
: Operation(context), : Operation(context),
axis_(Operation::GetOptionalArg<int>("axis", 3)), axis_(Operation::GetOptionalArg<int>("axis", 3)) {}
checked_(false) {}
protected: protected:
void Validate() { int FormatAxis() {
const int32_t input_dims = this->Input(0)->dim_size(); const int32_t input_dims = this->Input(0)->dim_size();
axis_ = axis_ =
axis_ < 0 ? axis_ + input_dims : axis_; axis_ < 0 ? axis_ + input_dims : axis_;
MACE_CHECK((0 <= axis_ && axis_ < input_dims), MACE_CHECK((0 <= axis_ && axis_ < input_dims),
"Expected concatenating axis in the range [", -input_dims, ", ", "Expected concatenating axis in the range [", -input_dims, ", ",
input_dims, "], but got ", axis_); input_dims, "], but got ", axis_);
return axis_;
} }
protected: protected:
int axis_; int axis_;
bool checked_;
}; };
template <DeviceType D, class T> template <DeviceType D, class T>
...@@ -54,20 +53,17 @@ template <typename T> ...@@ -54,20 +53,17 @@ template <typename T>
class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase { class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase {
public: public:
explicit ConcatOp(OpConstructContext *context) explicit ConcatOp(OpConstructContext *context)
: ConcatOpBase(context) {} : ConcatOpBase(context),
has_data_format_(Operation::GetOptionalArg<int>(
"has_data_format", 0) == 1) {}
MaceStatus Run(OpContext *context) override { MaceStatus Run(OpContext *context) override {
MACE_UNUSED(context); MACE_UNUSED(context);
if (!checked_) { int axis = FormatAxis();
Validate(); if (has_data_format_ && this->Input(0)->dim_size() == 4) {
auto has_df = Operation::GetOptionalArg<int>( if (axis == 3) axis = 1;
"has_data_format", 0); else if (axis == 2) axis = 3;
if (has_df && this->Input(0)->dim_size() == 4) { else if (axis == 1) axis = 2;
if (axis_ == 3) axis_ = 1;
else if (axis_ == 2) axis_ = 3;
else if (axis_ == 1) axis_ = 2;
}
checked_ = true;
} }
const std::vector<const Tensor *> &inputs = this->Inputs(); const std::vector<const Tensor *> &inputs = this->Inputs();
Tensor *output = this->Output(0); Tensor *output = this->Output(0);
...@@ -76,7 +72,7 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase { ...@@ -76,7 +72,7 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase {
std::vector<index_t> output_shape(input0->shape()); std::vector<index_t> output_shape(input0->shape());
index_t inner_size = 1; index_t inner_size = 1;
for (int i = 0; i < axis_; ++i) { for (int i = 0; i < axis; ++i) {
inner_size *= output_shape[i]; inner_size *= output_shape[i];
} }
std::vector<index_t> outer_sizes(inputs_count, 0); std::vector<index_t> outer_sizes(inputs_count, 0);
...@@ -86,14 +82,14 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase { ...@@ -86,14 +82,14 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase {
MACE_CHECK(input->dim_size() == input0->dim_size(), MACE_CHECK(input->dim_size() == input0->dim_size(),
"Ranks of all input tensors must be same."); "Ranks of all input tensors must be same.");
for (int j = 0; j < input->dim_size(); ++j) { for (int j = 0; j < input->dim_size(); ++j) {
if (j == axis_) { if (j == axis) {
continue; continue;
} }
MACE_CHECK(input->dim(j) == input0->dim(j), MACE_CHECK(input->dim(j) == input0->dim(j),
"Dimensions of inputs should equal except axis."); "Dimensions of inputs should equal except axis.");
} }
outer_sizes[i] = input->size() / inner_size; outer_sizes[i] = input->size() / inner_size;
output_shape[axis_] += input->dim(axis_); output_shape[axis] += input->dim(axis);
} }
MACE_RETURN_IF_ERROR(output->Resize(output_shape)); MACE_RETURN_IF_ERROR(output->Resize(output_shape));
...@@ -119,6 +115,9 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase { ...@@ -119,6 +115,9 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase {
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
private:
bool has_data_format_;
}; };
#ifdef MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_QUANTIZE
...@@ -130,7 +129,7 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase { ...@@ -130,7 +129,7 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase {
MaceStatus Run(OpContext *context) override { MaceStatus Run(OpContext *context) override {
MACE_UNUSED(context); MACE_UNUSED(context);
Validate(); int axis = FormatAxis();
const std::vector<const Tensor *> &inputs = this->Inputs(); const std::vector<const Tensor *> &inputs = this->Inputs();
Tensor *output = this->Output(0); Tensor *output = this->Output(0);
MACE_CHECK(output->scale() != 0); MACE_CHECK(output->scale() != 0);
...@@ -139,7 +138,7 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase { ...@@ -139,7 +138,7 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase {
std::vector<index_t> output_shape(input0->shape()); std::vector<index_t> output_shape(input0->shape());
index_t inner_size = 1; index_t inner_size = 1;
for (int i = 0; i < axis_; ++i) { for (int i = 0; i < axis; ++i) {
inner_size *= output_shape[i]; inner_size *= output_shape[i];
} }
std::vector<index_t> outer_sizes(inputs_count, 0); std::vector<index_t> outer_sizes(inputs_count, 0);
...@@ -149,14 +148,14 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase { ...@@ -149,14 +148,14 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase {
MACE_CHECK(input->dim_size() == input0->dim_size(), MACE_CHECK(input->dim_size() == input0->dim_size(),
"Ranks of all input tensors must be same."); "Ranks of all input tensors must be same.");
for (int j = 0; j < input->dim_size(); ++j) { for (int j = 0; j < input->dim_size(); ++j) {
if (j == axis_) { if (j == axis) {
continue; continue;
} }
MACE_CHECK(input->dim(j) == input0->dim(j), MACE_CHECK(input->dim(j) == input0->dim(j),
"Dimensions of inputs should equal except axis."); "Dimensions of inputs should equal except axis.");
} }
outer_sizes[i] = input->size() / inner_size; outer_sizes[i] = input->size() / inner_size;
output_shape[axis_] += input->dim(axis_); output_shape[axis] += input->dim(axis);
} }
MACE_RETURN_IF_ERROR(output->Resize(output_shape)); MACE_RETURN_IF_ERROR(output->Resize(output_shape));
...@@ -200,15 +199,14 @@ class ConcatOp<DeviceType::GPU, T> : public ConcatOpBase { ...@@ -200,15 +199,14 @@ class ConcatOp<DeviceType::GPU, T> : public ConcatOpBase {
explicit ConcatOp(OpConstructContext *context) explicit ConcatOp(OpConstructContext *context)
: ConcatOpBase(context) { : ConcatOpBase(context) {
if (context->device()->gpu_runtime()->UseImageMemory()) { if (context->device()->gpu_runtime()->UseImageMemory()) {
kernel_ = make_unique<opencl::image::ConcatKernel<T>>(axis_); kernel_ = make_unique<opencl::image::ConcatKernel<T>>();
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
} }
} }
MaceStatus Run(OpContext *context) override { MaceStatus Run(OpContext *context) override {
Validate();
Tensor *output = this->Output(0); Tensor *output = this->Output(0);
return kernel_->Compute(context, inputs_, output); return kernel_->Compute(context, inputs_, FormatAxis(), output);
} }
private: private:
......
...@@ -16,9 +16,6 @@ ...@@ -16,9 +16,6 @@
#include <memory> #include <memory>
#include "mace/core/operator.h" #include "mace/core/operator.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/one_hot.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace ops { namespace ops {
...@@ -148,52 +145,9 @@ class OneHotOp<DeviceType::CPU, T> : public OneHotOpBase { ...@@ -148,52 +145,9 @@ class OneHotOp<DeviceType::CPU, T> : public OneHotOpBase {
} }
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T>
class OneHotOp<DeviceType::GPU, T> : public OneHotOpBase {
public:
explicit OneHotOp(OpConstructContext *context) : OneHotOpBase(context) {
if (context->device()->gpu_runtime()->UseImageMemory()) {
kernel_.reset(new opencl::image::OneHotKernel<T>(
depth_, on_value_, off_value_, axis_));
} else {
MACE_NOT_IMPLEMENTED;
}
}
MaceStatus Run(OpContext *context) override {
const Tensor *input = this->Input(0);
Tensor *output = this->Output(0);
return kernel_->Compute(context, input, output);
}
private:
std::unique_ptr<OpenCLOneHotKernel> kernel_;
};
#endif // MACE_ENABLE_OPENCL
void RegisterOneHot(OpRegistryBase *op_registry) { void RegisterOneHot(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "OneHot", OneHotOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "OneHot", OneHotOp, DeviceType::CPU, float);
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "OneHot", OneHotOp, DeviceType::GPU, float);
MACE_REGISTER_OP(op_registry, "OneHot", OneHotOp, DeviceType::GPU, half);
MACE_REGISTER_OP_CONDITION(
op_registry,
OpConditionBuilder("OneHot")
.SetDevicePlacerFunc(
[](OpConstructContext *context) -> std::set<DeviceType> {
auto op = context->operator_def();
if (op->output_shape_size() != op->output_size()) {
return { DeviceType::CPU, DeviceType::GPU };
}
if (op->output_shape(0).dims_size() != 2) {
return { DeviceType::CPU };
}
return { DeviceType::CPU, DeviceType::GPU };
}));
#endif // MACE_ENABLE_OPENCL
} }
} // namespace ops } // namespace ops
......
...@@ -61,9 +61,7 @@ void OneHot(int iters, int batch, int depth, int axis) { ...@@ -61,9 +61,7 @@ void OneHot(int iters, int batch, int depth, int axis) {
MACE_BENCHMARK(MACE_BM_ONE_HOT_##N##_##DEPTH##_##AXIS##_##TYPE##_##DEVICE) MACE_BENCHMARK(MACE_BM_ONE_HOT_##N##_##DEPTH##_##AXIS##_##TYPE##_##DEVICE)
#define MACE_BM_ONE_HOT(N, DEPTH, AXIS) \ #define MACE_BM_ONE_HOT(N, DEPTH, AXIS) \
MACE_BM_ONE_HOT_MACRO(N, DEPTH, AXIS, float, CPU); \ MACE_BM_ONE_HOT_MACRO(N, DEPTH, AXIS, float, CPU);
MACE_BM_ONE_HOT_MACRO(N, DEPTH, AXIS, float, GPU); \
MACE_BM_ONE_HOT_MACRO(N, DEPTH, AXIS, half, GPU);
MACE_BM_ONE_HOT(512, 16, 0); MACE_BM_ONE_HOT(512, 16, 0);
MACE_BM_ONE_HOT(512, 16, 1); MACE_BM_ONE_HOT(512, 16, 1);
......
...@@ -45,7 +45,6 @@ void TestOneHot(const std::vector<index_t> &input_shape, ...@@ -45,7 +45,6 @@ void TestOneHot(const std::vector<index_t> &input_shape,
.AddFloatArg("on_value", on_value) .AddFloatArg("on_value", on_value)
.AddFloatArg("off_value", off_value) .AddFloatArg("off_value", off_value)
.AddIntArg("axis", axis) .AddIntArg("axis", axis)
.AddIntArg("data_format", DataFormat::NHWC)
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Run // Run
...@@ -77,10 +76,6 @@ TEST_F(OneHotTest, Dim1) { ...@@ -77,10 +76,6 @@ TEST_F(OneHotTest, Dim1) {
TestOneHot<DeviceType::CPU, float>(input_shape, input_data, expected_shape, TestOneHot<DeviceType::CPU, float>(input_shape, input_data, expected_shape,
expected_data, 5, -1); expected_data, 5, -1);
TestOneHot<DeviceType::GPU, float>(input_shape, input_data, expected_shape,
expected_data, 5, -1);
TestOneHot<DeviceType::GPU, half>(input_shape, input_data, expected_shape,
expected_data, 5, -1);
expected_shape = {5, 10}; expected_shape = {5, 10};
expected_data = { expected_data = {
...@@ -93,10 +88,6 @@ TEST_F(OneHotTest, Dim1) { ...@@ -93,10 +88,6 @@ TEST_F(OneHotTest, Dim1) {
TestOneHot<DeviceType::CPU, float>(input_shape, input_data, expected_shape, TestOneHot<DeviceType::CPU, float>(input_shape, input_data, expected_shape,
expected_data, 5, 0); expected_data, 5, 0);
TestOneHot<DeviceType::GPU, float>(input_shape, input_data, expected_shape,
expected_data, 5, 0);
TestOneHot<DeviceType::GPU, half>(input_shape, input_data, expected_shape,
expected_data, 5, 0);
} }
TEST_F(OneHotTest, OnOffValue) { TEST_F(OneHotTest, OnOffValue) {
...@@ -111,10 +102,6 @@ TEST_F(OneHotTest, OnOffValue) { ...@@ -111,10 +102,6 @@ TEST_F(OneHotTest, OnOffValue) {
TestOneHot<DeviceType::CPU, float>(input_shape, input_data, expected_shape, TestOneHot<DeviceType::CPU, float>(input_shape, input_data, expected_shape,
expected_data, 6, -1, 7, 8); expected_data, 6, -1, 7, 8);
TestOneHot<DeviceType::GPU, float>(input_shape, input_data, expected_shape,
expected_data, 6, -1, 7, 8);
TestOneHot<DeviceType::GPU, half>(input_shape, input_data, expected_shape,
expected_data, 6, -1, 7, 8);
} }
TEST_F(OneHotTest, Dim2) { TEST_F(OneHotTest, Dim2) {
......
#include <common.h>

// One-hot kernel for a 1-D input tensor stored as an OpenCL image
// (one input element per image row, value in the .s0 lane of column 0).
// Each work item writes one output pixel at (channel_idx, batch_idx);
// a pixel packs 4 consecutive output elements in lanes s0..s3.
//
// AXIS_0 variant: lanes s0..s3 of a pixel correspond to 4 consecutive
// input positions (in_idx = channel_idx * 4 ..), and each input value is
// compared against batch_idx — i.e. the one-hot axis is dimension 0.
// Default variant: the single input element at row batch_idx selects which
// packed pixel (i / 4) and lane (i % 4) receives on_value.
__kernel void one_hot(OUT_OF_RANGE_PARAMS
                      GLOBAL_WORK_GROUP_SIZE_DIM2
                      __read_only image2d_t input,
                      __write_only image2d_t output,
#ifdef AXIS_0
                      // number of input elements; bounds the up-to-4 packed reads
                      __private const int in_size,
#endif
                      __private const float on_value,
                      __private const float off_value) {
  const int channel_idx = get_global_id(0);
  const int batch_idx = get_global_id(1);

#ifndef NON_UNIFORM_WORK_GROUP
  // Guard against padded global sizes when uniform work groups are required.
  if (channel_idx >= global_size_dim0 || batch_idx >= global_size_dim1) {
    return;
  }
#endif

  // Start from the "miss" value in every lane; selectively overwrite hits.
  DATA_TYPE4 out = off_value;

#ifdef AXIS_0
  // Walk up to 4 consecutive input elements, stopping at in_size; a lane is
  // set to on_value when its input element equals this row's index.
  // NOTE(review): the comparison is float (in.s0) vs int (batch_idx) —
  // relies on the input holding exactly-representable integer values.
  int in_idx = channel_idx * 4;
  DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(0, in_idx));
  if (in.s0 == batch_idx) {
    out.s0 = on_value;
  }
  if (++in_idx < in_size) {
    in = READ_IMAGET(input, SAMPLER, (int2)(0, in_idx));
    if (in.s0 == batch_idx) {
      out.s1 = on_value;
    }
    if (++in_idx < in_size) {
      in = READ_IMAGET(input, SAMPLER, (int2)(0, in_idx));
      if (in.s0 == batch_idx) {
        out.s2 = on_value;
      }
      if (++in_idx < in_size) {
        in = READ_IMAGET(input, SAMPLER, (int2)(0, in_idx));
        if (in.s0 == batch_idx) {
          out.s3 = on_value;
        }
      }
    }
  }
#else
  // One-hot along the last dimension: the input value i (truncated from
  // float) picks the target pixel (i / 4) and its lane (i % 4).
  DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(0, batch_idx));
  int i = in.s0;
  if (i / 4 == channel_idx) {
    switch (i % 4) {
      case 0:
        out.s0 = on_value;
        break;
      case 1:
        out.s1 = on_value;
        break;
      case 2:
        out.s2 = on_value;
        break;
      case 3:
        out.s3 = on_value;
        break;
    }
  }
#endif

  WRITE_IMAGET(output, (int2)(channel_idx, batch_idx), out);
}
...@@ -31,6 +31,7 @@ class OpenCLConcatKernel { ...@@ -31,6 +31,7 @@ class OpenCLConcatKernel {
virtual MaceStatus Compute( virtual MaceStatus Compute(
OpContext *context, OpContext *context,
const std::vector<const Tensor *> &input_list, const std::vector<const Tensor *> &input_list,
const int32_t axis,
Tensor *output) = 0; Tensor *output) = 0;
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLConcatKernel); MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLConcatKernel);
}; };
......
...@@ -34,8 +34,6 @@ std::vector<index_t> FormatBufferShape( ...@@ -34,8 +34,6 @@ std::vector<index_t> FormatBufferShape(
return buffer_shape; return buffer_shape;
} else if (buffer_shape_size == 2) { // NC } else if (buffer_shape_size == 2) { // NC
return {buffer_shape[0], 1, 1, buffer_shape[1]}; return {buffer_shape[0], 1, 1, buffer_shape[1]};
} else if (buffer_shape_size == 1) { // N
return {buffer_shape[0], 1, 1, 1};
} else { } else {
LOG(FATAL) << "GPU only support 2D or 4D input and output"; LOG(FATAL) << "GPU only support 2D or 4D input and output";
} }
......
...@@ -48,14 +48,14 @@ MaceStatus ConcatN(OpContext *context, ...@@ -48,14 +48,14 @@ MaceStatus ConcatN(OpContext *context,
template <typename T> template <typename T>
class ConcatKernel : public OpenCLConcatKernel { class ConcatKernel : public OpenCLConcatKernel {
public: public:
explicit ConcatKernel(const int32_t axis) : axis_(axis) {} ConcatKernel() {}
MaceStatus Compute( MaceStatus Compute(
OpContext *context, OpContext *context,
const std::vector<const Tensor *> &input_list, const std::vector<const Tensor *> &input_list,
const int32_t axis,
Tensor *output) override; Tensor *output) override;
private: private:
int32_t axis_;
cl::Kernel kernel_; cl::Kernel kernel_;
uint32_t kwg_size_; uint32_t kwg_size_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
...@@ -65,6 +65,7 @@ template <typename T> ...@@ -65,6 +65,7 @@ template <typename T>
MaceStatus ConcatKernel<T>::Compute( MaceStatus ConcatKernel<T>::Compute(
OpContext *context, OpContext *context,
const std::vector<const Tensor *> &input_list, const std::vector<const Tensor *> &input_list,
const int32_t axis,
Tensor *output) { Tensor *output) {
const int inputs_count = input_list.size(); const int inputs_count = input_list.size();
...@@ -76,13 +77,13 @@ MaceStatus ConcatKernel<T>::Compute( ...@@ -76,13 +77,13 @@ MaceStatus ConcatKernel<T>::Compute(
MACE_CHECK(input->dim_size() == input0->dim_size(), MACE_CHECK(input->dim_size() == input0->dim_size(),
"Ranks of all input tensors must be same."); "Ranks of all input tensors must be same.");
for (int j = 0; j < input->dim_size(); ++j) { for (int j = 0; j < input->dim_size(); ++j) {
if (j == axis_) { if (j == axis) {
continue; continue;
} }
MACE_CHECK(input->dim(j) == input0->dim(j), MACE_CHECK(input->dim(j) == input0->dim(j),
"Dimensions of inputs should equal except axis."); "Dimensions of inputs should equal except axis.");
} }
output_shape[axis_] += input->dim(axis_); output_shape[axis] += input->dim(axis);
} }
std::vector<size_t> image_shape; std::vector<size_t> image_shape;
OpenCLUtil::CalImage2DShape(output_shape, OpenCLUtil::CalImage2DShape(output_shape,
......
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_OPENCL_IMAGE_ONE_HOT_H_
#define MACE_OPS_OPENCL_IMAGE_ONE_HOT_H_
#include "mace/ops/opencl/one_hot.h"
#include <memory>
#include <vector>
#include <set>
#include <string>
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/opencl/helper.h"
namespace mace {
namespace ops {
namespace opencl {
namespace image {
// Image-memory GPU implementation of the OneHot op.
// T selects the compute data type baked into the OpenCL build options
// (e.g. float or half).
template <typename T>
class OneHotKernel : public OpenCLOneHotKernel {
 public:
  // depth: extent of the generated one-hot dimension.
  // on_value / off_value: values written at the matching / non-matching
  // positions respectively.
  // axis: axis along which to expand; -1 means "append as the last dim"
  // (resolved in Compute()).
  OneHotKernel(const int depth, const float on_value,
               const float off_value, const int axis)
      : depth_(depth), on_value_(on_value),
        off_value_(off_value), axis_(axis) {}

  MaceStatus Compute(
      OpContext *context,
      const Tensor *input,
      Tensor *output) override;

 private:
  int depth_;
  float on_value_;
  float off_value_;
  int axis_;
  cl::Kernel kernel_;    // built lazily on first Compute(), then cached
  uint32_t kwg_size_;    // max workgroup size queried for kernel_
  // Input shape seen when kernel args were last set; used to skip
  // redundant setArg calls on repeated runs with the same shape.
  std::vector<index_t> input_shape_;
};
// Runs the one-hot kernel on the GPU. Only 1-D input is supported; the
// output is 2-D: {depth, N} when the resolved axis is 0, otherwise
// {N, depth}.
template <typename T>
MaceStatus OneHotKernel<T>::Compute(
    OpContext *context,
    const Tensor *input,
    Tensor *output) {
  auto input_shape = input->shape();
  // axis_ == -1 means "append the new dimension after the existing ones",
  // which for 1-D input resolves to axis 1.
  index_t axis = axis_ == -1 ? input->dim_size() : axis_;
  MACE_CHECK(input->dim_size() == 1, "OneHot GPU only supports 1D input");
  MACE_CHECK(axis >= 0 && axis <= input->dim_size());
  std::vector<index_t> output_shape =
      axis == 0 ? std::vector<index_t>{depth_, input_shape[0]} :
                  std::vector<index_t>{input_shape[0], depth_};
  // Image layout: x = ceil(dim1 / 4) — 4 values packed per pixel — and
  // y = dim0; this matches the lane packing done in the one_hot kernel.
  std::vector<size_t> output_image_shape{
      static_cast<size_t>(RoundUpDiv4(output_shape[1])),
      static_cast<size_t>(output_shape[0])};
  MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape));
  auto runtime = context->device()->gpu_runtime()->opencl_runtime();
  MACE_OUT_OF_RANGE_DEFINITION;

  // Build the kernel once and cache it. AXIS_0 selects the kernel variant
  // that one-hots along dimension 0.
  if (kernel_.get() == nullptr) {
    std::set<std::string> built_options;
    MACE_OUT_OF_RANGE_CONFIG;
    MACE_NON_UNIFORM_WG_CONFIG;
    std::string kernel_name = MACE_OBFUSCATE_SYMBOL("one_hot");
    built_options.emplace("-Done_hot=" + kernel_name);
    auto dt = DataTypeToEnum<T>::value;
    built_options.emplace("-DDATA_TYPE=" + DtToCLDt(dt));
    built_options.emplace("-DCMD_DATA_TYPE=" + DtToCLCMDDt(dt));
    if (axis == 0) {
      built_options.emplace("-DAXIS_0");
    }
    MACE_RETURN_IF_ERROR(runtime->BuildKernel("one_hot", kernel_name,
                                              built_options, &kernel_));
    kwg_size_ =
        static_cast<uint32_t>(runtime->GetKernelMaxWorkGroupSize(kernel_));
  }

  // One work item per output pixel (gws[0] = packed channels, gws[1] = rows).
  const uint32_t gws[2] = {
      static_cast<uint32_t>(output_image_shape[0]),
      static_cast<uint32_t>(output_image_shape[1])
  };
  MACE_OUT_OF_RANGE_INIT(kernel_);

  // Re-set kernel args only when the input shape changed; on_value_,
  // off_value_ and the image handles follow the cached shape. The AXIS_0
  // variant additionally needs the element count for its bounds checks.
  if (!IsVecEqual(input_shape_, input->shape())) {
    int idx = 0;
    MACE_OUT_OF_RANGE_SET_ARGS(kernel_);
    MACE_SET_2D_GWS_ARGS(kernel_, gws);
    kernel_.setArg(idx++, *(input->opencl_image()));
    kernel_.setArg(idx++, *(output->opencl_image()));
    if (axis == 0) {
      kernel_.setArg(idx++, static_cast<int>(input_shape[0]));
    }
    kernel_.setArg(idx++, on_value_);
    kernel_.setArg(idx++, off_value_);
    input_shape_ = input->shape();
  }

  const std::vector<uint32_t> lws = {kwg_size_ / 64, 64, 0};
  std::string tuning_key = Concat("one_hot", output->dim(0), output->dim(1));
  MACE_RETURN_IF_ERROR(TuningOrRun2DKernel(runtime, kernel_, tuning_key,
                                           gws, lws, context->future()));
  MACE_OUT_OF_RANGE_VALIDATION;

  return MaceStatus::MACE_SUCCESS;
}
} // namespace image
} // namespace opencl
} // namespace ops
} // namespace mace
#endif // MACE_OPS_OPENCL_IMAGE_ONE_HOT_H_
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_OPENCL_ONE_HOT_H_
#define MACE_OPS_OPENCL_ONE_HOT_H_
#include "mace/public/mace.h"
#include "mace/utils/utils.h"
namespace mace {
class OpContext;
class Tensor;
namespace ops {
// Abstract interface for OpenCL OneHot kernel implementations
// (e.g. the image-memory kernel); ops hold implementations through
// this base to stay agnostic of the memory type.
class OpenCLOneHotKernel {
 public:
  // Computes the one-hot expansion of `input` into `output`.
  virtual MaceStatus Compute(
      OpContext *context,
      const Tensor *input,
      Tensor *output) = 0;
  MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLOneHotKernel);
};
} // namespace ops
} // namespace mace
#endif // MACE_OPS_OPENCL_ONE_HOT_H_
...@@ -50,7 +50,6 @@ def _opencl_encrypt_kernel_impl(repository_ctx): ...@@ -50,7 +50,6 @@ def _opencl_encrypt_kernel_impl(repository_ctx):
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/fully_connected.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/fully_connected.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/lstmcell.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/lstmcell.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/matmul.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/matmul.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/one_hot.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pad.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pad.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling_buffer.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling_buffer.cl"))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册