提交 a2739dec 编写于 作者: Raman Sarokin 提交者: TensorFlower Gardener

MaxUnpooling converted to generic GPUOperation.

PiperOrigin-RevId: 328193073
Change-Id: Idf41aafe4b095dd2a6ea08fced44e04b3a67566c
上级 ae517dba
......@@ -23,76 +23,26 @@ limitations under the License.
namespace tflite {
namespace gpu {
namespace cl {
// (removed in this commit) 2D constructor: packs the 2D attributes into the
// int4 members, zeroing the unused z/w lanes, then generates the kernel code.
MaxUnpooling::MaxUnpooling(const OperationDef& definition,
const MaxUnpooling2DAttributes& attr)
: GPUOperation(definition),
stride_(attr.strides.w, attr.strides.h, 0, 0),
padding_(attr.padding.appended.w, attr.padding.appended.h, 0, 0),
kernel_size_(attr.kernel.w, attr.kernel.h, 0, 0) {
// Kernel source is built once at construction; args are bound later in
// BindArguments().
code_ = GetMaxUnpoolingKernelCode(definition_);
}
// (removed in this commit) 3D constructor: same as the 2D overload but also
// fills the depth (z) lane of stride/padding/kernel_size.
MaxUnpooling::MaxUnpooling(const OperationDef& definition,
const MaxUnpooling3DAttributes& attr)
: GPUOperation(definition),
stride_(attr.strides.w, attr.strides.h, attr.strides.d, 0),
padding_(attr.padding.appended.w, attr.padding.appended.h,
attr.padding.appended.d, 0),
kernel_size_(attr.kernel.w, attr.kernel.h, attr.kernel.d, 0) {
code_ = GetMaxUnpoolingKernelCode(definition_);
}
// (removed in this commit) Move constructor: moves the GPUOperation base and
// copies the POD int4 members (copying is as cheap as moving for them).
MaxUnpooling::MaxUnpooling(MaxUnpooling&& kernel)
: GPUOperation(std::move(kernel)),
stride_(kernel.stride_),
padding_(kernel.padding_),
kernel_size_(kernel.kernel_size_) {}
// (removed in this commit) Move assignment: self-assignment-safe; swaps the
// POD members, then move-assigns the GPUOperation base.
MaxUnpooling& MaxUnpooling::operator=(MaxUnpooling&& kernel) {
if (this != &kernel) {
std::swap(stride_, kernel.stride_);
std::swap(padding_, kernel.padding_);
std::swap(kernel_size_, kernel.kernel_size_);
GPUOperation::operator=(std::move(kernel));
}
return *this;
}
std::string MaxUnpooling::GetMaxUnpoolingKernelCode(
const OperationDef& op_def) {
namespace {
std::string GetMaxUnpoolingKernelCode(const OperationDef& op_def,
GPUOperation* op) {
auto src_desc = op_def.src_tensors[0];
src_desc.SetTextureAddressMode(TextureAddressMode::ZERO);
if (op_def.IsBatchSupported()) {
src_desc.SetStateVar("BatchedWidth", "true");
}
AddSrcTensor("src_tensor", src_desc);
op->AddSrcTensor("src_tensor", src_desc);
auto src_ind_desc = op_def.src_tensors[1];
src_ind_desc.SetTextureAddressMode(TextureAddressMode::ZERO);
if (op_def.IsBatchSupported()) {
src_ind_desc.SetStateVar("BatchedWidth", "true");
}
AddSrcTensor("src_indices", src_ind_desc);
op->AddSrcTensor("src_indices", src_ind_desc);
auto dst_desc = op_def.dst_tensors[0];
if (op_def.IsBatchSupported()) {
dst_desc.SetStateVar("BatchedWidth", "true");
}
AddDstTensor("dst_tensor", dst_desc);
if (op_def.dst_tensors[0].HasAxis(Axis::WIDTH)) {
args_.AddInt("kernel_size_x");
args_.AddInt("padding_x");
args_.AddInt("stride_x");
}
if (op_def.dst_tensors[0].HasAxis(Axis::HEIGHT)) {
args_.AddInt("kernel_size_y");
args_.AddInt("padding_y");
args_.AddInt("stride_y");
}
if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) {
args_.AddInt("kernel_size_z");
args_.AddInt("padding_z");
args_.AddInt("stride_z");
}
op->AddDstTensor("dst_tensor", dst_desc);
std::string c = GetCommonDefines(op_def.precision);
c += "__kernel void main_function(\n";
......@@ -115,7 +65,8 @@ std::string MaxUnpooling::GetMaxUnpoolingKernelCode(
c += " int linear_id_0 = get_global_id(0);\n";
c += " int X0 = linear_id_0 / args.dst_tensor.Batch();\n";
c += " int B = linear_id_0 % args.dst_tensor.Batch();\n";
c += " int src_x0 = (X0 + args.padding_x) / args.stride_x;\n";
c += " int src_x0 = (X0 + args.padding_x * args.dst_tensor.Batch()) / "
"args.stride_x;\n";
c += " int src_x = src_x0 * args.dst_tensor.Batch() + B;\n";
} else {
c += " int src_x = (X + args.padding_x) / args.stride_x;\n";
......@@ -145,7 +96,8 @@ std::string MaxUnpooling::GetMaxUnpoolingKernelCode(
" int4 ind = convert_int4(args.src_indices.Read(" + src_args + "));\n";
}
if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) {
c += " int t_x = X0 - (src_x0 * args.stride_x - args.padding_x);\n";
c += " int t_x = X0 - (src_x0 * args.stride_x - args.padding_x * "
"args.dst_tensor.Batch());\n";
} else {
c += " int t_x = X - (src_x * args.stride_x - args.padding_x);\n";
}
......@@ -172,41 +124,37 @@ std::string MaxUnpooling::GetMaxUnpoolingKernelCode(
return c;
}
} // namespace
// (removed in this commit) Binds per-dispatch kernel arguments; only the axes
// present on the destination tensor get their stride/padding/kernel_size set.
absl::Status MaxUnpooling::BindArguments() {
if (definition_.dst_tensors[0].HasAxis(Axis::WIDTH)) {
RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x));
// padding_x is pre-scaled by batch because the X axis is batch-packed in
// the generated kernel (src_x = x0 * Batch + b).
RETURN_IF_ERROR(args_.SetInt("padding_x", padding_.x * src_[0]->Batch()));
RETURN_IF_ERROR(args_.SetInt("kernel_size_x", kernel_size_.x));
}
if (definition_.dst_tensors[0].HasAxis(Axis::HEIGHT)) {
RETURN_IF_ERROR(args_.SetInt("stride_y", stride_.y));
RETURN_IF_ERROR(args_.SetInt("padding_y", padding_.y));
RETURN_IF_ERROR(args_.SetInt("kernel_size_y", kernel_size_.y));
}
if (definition_.dst_tensors[0].HasAxis(Axis::DEPTH)) {
RETURN_IF_ERROR(args_.SetInt("stride_z", stride_.z));
RETURN_IF_ERROR(args_.SetInt("padding_z", padding_.z));
RETURN_IF_ERROR(args_.SetInt("kernel_size_z", kernel_size_.z));
}
return absl::OkStatus();
}
// Work-group grid for the unpooling kernel:
// X covers width*batch, Y covers height*depth, Z covers channel slices.
int3 MaxUnpooling::GetGridSize() const {
  return int3(dst_[0]->Width() * dst_[0]->Batch(),
              dst_[0]->Height() * dst_[0]->Depth(), dst_[0]->Slices());
}
// NOTE(review): rendered diff — the removed MaxUnpooling factory and the
// added GPUOperation factory are interleaved below (both signatures shown).
MaxUnpooling CreateMaxUnpooling(const OperationDef& definition,
GPUOperation CreateMaxUnpooling(const OperationDef& definition,
const MaxUnpooling2DAttributes& attr) {
return MaxUnpooling(definition, attr);
// New version: builds a generic GPUOperation; kernel/padding/stride become
// int args registered at creation time instead of members bound per-dispatch.
GPUOperation op(definition);
op.args_.AddInt("kernel_size_x", attr.kernel.w);
op.args_.AddInt("padding_x", attr.padding.appended.w);
op.args_.AddInt("stride_x", attr.strides.w);
op.args_.AddInt("kernel_size_y", attr.kernel.h);
op.args_.AddInt("padding_y", attr.padding.appended.h);
op.args_.AddInt("stride_y", attr.strides.h);
op.code_ = GetMaxUnpoolingKernelCode(definition, &op);
// Grid mapping replaces the removed GetGridSize() override.
op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
return op;
}
// NOTE(review): rendered diff — old and new 3D factory lines interleaved.
// The 3D overload additionally registers the depth (z) args.
MaxUnpooling CreateMaxUnpooling(const OperationDef& definition,
GPUOperation CreateMaxUnpooling(const OperationDef& definition,
const MaxUnpooling3DAttributes& attr) {
return MaxUnpooling(definition, attr);
GPUOperation op(definition);
op.args_.AddInt("kernel_size_x", attr.kernel.w);
op.args_.AddInt("padding_x", attr.padding.appended.w);
op.args_.AddInt("stride_x", attr.strides.w);
op.args_.AddInt("kernel_size_y", attr.kernel.h);
op.args_.AddInt("padding_y", attr.padding.appended.h);
op.args_.AddInt("stride_y", attr.strides.h);
op.args_.AddInt("kernel_size_z", attr.kernel.d);
op.args_.AddInt("padding_z", attr.padding.appended.d);
op.args_.AddInt("stride_z", attr.strides.d);
op.code_ = GetMaxUnpoolingKernelCode(definition, &op);
op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
return op;
}
} // namespace cl
......
......@@ -25,34 +25,10 @@ namespace tflite {
namespace gpu {
namespace cl {
// (removed in this commit) Dedicated operation class, replaced by factory
// functions returning a plain GPUOperation.
class MaxUnpooling : public GPUOperation {
public:
MaxUnpooling(const OperationDef& definition,
const MaxUnpooling2DAttributes& attr);
MaxUnpooling(const OperationDef& definition,
const MaxUnpooling3DAttributes& attr);
absl::Status BindArguments() override;
int3 GetGridSize() const override;
// Move only
MaxUnpooling(MaxUnpooling&& kernel);
MaxUnpooling& operator=(MaxUnpooling&& kernel);
MaxUnpooling(const MaxUnpooling&) = delete;
MaxUnpooling& operator=(const MaxUnpooling&) = delete;
private:
std::string GetMaxUnpoolingKernelCode(const OperationDef& op_def);
// x/y/z lanes hold width/height/depth; unused lanes are zero.
int4 stride_;
int4 padding_;
int4 kernel_size_;
};
// NOTE(review): rendered diff — old (MaxUnpooling) and new (GPUOperation)
// return types of the factory declarations are interleaved.
MaxUnpooling CreateMaxUnpooling(const OperationDef& definition,
GPUOperation CreateMaxUnpooling(const OperationDef& definition,
const MaxUnpooling2DAttributes& attr);
MaxUnpooling CreateMaxUnpooling(const OperationDef& definition,
GPUOperation CreateMaxUnpooling(const OperationDef& definition,
const MaxUnpooling3DAttributes& attr);
} // namespace cl
......
......@@ -55,7 +55,7 @@ TEST_F(OpenCLOperationTest, MaxUnpooling) {
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
TensorFloat32 dst_tensor;
MaxUnpooling operation = CreateMaxUnpooling(op_def, attr);
GPUOperation operation = CreateMaxUnpooling(op_def, attr);
ASSERT_OK(ExecuteGPUOperation({src_tensor, src_ind_tensor},
creation_context_, &operation,
BHWC(1, 4, 4, 1), &dst_tensor));
......
......@@ -252,7 +252,7 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info,
case OperationType::MAX_UNPOOLING_2D: {
auto attr =
absl::any_cast<MaxUnpooling2DAttributes>(node.operation.attributes);
SelectMaxUnpooling(attr, op_def, gpu_op);
*gpu_op = SelectMaxUnpooling(attr, op_def);
return absl::OkStatus();
}
case OperationType::MEAN: {
......
......@@ -68,11 +68,9 @@ void SelectPooling(const Pooling2DAttributes& attr, const OperationDef& op_def,
*ptr = absl::make_unique<Pooling>(std::move(pooling));
}
// NOTE(review): rendered diff — old out-parameter selector and new
// value-returning selector are interleaved below.
void SelectMaxUnpooling(const MaxUnpooling2DAttributes& attr,
const OperationDef& op_def,
std::unique_ptr<GPUOperation>* ptr) {
MaxUnpooling operation = CreateMaxUnpooling(op_def, attr);
*ptr = absl::make_unique<MaxUnpooling>(std::move(operation));
// New version: returns the owning pointer directly.
std::unique_ptr<GPUOperation> SelectMaxUnpooling(
const MaxUnpooling2DAttributes& attr, const OperationDef& op_def) {
return absl::make_unique<GPUOperation>(CreateMaxUnpooling(op_def, attr));
}
void SelectAdd(const OperationDef& op_def, const std::vector<int>& channels,
......
......@@ -41,9 +41,8 @@ std::unique_ptr<GPUOperation> SelectPReLU(const PReLUAttributes& attr,
void SelectPooling(const Pooling2DAttributes& attr, const OperationDef& op_def,
std::unique_ptr<GPUOperation>* ptr);
void SelectMaxUnpooling(const MaxUnpooling2DAttributes& attr,
const OperationDef& op_def,
std::unique_ptr<GPUOperation>* ptr);
std::unique_ptr<GPUOperation> SelectMaxUnpooling(
const MaxUnpooling2DAttributes& attr, const OperationDef& op_def);
void SelectAdd(const OperationDef& op_def, const std::vector<int>& channels,
int dst_channels, std::unique_ptr<GPUOperation>* ptr);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册