提交 19dcf2c3 编写于 作者: L liuqi

Feature: Remove the BufferTransform ops of GPU initialization phase.

1. Remove the BufferTransform ops of GPU initialization phase.
2. Add tranformer to tranform the memory type between CPU and GPU.
上级 4983fcb2
......@@ -77,9 +77,12 @@ SerialNet::SerialNet(const OpRegistryBase *op_registry,
}
MaceStatus SerialNet::Init() {
// TODO(liuqi): where to do memory reuse.
MACE_LATENCY_LOGGER(1, "Initializing SerialNet");
OpInitContext init_context(ws_);
// TODO(liuqi): where to do memory reuse.
if (target_device_->device_type() == DeviceType::GPU) {
}
for (auto iter = operators_.begin(); iter != operators_.end(); ++iter) {
auto &op = *iter;
DeviceType device_type = op->device_type();
......
......@@ -131,14 +131,14 @@ class Operation {
}
inline void set_debug_def(
const std::shared_ptr<const OperatorDef> &operator_def) {
const std::shared_ptr<OperatorDef> &operator_def) {
operator_def_ = operator_def;
}
inline bool has_debug_def() const { return operator_def_ != nullptr; }
protected:
std::shared_ptr<const OperatorDef> operator_def_;
std::shared_ptr<OperatorDef> operator_def_;
std::vector<const Tensor *> inputs_;
std::vector<Tensor *> outputs_;
......
......@@ -222,6 +222,17 @@ class Tensor {
return buffer_ != nullptr && !buffer_->OnHost() && !has_opencl_image();
}
inline MemoryType memory_type() const {
MACE_CHECK(buffer_ != nullptr, "Tensor ", name_, " is empty" );
if (buffer_->OnHost()) {
return MemoryType::CPU_BUFFER;
} else if (typeid(*buffer_) == typeid(Image)) {
return MemoryType::GPU_IMAGE;
} else {
return MemoryType::GPU_BUFFER;
}
}
#ifdef MACE_ENABLE_OPENCL
inline cl::Image *opencl_image() const {
MACE_CHECK(has_opencl_image(), name_, " do not have image");
......
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_TRANSFORMER_H_
#define MACE_CORE_TRANSFORMER_H_
#include "mace/proto/mace.pb.h"
namespace mace {
class TransformerBase {
public:
// Construct transform operation.
virtual std::vector<std::unique_ptr<OperatorDef>> ConstructTranformOp(
OperatorDef *op_def,
bool transform_filter = true) = 0;
};
} // namespace mace
#endif // MACE_CORE_TRANSFORMER_H_
......@@ -31,7 +31,7 @@ cc_library(
"ops_registry.cc",
"ops_test_util.cc",
"buffer_inverse_transform.cc",
"buffer_transform.cc",
"buffer_transformer.cc",
"lstm_cell.cc",
"winograd_transform.cc",
"quantize.cc",
......@@ -42,7 +42,7 @@ cc_library(
"opencl/image/*.cc",
"opencl/buffer/*.cc",
"buffer_inverse_transform.cc",
"buffer_transform.cc",
"buffer_transformer.cc",
"lstm_cell.cc",
"winograd_transform.cc",
],
......
......@@ -19,6 +19,7 @@
#include "mace/core/operator.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/buffer_transformer.h"
#include "mace/ops/opencl/image/activation.h"
#endif // MACE_ENABLE_OPENCL
......@@ -79,12 +80,19 @@ class ActivationOp<DeviceType::GPU, T> : public Operation {
"NOOP"));
auto relux_max_limit = static_cast<T>(
Operation::GetOptionalArg<float>("max_limit", 0.0f));
MemoryType mem_type;
if (context->device()->opencl_runtime()->UseImageMemory()) {
mem_type = MemoryType::GPU_IMAGE;
kernel_.reset(
new opencl::image::ActivationKernel<T>(type, relux_max_limit));
} else {
MACE_NOT_IMPLEMENTED;
}
if (type == ActivationType::PRELU) {
MACE_CHECK(TransformFilter<T>(
context, operator_def_.get(), 1, BufferType::ARGUMENT, mem_type)
== MaceStatus::MACE_SUCCESS);
}
}
MaceStatus Run(OpContext *context) override {
const Tensor *input = this->Input(0);
......
......@@ -237,12 +237,10 @@ void TestSimplePrelu() {
if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
ops::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Alpha", "AlphaImage",
ops::BufferType::ARGUMENT);
OpDefBuilder("Activation", "PreluTest")
.Input("InputImage")
.Input("AlphaImage")
.Input("Alpha")
.Output("OutputImage")
.AddStringArg("activation", "PRELU")
.Finalize(net.NewOperatorDef());
......
......@@ -19,6 +19,7 @@
#include "mace/core/operator.h"
#include "mace/ops/activation.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/buffer_transformer.h"
#include "mace/ops/opencl/image/batch_norm.h"
#endif // MACE_ENABLE_OPENCL
......@@ -147,12 +148,25 @@ class BatchNormOp<DeviceType::GPU, T> : public Operation {
ActivationType activation = ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation", "NOOP"));
float relux_max_limit = Operation::GetOptionalArg<float>("max_limit", 0.0f);
MemoryType mem_type;
if (context->device()->opencl_runtime()->UseImageMemory()) {
mem_type = MemoryType::GPU_IMAGE;
kernel_.reset(new opencl::image::BatchNormKernel<T>(
epsilon, activation, relux_max_limit));
} else {
MACE_NOT_IMPLEMENTED;
}
// Transform filters
int input_size = operator_def_->input_size();
for (int i = 0; i < input_size; ++i) {
const Tensor *input_tensor = context->workspace()->GetTensor(
operator_def_->input(i));
if (input_tensor != nullptr && input_tensor->is_weight()) {
MACE_CHECK(TransformFilter<T>(
context, operator_def_.get(), i, BufferType::ARGUMENT, mem_type)
== MaceStatus::MACE_SUCCESS);
}
}
}
MaceStatus Run(OpContext *context) override {
bool not_folded = this->InputSize() == 5;
......
......@@ -19,6 +19,7 @@
#include "mace/core/operator.h"
#include "mace/ops/activation.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/buffer_transformer.h"
#include "mace/ops/opencl/image/bias_add.h"
#endif // MACE_ENABLE_OPENCL
......@@ -99,11 +100,16 @@ class BiasAddOp<DeviceType::GPU, T> : public Operation {
: Operation(context),
data_format_(static_cast<DataFormat>(Operation::GetOptionalArg<int>(
"data_format", NHWC))) {
MemoryType mem_type;
if (context->device()->opencl_runtime()->UseImageMemory()) {
mem_type = MemoryType::GPU_IMAGE;
kernel_.reset(new opencl::image::BiasAddKernel<T>);
} else {
MACE_NOT_IMPLEMENTED;
}
MACE_CHECK(TransformFilter<T>(
context, operator_def_.get(), 1, BufferType::ARGUMENT, mem_type)
== MaceStatus::MACE_SUCCESS);
}
MaceStatus Run(OpContext *context) override {
const Tensor *input = this->Input(0);
......
......@@ -51,7 +51,7 @@ class BufferInverseTransformOp<DeviceType::GPU, T> : public Operation {
private:
const int wino_blk_size_;
std::unique_ptr<OpenCLBufferInverseTransformKernel> kernel_;
std::unique_ptr<OpenCLBufferTransformKernel> kernel_;
};
......
......@@ -15,8 +15,7 @@
#include <memory>
#include "mace/core/operator.h"
#include "mace/ops/opencl/buffer/buffer_transform.h"
#include "mace/ops/opencl/image/buffer_to_image.h"
#include "mace/ops/opencl/buffer_transformer.h"
namespace mace {
namespace ops {
......@@ -29,12 +28,16 @@ class BufferTransformOp<DeviceType::GPU, T> : public Operation {
public:
explicit BufferTransformOp(OpConstructContext *context)
: Operation(context),
wino_blk_size_(Operation::GetOptionalArg<int>("wino_block_size", 2)) {
wino_blk_size_(Operation::GetOptionalArg<int>("wino_block_size", 2)),
out_mem_type_(MemoryType::GPU_BUFFER),
transformer_(nullptr) {
MemoryType in_mem_type = context->workspace()->GetTensor(
operator_def_->input(0))->memory_type();
if (context->device()->opencl_runtime()->UseImageMemory()) {
kernel_.reset(new opencl::image::BufferToImage<T>);
} else {
kernel_.reset(new opencl::buffer::BufferTransform<T>);
out_mem_type_ = MemoryType::GPU_IMAGE;
}
transformer_.reset(new OpenCLBufferTransformer<T>(in_mem_type,
out_mem_type_));
}
MaceStatus Run(OpContext *context) override {
......@@ -45,13 +48,14 @@ class BufferTransformOp<DeviceType::GPU, T> : public Operation {
static_cast<ops::BufferType>(Operation::GetOptionalArg<int>(
"buffer_type", static_cast<int>(ops::CONV2D_FILTER)));
return kernel_->Compute(context, input, type,
wino_blk_size_, output);
return transformer_->Transform(
context, input, type, wino_blk_size_, out_mem_type_, output);
}
private:
const int wino_blk_size_;
std::unique_ptr<OpenCLBufferTransformKernel> kernel_;
MemoryType out_mem_type_;
std::unique_ptr<OpenCLBufferTransformer<T>> transformer_;
};
......
......@@ -38,8 +38,9 @@
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/conv_2d.h"
#include "mace/ops/opencl/buffer_transformer.h"
#include "mace/ops/opencl/buffer/conv_2d.h"
#include "mace/ops/opencl/image/conv_2d.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
......@@ -960,11 +961,23 @@ class Conv2dOp<DeviceType::GPU, T> : public ConvPool2dOpBase {
Operation::GetOptionalArg<std::string>("activation",
"NOOP"))),
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {
MemoryType mem_type;
if (context->device()->opencl_runtime()->UseImageMemory()) {
mem_type = MemoryType::GPU_IMAGE;
kernel_.reset(new opencl::image::Conv2dKernel<T>);
} else {
mem_type = MemoryType::GPU_BUFFER;
kernel_.reset(new opencl::buffer::Conv2dKernel<T>);
}
// Transform filter tensor to target format
MACE_CHECK(TransformFilter<T>(
context, operator_def_.get(), 1, BufferType::CONV2D_FILTER, mem_type)
== MaceStatus::MACE_SUCCESS);
if (operator_def_->input_size() > 2) {
MACE_CHECK(TransformFilter<T>(
context, operator_def_.get(), 2, BufferType::ARGUMENT, mem_type)
== MaceStatus::MACE_SUCCESS);
}
}
MaceStatus Run(OpContext *context) override {
const Tensor *input = this->Input(INPUT);
......
......@@ -34,8 +34,9 @@
#include "mace/public/mace.h"
#include "mace/utils/quantize.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/depthwise_conv2d.h"
#include "mace/ops/opencl/buffer_transformer.h"
#include "mace/ops/opencl/buffer/depthwise_conv2d.h"
#include "mace/ops/opencl/image/depthwise_conv2d.h"
#endif // MACE_ENABLE_OPENCL
namespace mace {
......@@ -490,11 +491,23 @@ class DepthwiseConv2dOp<DeviceType::GPU, T> : public DepthwiseConv2dOpBase {
public:
explicit DepthwiseConv2dOp(OpConstructContext *context)
: DepthwiseConv2dOpBase(context) {
MemoryType mem_type;
if (context->device()->opencl_runtime()->UseImageMemory()) {
mem_type = MemoryType::GPU_IMAGE;
kernel_.reset(new opencl::image::DepthwiseConv2dKernel<T>);
} else {
mem_type = MemoryType::GPU_BUFFER;
kernel_.reset(new opencl::buffer::DepthwiseConv2dKernel<T>);
}
// Transform filter tensor to target format
MACE_CHECK(TransformFilter<T>(
context, operator_def_.get(), 1, BufferType::DW_CONV2D_FILTER, mem_type)
== MaceStatus::MACE_SUCCESS);
if (operator_def_->input_size() > 2) {
MACE_CHECK(TransformFilter<T>(
context, operator_def_.get(), 2, BufferType::ARGUMENT, mem_type)
== MaceStatus::MACE_SUCCESS);
}
}
MaceStatus Run(OpContext *context) override {
const Tensor *input = this->Input(INPUT);
......
......@@ -26,6 +26,7 @@
#include "mace/core/tensor.h"
#include "mace/utils/quantize.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/buffer_transformer.h"
#include "mace/ops/opencl/image/eltwise.h"
#endif // MACE_ENABLE_OPENCL
......@@ -1086,12 +1087,25 @@ class EltwiseOp<DeviceType::GPU, T> : public Operation {
float scalar_input = Operation::GetOptionalArg<float>("scalar_input", 1.0);
int32_t scalar_input_index = Operation::GetOptionalArg<int32_t>(
"scalar_input_index", 1);
MemoryType mem_type;
if (context->device()->opencl_runtime()->UseImageMemory()) {
mem_type = MemoryType::GPU_IMAGE;
kernel_.reset(new opencl::image::EltwiseKernel<T>(
type, coeff, scalar_input, scalar_input_index));
} else {
MACE_NOT_IMPLEMENTED;
}
// Transform filters
int input_size = operator_def_->input_size();
for (int i = 0; i < input_size; ++i) {
const Tensor *input_tensor = context->workspace()->GetTensor(
operator_def_->input(i));
if (input_tensor != nullptr && input_tensor->is_weight()) {
MACE_CHECK(TransformFilter<T>(
context, operator_def_.get(), i, BufferType::ARGUMENT, mem_type)
== MaceStatus::MACE_SUCCESS);
}
}
}
MaceStatus Run(OpContext *context) override {
const Tensor *input0 = this->Input(0);
......
......@@ -27,6 +27,7 @@
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/buffer_transformer.h"
#include "mace/ops/opencl/image/fully_connected.h"
#endif // MACE_ENABLE_OPENCL
......@@ -192,11 +193,22 @@ class FullyConnectedOp<DeviceType::GPU, T> : public FullyConnectedOpBase {
public:
explicit FullyConnectedOp(OpConstructContext *context)
: FullyConnectedOpBase(context) {
MemoryType mem_type;
if (context->device()->opencl_runtime()->UseImageMemory()) {
mem_type = MemoryType::GPU_IMAGE;
kernel_.reset(new opencl::image::FullyConnectedKernel<T>);
} else {
MACE_NOT_IMPLEMENTED;
}
// Transform filter tensor to target format
MACE_CHECK(TransformFilter<T>(
context, operator_def_.get(), 1, BufferType::WEIGHT_WIDTH, mem_type)
== MaceStatus::MACE_SUCCESS);
if (operator_def_->input_size() > 2) {
MACE_CHECK(TransformFilter<T>(
context, operator_def_.get(), 2, BufferType::ARGUMENT, mem_type)
== MaceStatus::MACE_SUCCESS);
}
}
MaceStatus Run(OpContext *context) override {
const Tensor *input = this->Input(INPUT);
......
......@@ -15,7 +15,7 @@
#ifndef MACE_OPS_OPENCL_BUFFER_BUFFER_INVERSE_TRANSFORM_H_
#define MACE_OPS_OPENCL_BUFFER_BUFFER_INVERSE_TRANSFORM_H_
#include "mace/ops/opencl/buffer_inverse_transform.h"
#include "mace/ops/opencl/buffer_transformer.h"
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
......@@ -34,7 +34,7 @@ MaceStatus BufferTypeTransform(
Tensor *output);
template <typename T>
class BufferInverseTransform: public OpenCLBufferInverseTransformKernel {
class BufferInverseTransform: public OpenCLBufferTransformKernel {
public:
MaceStatus Compute(OpContext *context,
const Tensor *input,
......
......@@ -15,7 +15,7 @@
#ifndef MACE_OPS_OPENCL_BUFFER_BUFFER_TRANSFORM_H_
#define MACE_OPS_OPENCL_BUFFER_BUFFER_TRANSFORM_H_
#include "mace/ops/opencl/buffer_transform.h"
#include "mace/ops/opencl/buffer_transform_kernel.h"
#include <vector>
......
......@@ -12,18 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_OPENCL_BUFFER_TRANSFORM_H_
#define MACE_OPS_OPENCL_BUFFER_TRANSFORM_H_
#ifndef MACE_OPS_OPENCL_BUFFER_TRANSFORM_KERNEL_H_
#define MACE_OPS_OPENCL_BUFFER_TRANSFORM_KERNEL_H_
#include "mace/ops/opencl/common.h"
#include "mace/public/mace.h"
#include "mace/utils/utils.h"
namespace mace {
class OpContext;
class Tensor;
namespace ops {
class OpenCLBufferTransformKernel {
public:
......@@ -32,10 +30,9 @@ class OpenCLBufferTransformKernel {
const BufferType type,
const int wino_blk_size,
Tensor *output) = 0;
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLBufferTransformKernel)
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLBufferTransformKernel)
};
} // namespace ops
} // namespace mace
#endif // MACE_OPS_OPENCL_BUFFER_TRANSFORM_H_
#endif // MACE_OPS_OPENCL_BUFFER_TRANSFORM_KERNEL_H_
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/opencl/buffer_transformer.h"
namespace mace {
namespace ops {
std::string TransformedName(const std::string &name) {
// TODO(liuqi): This may create a conflict.
const char *postfix = "_mace_identity_transformed";
return name + postfix;
}
} // namespace ops
} // namespace mace
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_OPENCL_BUFFER_TRANSFORMER_H_
#define MACE_OPS_OPENCL_BUFFER_TRANSFORMER_H_
#include "mace/core/operator.h"
#include "mace/ops/opencl/common.h"
#include "mace/ops/opencl/image/buffer_to_image.h"
#include "mace/ops/opencl/image/image_to_buffer.h"
#include "mace/ops/opencl/buffer/buffer_transform.h"
namespace mace {
namespace ops {
// Only used for GPU Operation(BufferTransform)
template <typename T>
class OpenCLBufferTransformer {
public:
OpenCLBufferTransformer(const MemoryType in_mem_type,
const MemoryType out_mem_type) {
if (out_mem_type == MemoryType::GPU_IMAGE) {
kernel_.reset(new opencl::image::BufferToImage<T>);
} else if (in_mem_type == MemoryType::GPU_IMAGE){
kernel_.reset(new opencl::image::ImageToBuffer<T>);
} else {
kernel_.reset(new opencl::buffer::BufferTransform<T>);
}
}
MaceStatus Transform(OpContext *context,
const Tensor *input,
const BufferType type,
const int wino_blk_size,
const MemoryType out_mem_type,
Tensor *output) {
Workspace *ws = context->workspace();
DataType dt = DataTypeToEnum<T>::value;
MemoryType in_mem_type = input->memory_type();
if (out_mem_type == MemoryType::GPU_IMAGE ||
out_mem_type == MemoryType::GPU_BUFFER) {
if (in_mem_type != MemoryType::CPU_BUFFER) {
return kernel_->Compute(
context, input, type, wino_blk_size, output);
} else {
// convert to the GPU Buffer with the input's data type.
Tensor *internal_tensor = ws->CreateTensor(
InternalTransformedName(input->name()),
context->device()->allocator(), input->dtype());
output->Resize(input->shape());
const uint8_t *input_ptr = input->data<uint8_t>();
Tensor::MappingGuard guard(internal_tensor);
uint8_t *internal_ptr = internal_tensor->mutable_data<uint8_t>();
memcpy(internal_ptr, input_ptr, input->raw_size());
// convert the internal GPU Buffer to output.
return kernel_->Compute(
context, internal_tensor, type, wino_blk_size, output);
}
} else { // out_mem_type == MemoryType::CPU_BUFFER
// convert to the GPU Buffer with the output's data type.
Tensor internal_tensor(context->device()->allocator(),
dt,
false,
InternalTransformedName(input->name()));
MACE_RETURN_IF_ERROR(kernel_->Compute(
context, input, type, wino_blk_size, &internal_tensor));
// convert the internal GPU Buffer to output.
Tensor::MappingGuard guard(&internal_tensor);
const T *internal_ptr = internal_tensor.data<T>();
output->Resize(internal_tensor.shape());
T *output_ptr = output->mutable_data<T>();
memcpy(output_ptr, internal_ptr, internal_tensor.size() * sizeof(T));
return MaceStatus::MACE_SUCCESS;
}
}
private:
std::string InternalTransformedName(const std::string &name) {
// TODO(liuqi): This may create a conflict.
const char *postfix = "_mace_identity_internal";
return name + postfix;
}
private:
std::unique_ptr<OpenCLBufferTransformKernel> kernel_;
};
std::string TransformedName(const std::string &name);
template <typename T>
MaceStatus TransformFilter(
mace::OpConstructContext *context,
OperatorDef *op_def,
const int input_idx,
const BufferType buffer_type,
const MemoryType mem_type) {
const DataType dt = DataTypeToEnum<T>::value;
OpContext op_context(context->workspace(), context->device());
Workspace *ws = context->workspace();
std::string input_name = op_def->input(input_idx);
Tensor *input = ws->GetTensor(input_name);
std::string output_name = TransformedName(input_name);
Tensor *output =
ws->CreateTensor(output_name, context->device()->allocator(), dt);
// update the information
op_def->set_input(input_idx, output_name);
input->MarkUnused();
return OpenCLBufferTransformer<T>(input->memory_type(), mem_type).
Transform(&op_context, input, buffer_type, 0, mem_type, output);
}
} // namespace ops
} // namespace mace
#endif // MACE_OPS_OPENCL_BUFFER_TRANSFORMER_H_
......@@ -45,7 +45,7 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS
DATA_TYPE4 out;
#if ELTWISE_TYPE == 0
#ifdef COEFF_SUM
out = mad(coeff1, in0, mad(coeff0, in1, 0));
out = mad(coeff0, in0, mad(coeff1, in1, 0));
#else
out = in0 + in1;
#endif
......
......@@ -29,7 +29,6 @@ enum BufferType {
WEIGHT_HEIGHT = 7,
WEIGHT_WIDTH = 8,
};
} // namespace ops
} // namespace mace
#endif // MACE_OPS_OPENCL_COMMON_H_
......@@ -15,7 +15,7 @@
#ifndef MACE_OPS_OPENCL_IMAGE_BUFFER_TO_IMAGE_H_
#define MACE_OPS_OPENCL_IMAGE_BUFFER_TO_IMAGE_H_
#include "mace/ops/opencl/buffer_transform.h"
#include "mace/ops/opencl/buffer_transform_kernel.h"
#include <set>
#include <string>
......
......@@ -20,7 +20,7 @@
#include <vector>
#include "mace/core/op_context.h"
#include "mace/ops/opencl/buffer_inverse_transform.h"
#include "mace/ops/opencl/buffer_transform_kernel.h"
#include "mace/ops/opencl/helper.h"
namespace mace {
......@@ -29,7 +29,7 @@ namespace opencl {
namespace image {
template <typename T>
class ImageToBuffer : public OpenCLBufferInverseTransformKernel {
class ImageToBuffer : public OpenCLBufferTransformKernel {
public:
MaceStatus Compute(OpContext *context,
const Tensor *input,
......
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/transformer.h"
#include <string>
#include <memory>
namespace mace {
namespace ops {
std::unique_ptr<OperatorDef> Transformer::DoTransform(
mace::OperatorDef *op_def,
const int input_idx,
const mace::DataType dt,
const BufferType buffer_type,
const MemoryType mem_type) {
int32_t device = op_def->device_type();
std::string input_name = op_def->input(input_idx);
std::string output_name = input_name + "_transformed";
op_def->set_input(input_idx, output_name);
std::unique_ptr<OperatorDef> op(new OperatorDef);
op->set_name(output_name);
op->set_type("BufferTransform");
op->add_input(input_name);
op->add_output(output_name);
Argument *arg = op->add_arg();
arg->set_name("buffer_type");
arg->set_i(static_cast<int32_t>(buffer_type));
arg = op->add_arg();
arg->set_name("mem_type");
arg->set_i(static_cast<int32_t>(mem_type));
arg = op->add_arg();
arg->set_name("T");
arg->set_i(static_cast<int32_t>(dt));
arg = op->add_arg();
arg->set_name("device");
arg->set_i(device);
return std::move(op);
}
} // namespace ops
} // namespace mace
......@@ -12,30 +12,33 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_OPENCL_BUFFER_INVERSE_TRANSFORM_H_
#define MACE_OPS_OPENCL_BUFFER_INVERSE_TRANSFORM_H_
#ifndef MACE_KERNELS_TRANSFORMER_H_
#define MACE_KERNELS_TRANSFORMER_H_
#include "mace/core/transformer.h"
#include "mace/ops/opencl/common.h"
#include "mace/public/mace.h"
#include "mace/utils/utils.h"
namespace mace {
class OpContext;
class Tensor;
namespace ops {
class OpenCLBufferInverseTransformKernel {
class Transformer : public TransformerBase {
public:
virtual MaceStatus Compute(OpContext *context,
const Tensor *input,
const BufferType type,
const int wino_blk_size,
Tensor *output) = 0;
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLBufferInverseTransformKernel)
// Transform source tensor to target.
std::vector<std::unique_ptr<OperatorDef>> ConstructTranformOp(
OperatorDef *op_def,
bool transform_filter = true) override;
private:
std::unique_ptr<OperatorDef> DoTransform(
mace::OperatorDef *op_def,
const int input_idx,
const mace::DataType dt,
const BufferType buffer_type,
const MemoryType mem_type);
};
} // namespace ops
} // namespace mace
#endif // MACE_OPS_OPENCL_BUFFER_INVERSE_TRANSFORM_H_
#endif // MACE_KERNELS_TENSOR_TRANSFORMER_H_
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册