Commit 8959c4ce authored by Raman Sarokin, committed by TensorFlower Gardener

DepthwiseConvolution(2D/3D) converted to generic GPUOperation.

PiperOrigin-RevId: 328239471
Change-Id: I404f86d58a73b353c49aa56fefcba134026a1eb8
Parent 7d262f59
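In outline, the change drops the move-only DepthwiseConvolution subclass and assembles the operation directly as a generic GPUOperation: the factory adds the scalar arguments up front, generates the kernel source into op.code_, uploads weights and biases, and sets the dispatch-grid mapping. A minimal sketch of the resulting call site, using only names that appear in the diff below (the surrounding setup is assumed to exist):

```cpp
// Sketch only: assumes a DeviceInfo, OperationDef, and
// DepthwiseConvolution2DAttributes are already populated.
GPUOperation op = CreateDepthwiseConvolution2D(device_info, op_def, attr);
// No BindArguments()/GetGridSize() overrides remain: the kernel source
// lives in op.code_, the scalar args (kernel size, stride, padding,
// dilation) and the weights/bias objects in op.args_, and the grid
// mapping in op.tensor_to_grid_ (TensorToGrid::kWBToX_HDToY_SToZ).
```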
@@ -66,100 +66,24 @@ std::string GetSrcValue(int channel_multiplier, const std::string coords) {
return c;
}
} // namespace
DepthwiseConvolution::DepthwiseConvolution(
const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr, bool weights_are_buffer)
: GPUOperation(definition),
weights_are_buffer_(weights_are_buffer),
kernel_size_(attr.weights.shape.w, attr.weights.shape.h, 0, 0),
stride_(attr.strides.w, attr.strides.h, 0, 0),
padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0),
dilation_(attr.dilations.w, attr.dilations.h, 0, 0),
channel_multiplier_(attr.weights.shape.o) {
work_group_size_ = int3(8, 8, 1);
const bool stride_correction =
definition_.IsBatchSupported() && stride_.x != 1;
code_ = GenerateDepthwiseConvolutionCode(
definition_, stride_correction, channel_multiplier_, weights_are_buffer_);
}
DepthwiseConvolution::DepthwiseConvolution(
const OperationDef& definition,
const DepthwiseConvolution3DAttributes& attr, bool weights_are_buffer)
: GPUOperation(definition),
weights_are_buffer_(weights_are_buffer),
kernel_size_(attr.weights.shape.w, attr.weights.shape.h,
attr.weights.shape.d, 0),
stride_(attr.strides.w, attr.strides.h, attr.strides.d, 0),
padding_(-attr.padding.prepended.w, -attr.padding.prepended.h,
-attr.padding.prepended.d, 0),
dilation_(attr.dilations.w, attr.dilations.h, attr.dilations.d, 0),
channel_multiplier_(attr.weights.shape.o) {
work_group_size_ = int3(8, 8, 1);
const bool stride_correction =
definition_.IsBatchSupported() && stride_.x != 1;
code_ = GenerateDepthwiseConvolutionCode(
definition_, stride_correction, channel_multiplier_, weights_are_buffer_);
}
DepthwiseConvolution::DepthwiseConvolution(DepthwiseConvolution&& operation)
: GPUOperation(std::move(operation)),
weights_are_buffer_(operation.weights_are_buffer_),
kernel_size_(operation.kernel_size_),
stride_(operation.stride_),
padding_(operation.padding_),
dilation_(operation.dilation_),
channel_multiplier_(operation.channel_multiplier_) {}
DepthwiseConvolution& DepthwiseConvolution::operator=(
DepthwiseConvolution&& operation) {
if (this != &operation) {
std::swap(weights_are_buffer_, operation.weights_are_buffer_);
std::swap(kernel_size_, operation.kernel_size_);
std::swap(stride_, operation.stride_);
std::swap(padding_, operation.padding_);
std::swap(dilation_, operation.dilation_);
std::swap(channel_multiplier_, operation.channel_multiplier_);
GPUOperation::operator=(std::move(operation));
}
return *this;
}
std::string DepthwiseConvolution::GenerateDepthwiseConvolutionCode(
const OperationDef& op_def, bool stride_correction, int channel_multiplier,
bool weights_are_buffer) {
std::string GenerateDepthwiseConvolutionCode(const OperationDef& op_def,
bool stride_correction,
int channel_multiplier,
bool weights_are_buffer,
GPUOperation* op) {
auto src_desc = op_def.src_tensors[0];
src_desc.SetTextureAddressMode(TextureAddressMode::ZERO);
if (op_def.IsBatchSupported()) {
src_desc.SetStateVar("BatchedWidth", "true");
}
AddSrcTensor("src_tensor", src_desc);
op->AddSrcTensor("src_tensor", src_desc);
auto dst_desc = op_def.dst_tensors[0];
if (op_def.IsBatchSupported()) {
dst_desc.SetStateVar("BatchedWidth", "true");
}
AddDstTensor("dst_tensor", dst_desc);
args_.AddInt("kernel_size_x");
args_.AddInt("stride_x");
args_.AddInt("padding_x");
args_.AddInt("dilation_x");
args_.AddInt("kernel_size_y");
args_.AddInt("stride_y");
args_.AddInt("padding_y");
args_.AddInt("dilation_y");
if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) {
args_.AddInt("kernel_size_z");
args_.AddInt("stride_z");
args_.AddInt("padding_z");
args_.AddInt("dilation_z");
}
if (!IsSpecializedCase(channel_multiplier)) {
args_.AddInt("ch_multiplier");
}
op->AddDstTensor("dst_tensor", dst_desc);
const auto src_tensor_type = op_def.src_tensors[0].storage_type;
@@ -171,14 +95,14 @@ std::string DepthwiseConvolution::GenerateDepthwiseConvolutionCode(
c += "__kernel void main_function(\n";
c += "$0) {\n";
c += " int X = get_global_id(0);\n";
c += " int Y = get_global_id(1);\n";
if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) {
c += " int linear_id_2 = get_global_id(2);\n";
c += " int S = linear_id_2 / args.dst_tensor.Depth();\n";
c += " int Z = linear_id_2 % args.dst_tensor.Depth();\n";
c += " int linear_id_1 = get_global_id(1);\n";
c += " int Y = linear_id_1 / args.dst_tensor.Depth();\n";
c += " int Z = linear_id_1 % args.dst_tensor.Depth();\n";
} else {
c += " int S = get_global_id(2);\n";
c += " int Y = get_global_id(1);\n";
}
c += " int S = get_global_id(2);\n";
c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
"S >= args.dst_tensor.Slices()) { \n";
c += " return; \n";
@@ -186,11 +110,16 @@ std::string DepthwiseConvolution::GenerateDepthwiseConvolutionCode(
c += " ACCUM_FLT4 r = (ACCUM_FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
if (stride_correction) {
c += " int x_offseted = " +
GetXStrideCorrected("X", "args.src_tensor.Batch()", "args.stride_x",
"args.padding_x") +
GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x",
"args.padding_x") +
";\n";
} else {
c += " int x_offseted = X * args.stride_x + args.padding_x;\n";
if (op_def.IsBatchSupported()) {
c += " int x_offseted = X * args.stride_x + args.padding_x * "
"args.src_tensor.Batch();\n";
} else {
c += " int x_offseted = X * args.stride_x + args.padding_x;\n";
}
}
c += " int y_offseted = Y * args.stride_y + args.padding_y;\n";
std::string weights_offset = "args.kernel_size_x * args.kernel_size_y";
@@ -218,7 +147,10 @@ std::string DepthwiseConvolution::GenerateDepthwiseConvolutionCode(
c += " int y_c = y_offseted + ky * args.dilation_y;\n";
c += " bool outside_y = y_c < 0 || y_c >= args.src_tensor.Height();\n";
c += " for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n";
c += " int x_c = x_offseted + kx * args.dilation_x;\n";
const std::string dilation_x =
op_def.IsBatchSupported() ? "args.dilation_x * args.src_tensor.Batch()"
: "args.dilation_x";
c += " int x_c = x_offseted + kx * " + dilation_x + ";\n";
c += " bool outside_x = x_c < 0 || x_c >= args.src_tensor.Width();\n";
c += " if (" + check + ") {\n";
if (weights_are_buffer) {
@@ -252,7 +184,10 @@ std::string DepthwiseConvolution::GenerateDepthwiseConvolutionCode(
c += " for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n";
c += " int y_c = y_offseted + ky * args.dilation_y;\n";
c += " for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n";
c += " int x_c = x_offseted + kx * args.dilation_x;\n";
const std::string dilation_x =
op_def.IsBatchSupported() ? "args.dilation_x * args.src_tensor.Batch()"
: "args.dilation_x";
c += " int x_c = x_offseted + kx * " + dilation_x + ";\n";
c += GetSrcValue(channel_multiplier, flat_coords);
if (weights_are_buffer) {
c += " FLT4 f = args.weights.Read(fx_c);\n";
@@ -277,67 +212,80 @@ std::string DepthwiseConvolution::GenerateDepthwiseConvolutionCode(
return c;
}
} // namespace
absl::Status DepthwiseConvolution::BindArguments() {
RETURN_IF_ERROR(args_.SetInt("kernel_size_x", kernel_size_.x));
RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x));
RETURN_IF_ERROR(args_.SetInt("padding_x", padding_.x * src_[0]->Batch()));
RETURN_IF_ERROR(args_.SetInt("dilation_x", dilation_.x * src_[0]->Batch()));
RETURN_IF_ERROR(args_.SetInt("kernel_size_y", kernel_size_.y));
RETURN_IF_ERROR(args_.SetInt("stride_y", stride_.y));
RETURN_IF_ERROR(args_.SetInt("padding_y", padding_.y));
RETURN_IF_ERROR(args_.SetInt("dilation_y", dilation_.y));
if (definition_.dst_tensors[0].HasAxis(Axis::DEPTH)) {
RETURN_IF_ERROR(args_.SetInt("kernel_size_z", kernel_size_.z));
RETURN_IF_ERROR(args_.SetInt("stride_z", stride_.z));
RETURN_IF_ERROR(args_.SetInt("padding_z", padding_.z));
RETURN_IF_ERROR(args_.SetInt("dilation_z", dilation_.z));
}
if (!IsSpecializedCase(channel_multiplier_)) {
RETURN_IF_ERROR(args_.SetInt("ch_multiplier", channel_multiplier_));
}
return absl::OkStatus();
}
int3 DepthwiseConvolution::GetGridSize() const {
const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
const int grid_y = dst_[0]->Height();
const int grid_z = dst_[0]->Slices() * dst_[0]->Depth();
return int3(grid_x, grid_y, grid_z);
}
DepthwiseConvolution CreateDepthwiseConvolution(
GPUOperation CreateDepthwiseConvolution2D(
const DeviceInfo& device_info, const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr) {
bool weights_are_buffer = device_info.IsMali();
DepthwiseConvolution result(definition, attr, weights_are_buffer);
result.UploadWeights(attr.weights);
GPUOperation op(definition);
op.args_.AddInt("kernel_size_x", attr.weights.shape.w);
op.args_.AddInt("stride_x", attr.strides.w);
op.args_.AddInt("padding_x", -attr.padding.prepended.w);
op.args_.AddInt("dilation_x", attr.dilations.w);
op.args_.AddInt("kernel_size_y", attr.weights.shape.h);
op.args_.AddInt("stride_y", attr.strides.h);
op.args_.AddInt("padding_y", -attr.padding.prepended.h);
op.args_.AddInt("dilation_y", attr.dilations.h);
if (!IsSpecializedCase(attr.weights.shape.o)) {
op.args_.AddInt("ch_multiplier", attr.weights.shape.o);
}
const bool stride_correction =
definition.IsBatchSupported() && attr.strides.w != 1;
op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction,
attr.weights.shape.o,
weights_are_buffer, &op);
UploadWeightsForDWConv2D(attr.weights, weights_are_buffer,
definition.precision, &op);
op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
TensorLinearDescriptor desc;
desc.storage_type = weights_are_buffer ? LinearStorageType::BUFFER
: LinearStorageType::TEXTURE_2D;
desc.element_type = definition.GetDataType();
desc.UploadLinearData(attr.bias);
result.args_.AddObject(
op.args_.AddObject(
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return result;
return op;
}
DepthwiseConvolution CreateDepthwiseConvolution(
GPUOperation CreateDepthwiseConvolution3D(
const DeviceInfo& device_info, const OperationDef& definition,
const DepthwiseConvolution3DAttributes& attr) {
bool weights_are_buffer = device_info.IsMali();
DepthwiseConvolution result(definition, attr, weights_are_buffer);
result.UploadWeights(attr.weights);
GPUOperation op(definition);
op.args_.AddInt("kernel_size_x", attr.weights.shape.w);
op.args_.AddInt("stride_x", attr.strides.w);
op.args_.AddInt("padding_x", -attr.padding.prepended.w);
op.args_.AddInt("dilation_x", attr.dilations.w);
op.args_.AddInt("kernel_size_y", attr.weights.shape.h);
op.args_.AddInt("stride_y", attr.strides.h);
op.args_.AddInt("padding_y", -attr.padding.prepended.h);
op.args_.AddInt("dilation_y", attr.dilations.h);
op.args_.AddInt("kernel_size_z", attr.weights.shape.d);
op.args_.AddInt("stride_z", attr.strides.d);
op.args_.AddInt("padding_z", -attr.padding.prepended.d);
op.args_.AddInt("dilation_z", attr.dilations.d);
if (!IsSpecializedCase(attr.weights.shape.o)) {
op.args_.AddInt("ch_multiplier", attr.weights.shape.o);
}
const bool stride_correction =
definition.IsBatchSupported() && attr.strides.w != 1;
op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction,
attr.weights.shape.o,
weights_are_buffer, &op);
UploadWeightsForDWConv3D(attr.weights, weights_are_buffer,
definition.precision, &op);
op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
TensorLinearDescriptor desc;
desc.storage_type = weights_are_buffer ? LinearStorageType::BUFFER
: LinearStorageType::TEXTURE_2D;
desc.element_type = definition.GetDataType();
desc.UploadLinearData(attr.bias);
result.args_.AddObject(
op.args_.AddObject(
"biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
return result;
return op;
}
} // namespace cl
......
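For context on the ch_multiplier argument above: a depthwise convolution with channel multiplier m produces src_channels * m output channels, and output channel d reads input channel d / m. The generated kernel hard-codes small multipliers when IsSpecializedCase holds and otherwise passes ch_multiplier as a runtime argument. A self-contained sketch of the mapping, with hypothetical shapes:

```cpp
// Standalone illustration of the depthwise channel-multiplier mapping;
// the shape values here are hypothetical, not taken from the diff.
#include <iostream>

int main() {
  const int src_channels = 3;
  const int m = 2;  // channel multiplier (attr.weights.shape.o)
  for (int d = 0; d < src_channels * m; ++d) {
    // Each output channel d is computed from input channel d / m.
    std::cout << "dst channel " << d << " <- src channel " << d / m << "\n";
  }
  return 0;
}
```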
@@ -35,102 +35,9 @@ namespace tflite {
namespace gpu {
namespace cl {
class DepthwiseConvolution : public GPUOperation {
public:
DepthwiseConvolution() = default;
absl::Status BindArguments() override;
int3 GetGridSize() const override;
// Move only
DepthwiseConvolution(DepthwiseConvolution&& operation);
DepthwiseConvolution& operator=(DepthwiseConvolution&& operation);
DepthwiseConvolution(const DepthwiseConvolution&) = delete;
DepthwiseConvolution& operator=(const DepthwiseConvolution&) = delete;
private:
friend DepthwiseConvolution CreateDepthwiseConvolution(
const DeviceInfo& device_info, const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr);
friend DepthwiseConvolution CreateDepthwiseConvolution(
const DeviceInfo& device_info, const OperationDef& definition,
const DepthwiseConvolution3DAttributes& attr);
DepthwiseConvolution(const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr,
bool weights_are_buffer);
DepthwiseConvolution(const OperationDef& definition,
const DepthwiseConvolution3DAttributes& attr,
bool weights_are_buffer);
template <DataType T>
void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights);
template <DataType S, typename T>
void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
absl::Span<T> dst);
template <DataType T>
void UploadWeights(const tflite::gpu::Tensor<OHWDI, T>& weights);
template <DataType S, typename T>
void RearrangeWeightsData(const tflite::gpu::Tensor<OHWDI, S>& weights,
absl::Span<T> dst);
std::string GenerateDepthwiseConvolutionCode(const OperationDef& op_def,
bool stride_correction,
int channel_multiplier,
bool weights_are_buffer);
bool weights_are_buffer_;
int4 kernel_size_;
int4 stride_;
int4 padding_;
int4 dilation_;
int channel_multiplier_;
};
template <DataType T>
void DepthwiseConvolution::UploadWeights(
const tflite::gpu::Tensor<OHWI, T>& weights) {
const int dst_channels = weights.shape.i * weights.shape.o;
const int dst_slices = DivideRoundUp(dst_channels, 4);
const int kernel_x = weights.shape.w;
const int kernel_y = weights.shape.h;
const int elements_count = kernel_x * kernel_y * dst_slices;
const bool fp32_weights = definition_.precision == CalculationsPrecision::F32;
const int float4_size = fp32_weights ? 16 : 8;
std::vector<uint8_t> data(float4_size * elements_count);
if (fp32_weights) {
float4* ptr = reinterpret_cast<float4*>(data.data());
RearrangeWeightsData(weights, absl::MakeSpan(ptr, elements_count));
} else {
half4* ptr = reinterpret_cast<half4*>(data.data());
RearrangeWeightsData(weights, absl::MakeSpan(ptr, elements_count));
}
if (weights_are_buffer_) {
BufferDescriptor desc;
desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
desc.element_size = 4;
desc.size = float4_size * elements_count;
desc.data = std::move(data);
args_.AddObject("weights", absl::make_unique<BufferDescriptor>(desc));
} else {
Texture2DDescriptor desc;
desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
desc.size = int2(kernel_x * kernel_y, dst_slices);
desc.data = std::move(data);
args_.AddObject("weights", absl::make_unique<Texture2DDescriptor>(desc));
}
}
template <DataType S, typename T>
void DepthwiseConvolution::RearrangeWeightsData(
const tflite::gpu::Tensor<OHWI, S>& weights, absl::Span<T> dst) {
void RearrangeWeightsForDWConv2D(const tflite::gpu::Tensor<OHWI, S>& weights,
absl::Span<T> dst) {
const int dst_channels = weights.shape.i * weights.shape.o;
const int dst_depth = DivideRoundUp(dst_channels, 4);
const int kernel_x = weights.shape.w;
@@ -158,50 +65,50 @@ void DepthwiseConvolution::RearrangeWeightsData(
}
template <DataType T>
void DepthwiseConvolution::UploadWeights(
const tflite::gpu::Tensor<OHWDI, T>& weights) {
void UploadWeightsForDWConv2D(const tflite::gpu::Tensor<OHWI, T>& weights,
bool weights_are_buffer,
CalculationsPrecision precision,
GPUOperation* op) {
const int dst_channels = weights.shape.i * weights.shape.o;
const int dst_slices = DivideRoundUp(dst_channels, 4);
const int kernel_x = weights.shape.w;
const int kernel_y = weights.shape.h;
const int kernel_z = weights.shape.d;
const int elements_count = kernel_x * kernel_y * kernel_z * dst_slices;
const int elements_count = kernel_x * kernel_y * dst_slices;
const bool fp32_weights = definition_.precision == CalculationsPrecision::F32;
const bool fp32_weights = precision == CalculationsPrecision::F32;
const int float4_size = fp32_weights ? 16 : 8;
std::vector<uint8_t> data(float4_size * elements_count);
if (fp32_weights) {
float4* ptr = reinterpret_cast<float4*>(data.data());
RearrangeWeightsData(weights, absl::MakeSpan(ptr, elements_count));
RearrangeWeightsForDWConv2D(weights, absl::MakeSpan(ptr, elements_count));
} else {
half4* ptr = reinterpret_cast<half4*>(data.data());
RearrangeWeightsData(weights, absl::MakeSpan(ptr, elements_count));
RearrangeWeightsForDWConv2D(weights, absl::MakeSpan(ptr, elements_count));
}
if (weights_are_buffer_) {
if (weights_are_buffer) {
BufferDescriptor desc;
desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
desc.element_size = 4;
desc.size = float4_size * elements_count;
desc.data = std::move(data);
args_.AddObject("weights",
absl::make_unique<BufferDescriptor>(std::move(desc)));
op->args_.AddObject("weights", absl::make_unique<BufferDescriptor>(desc));
} else {
Texture2DDescriptor desc;
desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
desc.size = int2(kernel_x * kernel_y * kernel_z, dst_slices);
desc.size = int2(kernel_x * kernel_y, dst_slices);
desc.data = std::move(data);
args_.AddObject("weights",
absl::make_unique<Texture2DDescriptor>(std::move(desc)));
op->args_.AddObject("weights",
absl::make_unique<Texture2DDescriptor>(desc));
}
}
template <DataType S, typename T>
void DepthwiseConvolution::RearrangeWeightsData(
const tflite::gpu::Tensor<OHWDI, S>& weights, absl::Span<T> dst) {
void RearrangeWeightsForDWConv3D(const tflite::gpu::Tensor<OHWDI, S>& weights,
absl::Span<T> dst) {
const int dst_channels = weights.shape.i * weights.shape.o;
const int dst_slices = DivideRoundUp(dst_channels, 4);
const int kernel_x = weights.shape.w;
@@ -231,11 +138,55 @@ void DepthwiseConvolution::RearrangeWeightsData(
}
}
DepthwiseConvolution CreateDepthwiseConvolution(
template <DataType T>
void UploadWeightsForDWConv3D(const tflite::gpu::Tensor<OHWDI, T>& weights,
bool weights_are_buffer,
CalculationsPrecision precision,
GPUOperation* op) {
const int dst_channels = weights.shape.i * weights.shape.o;
const int dst_slices = DivideRoundUp(dst_channels, 4);
const int kernel_x = weights.shape.w;
const int kernel_y = weights.shape.h;
const int kernel_z = weights.shape.d;
const int elements_count = kernel_x * kernel_y * kernel_z * dst_slices;
const bool fp32_weights = precision == CalculationsPrecision::F32;
const int float4_size = fp32_weights ? 16 : 8;
std::vector<uint8_t> data(float4_size * elements_count);
if (fp32_weights) {
float4* ptr = reinterpret_cast<float4*>(data.data());
RearrangeWeightsForDWConv3D(weights, absl::MakeSpan(ptr, elements_count));
} else {
half4* ptr = reinterpret_cast<half4*>(data.data());
RearrangeWeightsForDWConv3D(weights, absl::MakeSpan(ptr, elements_count));
}
if (weights_are_buffer) {
BufferDescriptor desc;
desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
desc.element_size = 4;
desc.size = float4_size * elements_count;
desc.data = std::move(data);
op->args_.AddObject("weights",
absl::make_unique<BufferDescriptor>(std::move(desc)));
} else {
Texture2DDescriptor desc;
desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
desc.size = int2(kernel_x * kernel_y * kernel_z, dst_slices);
desc.data = std::move(data);
op->args_.AddObject(
"weights", absl::make_unique<Texture2DDescriptor>(std::move(desc)));
}
}
GPUOperation CreateDepthwiseConvolution2D(
const DeviceInfo& device_info, const OperationDef& definition,
const DepthwiseConvolution2DAttributes& attr);
DepthwiseConvolution CreateDepthwiseConvolution(
GPUOperation CreateDepthwiseConvolution3D(
const DeviceInfo& device_info, const OperationDef& definition,
const DepthwiseConvolution3DAttributes& attr);
......
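The upload helpers above size their staging buffer as float4_size * elements_count, packing four consecutive destination channels into one float4 slice. A runnable arithmetic sketch with hypothetical weight shapes:

```cpp
// Hypothetical OHWI shape illustrating UploadWeightsForDWConv2D's
// buffer sizing; DivideRoundUp mirrors the helper used above.
#include <iostream>

int DivideRoundUp(int n, int d) { return (n + d - 1) / d; }

int main() {
  const int o = 2, h = 3, w = 3, i = 8;  // weights.shape (hypothetical)
  const int dst_channels = i * o;                         // 16 channels
  const int dst_slices = DivideRoundUp(dst_channels, 4);  // 4 slices
  const int elements_count = w * h * dst_slices;          // 36 float4s
  const bool fp32_weights = true;   // CalculationsPrecision::F32
  const int float4_size = fp32_weights ? 16 : 8;  // bytes per element
  std::cout << "staging buffer: " << float4_size * elements_count
            << " bytes\n";  // prints 576
  return 0;
}
```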
@@ -55,7 +55,7 @@ TEST_F(OpenCLOperationTest, DepthwiseConvSimpleWeights) {
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
TensorFloat32 dst_tensor;
DepthwiseConvolution operation = CreateDepthwiseConvolution(
GPUOperation operation = CreateDepthwiseConvolution2D(
creation_context_.GetDeviceInfo(), op_def, attr);
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
BHWC(1, 2, 2, 2), &dst_tensor));
@@ -90,7 +90,7 @@ TEST_F(OpenCLOperationTest, DepthwiseConvNoMultiplier) {
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
TensorFloat32 dst_tensor;
DepthwiseConvolution operation = CreateDepthwiseConvolution(
GPUOperation operation = CreateDepthwiseConvolution2D(
creation_context_.GetDeviceInfo(), op_def, attr);
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
BHWC(1, 2, 2, 2), &dst_tensor));
@@ -126,7 +126,7 @@ TEST_F(OpenCLOperationTest, DepthwiseConvMultiplier2) {
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
TensorFloat32 dst_tensor;
DepthwiseConvolution operation = CreateDepthwiseConvolution(
GPUOperation operation = CreateDepthwiseConvolution2D(
creation_context_.GetDeviceInfo(), op_def, attr);
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
BHWC(1, 2, 2, 4), &dst_tensor));
......
@@ -33,8 +33,8 @@ std::unique_ptr<GPUOperation> SelectDWConvolutionAdreno(
return absl::make_unique<DepthwiseConv3x3>(
CreateDepthwiseConv3x3(device_info, op_def, attr));
} else {
return absl::make_unique<DepthwiseConvolution>(
CreateDepthwiseConvolution(device_info, op_def, attr));
return absl::make_unique<GPUOperation>(
CreateDepthwiseConvolution2D(device_info, op_def, attr));
}
}
@@ -45,8 +45,8 @@ std::unique_ptr<GPUOperation> SelectDWConvolutionPowerVR(
return absl::make_unique<DepthwiseConv3x3>(
CreateDepthwiseConv3x3(device_info, op_def, attr));
} else {
return absl::make_unique<DepthwiseConvolution>(
CreateDepthwiseConvolution(device_info, op_def, attr));
return absl::make_unique<GPUOperation>(
CreateDepthwiseConvolution2D(device_info, op_def, attr));
}
}
@@ -62,8 +62,8 @@ std::unique_ptr<GPUOperation> SelectDWConvolutionMali(
return absl::make_unique<DepthwiseConv3x3>(
CreateDepthwiseConv3x3(device_info, op_def, attr));
} else {
return absl::make_unique<DepthwiseConvolution>(
CreateDepthwiseConvolution(device_info, op_def, attr));
return absl::make_unique<GPUOperation>(
CreateDepthwiseConvolution2D(device_info, op_def, attr));
}
}
} // namespace
......
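A note on the selector updates above: because the factories now return a concrete GPUOperation by value, each selector can move the result straight into a unique_ptr instead of wrapping a subclass. A minimal sketch of the shared tail (the wrapper name is hypothetical):

```cpp
// Hypothetical helper showing the common pattern of the three selectors.
std::unique_ptr<GPUOperation> WrapGenericDWConv(
    const DeviceInfo& device_info, const OperationDef& op_def,
    const DepthwiseConvolution2DAttributes& attr) {
  return absl::make_unique<GPUOperation>(
      CreateDepthwiseConvolution2D(device_info, op_def, attr));
}
```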