提交 c4d04f5b 编写于 作者: L Liangliang He

Merge branch 'concat-opencl' into 'master'

Finish concat opencl kernel(just support channel dim).

See merge request !142
......@@ -147,6 +147,7 @@ const std::map<std::string, std::string>
{"depthwise_conv_3x3", "depthwise_conv_3x3.cl"},
{"pooling", "pooling.cl"},
{"relu", "relu.cl"},
{"concat", "concat.cl"},
{"resize_bilinear", "resize_bilinear.cl"},
{"space_to_batch", "space_to_batch.cl"},
{"buffer_to_image", "buffer_to_image.cl"},
......
......@@ -8,25 +8,64 @@
#include "mace/core/common.h"
#include "mace/core/types.h"
#include "mace/proto/mace.pb.h"
#include "mace/core/tensor.h"
namespace mace {
namespace kernels {
template <DeviceType D, typename T>
struct ConcatFunctor {
void operator()(std::vector<const T *> &input_list,
const index_t inner_dim,
const index_t *outer_dims,
T *output) {
const size_t input_count = input_list.size();
for (int inner_idx = 0; inner_idx < inner_dim; ++inner_idx) {
for (size_t i = 0; i < input_count; ++i) {
struct ConcatFunctorBase {
ConcatFunctorBase(const int32_t axis): axis_(axis){}
int32_t axis_;
};
template<DeviceType D, typename T>
struct ConcatFunctor : ConcatFunctorBase {
ConcatFunctor(const int32_t axis): ConcatFunctorBase(axis){}
void operator()(const std::vector<const Tensor *> &input_list,
Tensor *output) {
const Tensor *input0 = input_list.front();
const int inputs_count = input_list.size();
std::vector<index_t> output_shape(input0->shape());
index_t inner_size = 1;
for (int i = 0; i < axis_; ++i) {
inner_size *= output_shape[i];
}
std::vector<index_t> outer_sizes(inputs_count, 0);
outer_sizes[0] = input0->size() / inner_size;
for (int i = 1; i < inputs_count; ++i) {
const Tensor *input = input_list[i];
MACE_CHECK(input->dim_size() == input0->dim_size(),
"Ranks of all input tensors must be same.");
for (int j = 0; j < input->dim_size(); ++j) {
if (j == axis_) {
continue;
}
MACE_CHECK(input->dim(j) == input0->dim(j),
"Dimensions of inputs should equal except axis.");
}
outer_sizes[i] = input->size() / inner_size;
output_shape[axis_] += input->dim(axis_);
}
output->Resize(output_shape);
T *output_ptr = output->mutable_data<T>();
std::vector<const T *> input_ptrs(input_list.size(), nullptr);
for (size_t i = 0; i < inputs_count; ++i) {
input_ptrs[i] = input_list[i]->data<T>();
}
for (int inner_idx = 0; inner_idx < inner_size; ++inner_idx) {
for (size_t i = 0; i < inputs_count; ++i) {
if (DataTypeCanUseMemcpy(DataTypeToEnum<T>::v())) {
memcpy(output, input_list[i], outer_dims[i] * sizeof(T));
output += outer_dims[i];
input_list[i] += outer_dims[i];
memcpy(output_ptr, input_ptrs[i], outer_sizes[i] * sizeof(T));
output_ptr += outer_sizes[i];
input_ptrs[i] += outer_sizes[i];
} else {
for (index_t k = 0; k < outer_dims[i]; ++k) {
*output++ = *input_list[i]++;
for (index_t k = 0; k < outer_sizes[i]; ++k) {
*output_ptr++ = *input_ptrs[i]++;
}
}
}
......@@ -34,6 +73,15 @@ struct ConcatFunctor {
}
};
template<typename T>
struct ConcatFunctor<DeviceType::OPENCL, T> : ConcatFunctorBase{
ConcatFunctor(const int32_t axis): ConcatFunctorBase(axis){}
void operator()(const std::vector<const Tensor *> &input_list,
Tensor *output);
};
} // namepsace kernels
} // namespace mace
......
#include <common.h>
DATA_TYPE4 stitch_vector(DATA_TYPE4 left,
DATA_TYPE4 right,
const int pos,
const bool reversed) {
if (!reversed) {
switch (pos) {
case 1:return (DATA_TYPE4)(left.x, right.x, right.y, right.z);
case 2:return (DATA_TYPE4)(left.x, left.y, right.x, right.y);
case 3:return (DATA_TYPE4)(left.x, left.y, left.z, right.x);
default:return (DATA_TYPE4) 0;
}
} else {
switch (pos) {
case 1:return (DATA_TYPE4)(left.w, right.x, right.y, right.z);
case 2:return (DATA_TYPE4)(left.z, left.w, right.x, right.y);
case 3:return (DATA_TYPE4)(left.y, left.z, left.w, right.x);
default:return (DATA_TYPE4) 0;
}
}
}
// Supported data type: half/float
__kernel void concat_channel(__read_only image2d_t input0,
__read_only image2d_t input1,
__private const int input0_chan,
__write_only image2d_t output) {
const int chan_blk_idx = get_global_id(0);
const int width_idx = get_global_id(1);
const int width = get_global_size(1);
const int hb_idx = get_global_id(2);
const int input0_chan_blk = (input0_chan + 3) / 4;
DATA_TYPE4 data = 0;
#ifdef DIVISIBLE_FOUR
if (chan_blk_idx + 1 <= input0_chan_blk) {
data = READ_IMAGET(input0,
SAMPLER,
(int2)(chan_blk_idx * width + width_idx, hb_idx));
} else {
data = READ_IMAGET(input1,
SAMPLER,
(int2)((chan_blk_idx - input0_chan_blk) * width + width_idx, hb_idx));
}
#else
if (chan_blk_idx + 1 < input0_chan_blk) {
data = READ_IMAGET(input0,
SAMPLER,
(int2)(chan_blk_idx * width + width_idx, hb_idx));
} else if (chan_blk_idx >= input0_chan_blk) {
const int in_chan_idx = chan_blk_idx - input0_chan_blk;
DATA_TYPE4 data0 = READ_IMAGET(input1,
SAMPLER,
(int2)(in_chan_idx * width + width_idx, hb_idx));
DATA_TYPE4 data1 = READ_IMAGET(input1,
SAMPLER,
(int2)((in_chan_idx + 1) * width + width_idx, hb_idx));
data = stitch_vector(data0, data1, input0_chan % 4, true);
} else {
DATA_TYPE4 data0 = READ_IMAGET(input0,
SAMPLER,
(int2)(chan_blk_idx * width + width_idx, hb_idx));
DATA_TYPE4 data1 = READ_IMAGET(input1,
SAMPLER,
(int2)(width_idx, hb_idx));
data = stitch_vector(data0, data1, input0_chan % 4, false);
}
#endif
WRITE_IMAGET(output, (int2)(chan_blk_idx * width + width_idx, hb_idx), data);
}
//__kernel void concat_width(__read_only image2d_t input0,
// __read_only image2d_t input1,
// __private const int input0_width,
// __write_only image2d_t output) {
// const int chan_blk_idx = get_global_id(0);
// const int width_idx = get_global_id(1);
// const int width = get_global_size(1);
// const int hb_idx = get_global_id(2);
//
// const sampler_t SAMPLER = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
//
// DATA_TYPE4 data = 0;
// if (width_idx < input0_width) {
// data = READ_IMAGET(input0,
// SAMPLER,
// (int2)(chan_blk_idx * width + width_idx, hb_idx));
// } else {
// data = READ_IMAGET(input1,
// SAMPLER,
// (int2)(chan_blk_idx * width + (width_idx - input0_width), hb_idx));
// }
//
// WRITE_IMAGET(output, (int2)(chan_blk_idx * width + width_idx, hb_idx), data);
//}
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/kernels/concat.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h"
namespace mace {
namespace kernels {
static void Concat2(const Tensor *input0,
const Tensor *input1,
const DataType dt,
Tensor *output) {
const index_t batch = output->dim(0);
const index_t height = output->dim(1);
const index_t width = output->dim(2);
const index_t channel = output->dim(3);
const int channel_blk = RoundUpDiv4(channel);
auto runtime = OpenCLRuntime::Get();
std::set<std::string> built_options;
if (input0->dtype() == output->dtype()) {
built_options.emplace("-DDATA_TYPE=" + DtToCLDt(dt));
built_options.emplace("-DCMD_DATA_TYPE=" + DtToCLCMDDt(dt));
} else {
built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
}
if (input0->dim(3) % 4 == 0) {
built_options.emplace("-DDIVISIBLE_FOUR");
}
auto concat_kernel = runtime->BuildKernel("concat", "concat_channel", built_options);
uint32_t idx = 0;
concat_kernel.setArg(idx++, *(static_cast<const cl::Image2D *>(input0->buffer())));
concat_kernel.setArg(idx++, *(static_cast<const cl::Image2D *>(input1->buffer())));
concat_kernel.setArg(idx++, static_cast<int32_t>(input0->dim(3)));
concat_kernel.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(concat_kernel);
uint32_t lws[3];
lws[0] = std::min<uint32_t>(channel_blk, kwg_size);
lws[1] = std::min<uint32_t>(width, kwg_size / lws[0]);
lws[2] = std::min<uint32_t>(height * batch, kwg_size / (lws[0] * lws[1]));
cl_int error = runtime->command_queue().enqueueNDRangeKernel(
concat_kernel, cl::NullRange,
cl::NDRange(static_cast<uint32_t>(channel_blk),
static_cast<uint32_t>(width),
static_cast<uint32_t>(height * batch)),
cl::NDRange(lws[0], lws[1], lws[2]),
NULL, OpenCLRuntime::Get()->GetDefaultEvent());
MACE_CHECK(error == CL_SUCCESS);
}
template<typename T>
void ConcatFunctor<DeviceType::OPENCL, T>::operator()(const std::vector<const Tensor *> &input_list,
Tensor *output) {
const int inputs_count = input_list.size();
MACE_CHECK(inputs_count == 2 && axis_ == 3)
<< "Concat opencl kernel only support two elements with axis == 3";
const Tensor *input0 = input_list[0];
std::vector<index_t> output_shape(input0->shape());
for (int i = 1; i < inputs_count; ++i) {
const Tensor *input = input_list[i];
MACE_CHECK(input->dim_size() == input0->dim_size(),
"Ranks of all input tensors must be same.");
for (int j = 0; j < input->dim_size(); ++j) {
if (j == axis_) {
continue;
}
MACE_CHECK(input->dim(j) == input0->dim(j),
"Dimensions of inputs should equal except axis.");
}
output_shape[axis_] += input->dim(axis_);
}
std::vector<size_t> image_shape;
CalImage2DShape(output_shape, BufferType::IN_OUT, image_shape);
output->ResizeImage(output_shape, image_shape);
switch (inputs_count) {
case 2:
Concat2(input_list[0], input_list[1], DataTypeToEnum<T>::value, output);
break;
default:MACE_NOT_IMPLEMENTED;
}
};
template
struct ConcatFunctor<DeviceType::OPENCL, float>;
template
struct ConcatFunctor<DeviceType::OPENCL, half>;
} // namespace kernels
} // namespace mace
......@@ -7,8 +7,20 @@
namespace mace {
REGISTER_CPU_OPERATOR(OpKeyBuilder("Concat")
.TypeConstraint<float>("T")
.Build(),
.TypeConstraint<float>("T")
.Build(),
ConcatOp<DeviceType::CPU, float>);
REGISTER_CPU_OPERATOR(OpKeyBuilder("Concat")
.TypeConstraint<half>("T")
.Build(),
ConcatOp<DeviceType::CPU, half>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Concat")
.TypeConstraint<float>("T")
.Build(),
ConcatOp<DeviceType::OPENCL, float>);
REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Concat")
.TypeConstraint<half>("T")
.Build(),
ConcatOp<DeviceType::OPENCL, half>);
} // namespace mace
......@@ -14,53 +14,23 @@ template <DeviceType D, typename T>
class ConcatOp : public Operator<D, T> {
public:
ConcatOp(const OperatorDef &op_def, Workspace *ws)
: Operator<D, T>(op_def, ws) {}
: Operator<D, T>(op_def, ws),
functor_(OperatorBase::GetSingleArgument<int>("axis", 3)){}
bool Run() override {
int32_t values_count = this->InputSize() - 1;
const Tensor *input0 = this->Input(0);
const Tensor *axis_tensor = this->Input(values_count);
MACE_CHECK(axis_tensor->dim_size() == 0,
"axis should be a scalar integer, but got shape: ",
axis_tensor->dim_size());
const int32_t concat_axis = *(axis_tensor->data<int32_t>());
const int32_t input_dims = input0->dim_size();
MACE_CHECK(this->InputSize() >= 2) << "There must be at least two inputs to concat";
const std::vector<const Tensor *> input_list = this->Inputs();
const int32_t concat_axis = OperatorBase::GetSingleArgument<int>("axis", 3);
const int32_t input_dims = input_list[0]->dim_size();
const int32_t axis =
concat_axis < 0 ? concat_axis + input_dims : concat_axis;
MACE_CHECK((0 <= axis && axis < input_dims),
"Expected concatenating axis in the range [", -input_dims, ", ",
input_dims, "], but got", concat_axis);
std::vector<index_t> output_shape(input0->shape());
index_t inner_size = 1;
for (int i = 0; i < axis; ++i) {
inner_size *= output_shape[i];
}
std::vector<index_t> outer_sizes(values_count, 0);
std::vector<const T *> input_list(values_count, nullptr);
input_list[0] = input0->data<T>();
outer_sizes[0] = input0->size() / inner_size;
const Tensor *input = nullptr;
for (int i = 1; i < values_count; ++i) {
input = this->Input(i);
MACE_CHECK(input->dim_size() == input0->dim_size(),
"Ranks of all input tensors must be same.");
for (int j = 0; j < axis_tensor->dim_size(); ++j) {
if (j == axis) {
continue;
}
MACE_CHECK(input->dim(j) == input0->dim(j),
"Dimensions of inputs should equal except axis.");
}
input_list[i] = input->data<T>();
outer_sizes[i] = input->size() / inner_size;
output_shape[axis] += input->dim(axis);
}
Tensor *output = this->Output(OUTPUT);
output->Resize(output_shape);
functor_(input_list, inner_size, outer_sizes.data(),
output->mutable_data<T>());
functor_(input_list, output);
return true;
}
......
......@@ -15,7 +15,7 @@ static void ConcatHelper(int iters, int concat_dim, int dim1) {
OpDefBuilder("Concat", "ConcatBM")
.Input("Input0")
.Input("Input1")
.Input("Axis")
.AddIntArg("axis", concat_dim)
.Output("Output")
.Finalize(net.NewOperatorDef());
......@@ -23,7 +23,6 @@ static void ConcatHelper(int iters, int concat_dim, int dim1) {
const int kDim0 = 100;
net.AddRandomInput<DeviceType::CPU, T>("Input0", {kDim0, dim1});
net.AddRandomInput<DeviceType::CPU, T>("Input1", {kDim0, dim1});
net.AddInputFromArray<DeviceType::CPU, int32_t>("Axis", {}, {concat_dim});
// Warm-up
for (int i = 0; i < 5; ++i) {
......@@ -38,14 +37,65 @@ static void ConcatHelper(int iters, int concat_dim, int dim1) {
}
}
static void BM_ConcatDim0Float(int iters, int dim1) {
static void BM_CONCAT_Dim0Float(int iters, int dim1) {
ConcatHelper<DeviceType::CPU, float>(iters, 0, dim1);
}
static void BM_ConcatDim1Float(int iters, int dim1) {
static void BM_CONCAT_Dim1Float(int iters, int dim1) {
ConcatHelper<DeviceType::CPU, float>(iters, 1, dim1);
}
BENCHMARK(BM_ConcatDim0Float)->Arg(1000)->Arg(100000);
BENCHMARK(BM_ConcatDim1Float)->Arg(1000)->Arg(100000);
BENCHMARK(BM_CONCAT_Dim0Float)->Arg(1000)->Arg(100000);
BENCHMARK(BM_CONCAT_Dim1Float)->Arg(1000)->Arg(100000);
template <typename T>
static void OpenclConcatHelper(int iters,
const std::vector<index_t> &shape0,
const std::vector<index_t> &shape1,
int concat_dim) {
mace::testing::StopTiming();
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input0", shape0);
net.AddRandomInput<DeviceType::OPENCL, float>("Input1", shape1);
BufferToImage<DeviceType::OPENCL, T>(net, "Input0", "InputImage0", kernels::BufferType::IN_OUT);
BufferToImage<DeviceType::OPENCL, T>(net, "Input1", "InputImage1", kernels::BufferType::IN_OUT);
OpDefBuilder("Concat", "ConcatBM")
.Input("InputImage0")
.Input("InputImage1")
.AddIntArg("axis", concat_dim)
.Output("OutputImage")
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
// Warm-up
for (int i = 0; i < 5; ++i) {
net.RunOp(DeviceType::OPENCL);
}
const int64_t tot = static_cast<int64_t>(iters) *
(net.GetTensor("Input0")->size() + net.GetTensor("Input1")->size());
mace::testing::ItemsProcessed(tot);
testing::BytesProcessed(tot * sizeof(T));
mace::testing::StartTiming();
while (iters--) {
net.RunOp(DeviceType::OPENCL);
}
}
static void BM_CONCATOPENCLFloat(int iters, int dim1) {
std::vector<index_t> shape = {3, 32, 32, dim1};
OpenclConcatHelper<float>(iters, shape, shape, 3);
}
static void BM_CONCATOPENCLHalf(int iters, int dim1) {
std::vector<index_t> shape = {3, 32, 32, dim1};
OpenclConcatHelper<half>(iters, shape, shape, 3);
}
BENCHMARK(BM_CONCATOPENCLFloat)->Arg(32)->Arg(64)->Arg(128)->Arg(256);
BENCHMARK(BM_CONCATOPENCLHalf)->Arg(32)->Arg(64)->Arg(128)->Arg(256);
} // namespace mace
\ No newline at end of file
......@@ -10,13 +10,13 @@ using namespace mace;
class ConcatOpTest : public OpsTestBase {};
TEST_F(ConcatOpTest, Simple_Horizon) {
TEST_F(ConcatOpTest, CPUSimpleHorizon) {
// Construct graph
auto &net = test_net();
OpDefBuilder("Concat", "ConcatTest")
.Input("Input0")
.Input("Input1")
.Input("Axis")
.AddIntArg("axis", 0)
.Output("Output")
.Finalize(net.NewOperatorDef());
......@@ -28,7 +28,6 @@ TEST_F(ConcatOpTest, Simple_Horizon) {
// Add inputs
net.AddInputFromArray<DeviceType::CPU, float>("Input0", input_shape, input0);
net.AddInputFromArray<DeviceType::CPU, float>("Input1", input_shape, input1);
net.AddInputFromArray<DeviceType::CPU, int>("Axis", {}, {0});
// Run
net.RunOp();
......@@ -48,13 +47,13 @@ TEST_F(ConcatOpTest, Simple_Horizon) {
}
}
TEST_F(ConcatOpTest, Simple_Vertical) {
TEST_F(ConcatOpTest, CPUSimpleVertical) {
// Construct graph
auto &net = test_net();
OpDefBuilder("Concat", "ConcatTest")
.Input("Input0")
.Input("Input1")
.Input("Axis")
.AddIntArg("axis", 1)
.Output("Output")
.Finalize(net.NewOperatorDef());
......@@ -66,7 +65,6 @@ TEST_F(ConcatOpTest, Simple_Vertical) {
// Add inputs
net.AddInputFromArray<DeviceType::CPU, float>("Input0", input_shape, input0);
net.AddInputFromArray<DeviceType::CPU, float>("Input1", input_shape, input1);
net.AddInputFromArray<DeviceType::CPU, int>("Axis", {}, {1});
// Run
net.RunOp();
......@@ -88,7 +86,7 @@ TEST_F(ConcatOpTest, Simple_Vertical) {
}
}
TEST_F(ConcatOpTest, Random) {
TEST_F(ConcatOpTest, CPURandom) {
srand(time(nullptr));
int dim = 5;
int num_inputs = 2 + rand() % 10;
......@@ -99,7 +97,7 @@ TEST_F(ConcatOpTest, Random) {
for (int i = 0; i < num_inputs; ++i) {
builder = builder.Input(("Input" + ToString(i)).c_str());
}
builder.Input("Axis").Output("Output").Finalize(net.NewOperatorDef());
builder.AddIntArg("axis", axis).Output("Output").Finalize(net.NewOperatorDef());
std::vector<index_t> shape_data;
GenerateRandomIntTypeData<index_t>({dim}, shape_data, 1, dim);
......@@ -115,7 +113,6 @@ TEST_F(ConcatOpTest, Random) {
net.AddInputFromArray<DeviceType::CPU, float>(("Input" + ToString(i)).c_str(),
input_shapes[i], inputs[i]);
}
net.AddInputFromArray<DeviceType::CPU, int>("Axis", {}, {axis});
// Run
net.RunOp();
......@@ -139,3 +136,87 @@ TEST_F(ConcatOpTest, Random) {
}
}
}
template<typename T>
void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
const int axis) {
srand(time(nullptr));
int num_inputs = 2;
int concat_axis_size = 0;
// Construct graph
OpsTestNet net;
for (int i = 0; i < num_inputs; ++i) {
const std::string input_name = ("Input" + ToString(i)).c_str();
const std::string image_name = ("InputImage" + ToString(i)).c_str();
concat_axis_size += shapes[i][axis];
net.AddRandomInput<DeviceType::OPENCL, float>(input_name,
shapes[i]);
BufferToImage<DeviceType::OPENCL, T>(net, input_name, image_name, kernels::BufferType::IN_OUT);
}
auto builder = OpDefBuilder("Concat", "ConcatTest");
for (int i = 0; i < num_inputs; ++i) {
const std::string image_name = ("InputImage" + ToString(i)).c_str();
builder = builder.Input(image_name);
}
builder.AddIntArg("axis", axis)
.Output("OutputImage")
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(DeviceType::OPENCL);
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
// Check
auto output = net.GetOutput("Output");
std::vector<index_t> expected_shape = shapes[0];
expected_shape[axis] = concat_axis_size;
EXPECT_THAT(output->shape(), ::testing::ContainerEq(expected_shape));
Tensor::MappingGuard output_mapper(output);
const float *output_ptr = output->data<float>();
int k = 0;
while (output_ptr != (output->data<float>() + output->size())) {
for (int i = 0; i < num_inputs; ++i) {
index_t num_elements =
std::accumulate(shapes[i].begin() + axis, shapes[i].end(),
1, std::multiplies<index_t>());
const std::string input_name = ("Input" + ToString(i)).c_str();
const Tensor *input_tensor = net.GetTensor(input_name.data());
Tensor::MappingGuard input_guard(input_tensor);
const float *input_ptr = input_tensor->data<float>() + k * num_elements;
for (int j = 0; j < num_elements; ++j) {
EXPECT_NEAR(*(input_ptr + j), *output_ptr++, 1e-2) << "With index: " << i << ", " << j;
}
}
k++;
}
}
TEST_F(ConcatOpTest, OPENCLAligned) {
OpenclRandomTest<float>({
{3, 32, 32, 32},
{3, 32, 32, 64}
},
3);
}
TEST_F(ConcatOpTest, OPENCLHalfAligned) {
OpenclRandomTest<half>({
{3, 32, 32, 32},
{3, 32, 32, 64}
},
3);
}
TEST_F(ConcatOpTest, OPENCLUnAligned) {
OpenclRandomTest<float>({
{3, 32, 32, 13},
{3, 32, 32, 17}
},
3);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册