未验证 提交 9a9d1cf2 编写于 作者: H HappyAngel 提交者: GitHub

[arm] Fix bilinear resize computation error (#4351)

* fix bilinear_resize result not matching fluid. test=develop

* fix cv build error. test=develop


* fix format. test=develop
上级 db2ab554
...@@ -70,7 +70,8 @@ void bilinear_interp(const float* src, ...@@ -70,7 +70,8 @@ void bilinear_interp(const float* src,
int h_out, int h_out,
float scale_x, float scale_x,
float scale_y, float scale_y,
bool with_align) { bool align_corners,
bool align_mode) {
int* buf = new int[w_out + h_out + w_out * 2 + h_out * 2]; int* buf = new int[w_out + h_out + w_out * 2 + h_out * 2];
int* xofs = buf; int* xofs = buf;
...@@ -78,14 +79,13 @@ void bilinear_interp(const float* src, ...@@ -78,14 +79,13 @@ void bilinear_interp(const float* src,
float* alpha = reinterpret_cast<float*>(buf + w_out + h_out); float* alpha = reinterpret_cast<float*>(buf + w_out + h_out);
float* beta = reinterpret_cast<float*>(buf + w_out + h_out + w_out * 2); float* beta = reinterpret_cast<float*>(buf + w_out + h_out + w_out * 2);
bool with_align = (align_mode == 0 && !align_corners);
float fx = 0.0f; float fx = 0.0f;
float fy = 0.0f; float fy = 0.0f;
int sx = 0; int sx = 0;
int sy = 0; int sy = 0;
if (with_align) { if (!with_align) {
scale_x = static_cast<float>(w_in - 1) / (w_out - 1);
scale_y = static_cast<float>(h_in - 1) / (h_out - 1);
// calculate x axis coordinate // calculate x axis coordinate
for (int dx = 0; dx < w_out; dx++) { for (int dx = 0; dx < w_out; dx++) {
fx = dx * scale_x; fx = dx * scale_x;
...@@ -105,8 +105,6 @@ void bilinear_interp(const float* src, ...@@ -105,8 +105,6 @@ void bilinear_interp(const float* src,
beta[dy * 2 + 1] = fy; beta[dy * 2 + 1] = fy;
} }
} else { } else {
scale_x = static_cast<float>(w_in) / w_out;
scale_y = static_cast<float>(h_in) / h_out;
// calculate x axis coordinate // calculate x axis coordinate
for (int dx = 0; dx < w_out; dx++) { for (int dx = 0; dx < w_out; dx++) {
fx = scale_x * (dx + 0.5f) - 0.5f; fx = scale_x * (dx + 0.5f) - 0.5f;
...@@ -468,15 +466,9 @@ void nearest_interp(const float* src, ...@@ -468,15 +466,9 @@ void nearest_interp(const float* src,
float* dst, float* dst,
int w_out, int w_out,
int h_out, int h_out,
float scale_x, float scale_w_new,
float scale_y, float scale_h_new,
bool with_align) { bool with_align) {
float scale_w_new = (with_align)
? (static_cast<float>(w_in - 1) / (w_out - 1))
: (static_cast<float>(w_in) / (w_out));
float scale_h_new = (with_align)
? (static_cast<float>(h_in - 1) / (h_out - 1))
: (static_cast<float>(h_in) / (h_out));
if (with_align) { if (with_align) {
for (int h = 0; h < h_out; ++h) { for (int h = 0; h < h_out; ++h) {
float* dst_p = dst + h * w_out; float* dst_p = dst + h * w_out;
...@@ -506,7 +498,8 @@ void interpolate(lite::Tensor* X, ...@@ -506,7 +498,8 @@ void interpolate(lite::Tensor* X,
int out_height, int out_height,
int out_width, int out_width,
float scale, float scale,
bool with_align, bool align_corners,
bool align_mode,
std::string interpolate_type) { std::string interpolate_type) {
int in_h = X->dims()[2]; int in_h = X->dims()[2];
int in_w = X->dims()[3]; int in_w = X->dims()[3];
...@@ -531,12 +524,12 @@ void interpolate(lite::Tensor* X, ...@@ -531,12 +524,12 @@ void interpolate(lite::Tensor* X,
out_width = out_size_data[1]; out_width = out_size_data[1];
} }
} }
float height_scale = scale; // float height_scale = scale;
float width_scale = scale; // float width_scale = scale;
if (out_width > 0 && out_height > 0) { // if (out_width > 0 && out_height > 0) {
height_scale = static_cast<float>(out_height / X->dims()[2]); // height_scale = static_cast<float>(out_height / X->dims()[2]);
width_scale = static_cast<float>(out_width / X->dims()[3]); // width_scale = static_cast<float>(out_width / X->dims()[3]);
} // }
int num_cout = X->dims()[0]; int num_cout = X->dims()[0];
int c_cout = X->dims()[1]; int c_cout = X->dims()[1];
Out->Resize({num_cout, c_cout, out_height, out_width}); Out->Resize({num_cout, c_cout, out_height, out_width});
...@@ -551,6 +544,10 @@ void interpolate(lite::Tensor* X, ...@@ -551,6 +544,10 @@ void interpolate(lite::Tensor* X,
int spatial_in = in_h * in_w; int spatial_in = in_h * in_w;
int spatial_out = out_h * out_w; int spatial_out = out_h * out_w;
float scale_x = (align_corners) ? (static_cast<float>(in_w - 1) / (out_w - 1))
: (static_cast<float>(in_w) / (out_w));
float scale_y = (align_corners) ? (static_cast<float>(in_h - 1) / (out_h - 1))
: (static_cast<float>(in_h) / (out_h));
if ("Bilinear" == interpolate_type) { if ("Bilinear" == interpolate_type) {
#pragma omp parallel for #pragma omp parallel for
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
...@@ -560,9 +557,10 @@ void interpolate(lite::Tensor* X, ...@@ -560,9 +557,10 @@ void interpolate(lite::Tensor* X,
dout + spatial_out * i, dout + spatial_out * i,
out_w, out_w,
out_h, out_h,
1.f / width_scale, scale_x,
1.f / height_scale, scale_y,
with_align); align_corners,
align_mode);
} }
} else if ("Nearest" == interpolate_type) { } else if ("Nearest" == interpolate_type) {
#pragma omp parallel for #pragma omp parallel for
...@@ -573,9 +571,9 @@ void interpolate(lite::Tensor* X, ...@@ -573,9 +571,9 @@ void interpolate(lite::Tensor* X,
dout + spatial_out * i, dout + spatial_out * i,
out_w, out_w,
out_h, out_h,
1.f / width_scale, scale_x,
1.f / height_scale, scale_y,
with_align); align_corners);
} }
} }
} }
......
...@@ -30,7 +30,8 @@ void bilinear_interp(const float* src, ...@@ -30,7 +30,8 @@ void bilinear_interp(const float* src,
int h_out, int h_out,
float scale_x, float scale_x,
float scale_y, float scale_y,
bool with_align); bool align_corners,
bool align_mode);
void nearest_interp(const float* src, void nearest_interp(const float* src,
int w_in, int w_in,
...@@ -40,7 +41,7 @@ void nearest_interp(const float* src, ...@@ -40,7 +41,7 @@ void nearest_interp(const float* src,
int h_out, int h_out,
float scale_x, float scale_x,
float scale_y, float scale_y,
bool with_align); bool align_corners);
void interpolate(lite::Tensor* X, void interpolate(lite::Tensor* X,
lite::Tensor* OutSize, lite::Tensor* OutSize,
...@@ -50,7 +51,8 @@ void interpolate(lite::Tensor* X, ...@@ -50,7 +51,8 @@ void interpolate(lite::Tensor* X,
int out_height, int out_height,
int out_width, int out_width,
float scale, float scale,
bool with_align, bool align_corners,
bool align_mode,
std::string interpolate_type); std::string interpolate_type);
} /* namespace math */ } /* namespace math */
......
...@@ -128,7 +128,7 @@ bool test_convert(bool cv_run, ...@@ -128,7 +128,7 @@ bool test_convert(bool cv_run,
for (int i = 0; i < test_iter; i++) { for (int i = 0; i < test_iter; i++) {
clock_t begin = clock(); clock_t begin = clock();
// resize default linear // resize default linear
image_preprocess.imageConvert(src, resize_lite); image_preprocess.image_convert(src, resize_lite);
clock_t end = clock(); clock_t end = clock();
to_lite += (end - begin); to_lite += (end - begin);
} }
...@@ -226,7 +226,7 @@ bool test_flip(bool cv_run, ...@@ -226,7 +226,7 @@ bool test_flip(bool cv_run,
for (int i = 0; i < test_iter; i++) { for (int i = 0; i < test_iter; i++) {
clock_t begin = clock(); clock_t begin = clock();
// resize default linear // resize default linear
image_preprocess.imageFlip(src, resize_lite); image_preprocess.image_flip(src, resize_lite);
clock_t end = clock(); clock_t end = clock();
to_lite += (end - begin); to_lite += (end - begin);
} }
...@@ -330,7 +330,7 @@ bool test_rotate(bool cv_run, ...@@ -330,7 +330,7 @@ bool test_rotate(bool cv_run,
for (int i = 0; i < test_iter; i++) { for (int i = 0; i < test_iter; i++) {
clock_t begin = clock(); clock_t begin = clock();
// resize default linear // resize default linear
image_preprocess.imageRotate(src, resize_lite); image_preprocess.image_rotate(src, resize_lite);
clock_t end = clock(); clock_t end = clock();
to_lite += (end - begin); to_lite += (end - begin);
} }
...@@ -426,7 +426,7 @@ bool test_resize(bool cv_run, ...@@ -426,7 +426,7 @@ bool test_resize(bool cv_run,
for (int i = 0; i < test_iter; i++) { for (int i = 0; i < test_iter; i++) {
clock_t begin = clock(); clock_t begin = clock();
// resize default linear // resize default linear
image_preprocess.imageResize(src, resize_lite); image_preprocess.image_resize(src, resize_lite);
clock_t end = clock(); clock_t end = clock();
to_lite += (end - begin); to_lite += (end - begin);
} }
...@@ -526,7 +526,7 @@ bool test_crop(bool cv_run, ...@@ -526,7 +526,7 @@ bool test_crop(bool cv_run,
std::cout << "lite compute:" << std::endl; std::cout << "lite compute:" << std::endl;
for (int i = 0; i < test_iter; i++) { for (int i = 0; i < test_iter; i++) {
clock_t begin = clock(); clock_t begin = clock();
image_preprocess.imageCrop( image_preprocess.image_crop(
src, resize_lite, dstFormat, srcw, srch, left_x, left_y, dstw, dsth); src, resize_lite, dstFormat, srcw, srch, left_x, left_y, dstw, dsth);
clock_t end = clock(); clock_t end = clock();
to_lite += (end - begin); to_lite += (end - begin);
......
...@@ -88,13 +88,13 @@ void pre_process(const cv::Mat& img, int width, int height, Tensor dstTensor) { ...@@ -88,13 +88,13 @@ void pre_process(const cv::Mat& img, int width, int height, Tensor dstTensor) {
uint8_t* rgb_ptr = new uint8_t[img.cols * img.rows * 3]; uint8_t* rgb_ptr = new uint8_t[img.cols * img.rows * 3];
uint8_t* resize_ptr = new uint8_t[width * height * 3]; uint8_t* resize_ptr = new uint8_t[width * height * 3];
// do convert bgr--rgb // do convert bgr--rgb
img_process.imageConvert(img_ptr, rgb_ptr); img_process.image_convert(img_ptr, rgb_ptr);
// do resize // do resize
img_process.imageResize(rgb_ptr, resize_ptr); img_process.image_resize(rgb_ptr, resize_ptr);
// data--tensor and normalize // data--tensor and normalize
float means[3] = {103.94f, 116.78f, 123.68f}; float means[3] = {103.94f, 116.78f, 123.68f};
float scales[3] = {0.017f, 0.017f, 0.017f}; float scales[3] = {0.017f, 0.017f, 0.017f};
img_process.image2Tensor( img_process.image_to_tensor(
resize_ptr, &dstTensor, LayoutType::kNCHW, means, scales); resize_ptr, &dstTensor, LayoutType::kNCHW, means, scales);
float* data = dstTensor.mutable_data<float>(); float* data = dstTensor.mutable_data<float>();
#else #else
......
...@@ -35,6 +35,7 @@ void BilinearInterpCompute::Run() { ...@@ -35,6 +35,7 @@ void BilinearInterpCompute::Run() {
int out_w = param.out_w; int out_w = param.out_w;
int out_h = param.out_h; int out_h = param.out_h;
bool align_corners = param.align_corners; bool align_corners = param.align_corners;
bool align_mode = param.align_mode;
std::string interp_method = "Bilinear"; std::string interp_method = "Bilinear";
lite::arm::math::interpolate(X, lite::arm::math::interpolate(X,
OutSize, OutSize,
...@@ -45,6 +46,7 @@ void BilinearInterpCompute::Run() { ...@@ -45,6 +46,7 @@ void BilinearInterpCompute::Run() {
out_w, out_w,
scale, scale,
align_corners, align_corners,
align_mode,
interp_method); interp_method);
} }
...@@ -59,6 +61,7 @@ void NearestInterpCompute::Run() { ...@@ -59,6 +61,7 @@ void NearestInterpCompute::Run() {
int out_w = param.out_w; int out_w = param.out_w;
int out_h = param.out_h; int out_h = param.out_h;
bool align_corners = param.align_corners; bool align_corners = param.align_corners;
bool align_mode = param.align_mode;
std::string interp_method = "Nearest"; std::string interp_method = "Nearest";
lite::arm::math::interpolate(X, lite::arm::math::interpolate(X,
OutSize, OutSize,
...@@ -69,6 +72,7 @@ void NearestInterpCompute::Run() { ...@@ -69,6 +72,7 @@ void NearestInterpCompute::Run() {
out_w, out_w,
scale, scale,
align_corners, align_corners,
align_mode,
interp_method); interp_method);
} }
......
...@@ -293,15 +293,15 @@ void test_img(const std::vector<int>& cluster_id, ...@@ -293,15 +293,15 @@ void test_img(const std::vector<int>& cluster_id,
// LOG(INFO) << "image convert saber compute"; // LOG(INFO) << "image convert saber compute";
t_convert.Start(); t_convert.Start();
// 方法一: image_preprocess.imageCovert(src, lite_dst); // 方法一: image_preprocess.image_convert(src, lite_dst);
image_preprocess.imageConvert( image_preprocess.image_convert(
src, lite_dst, (ImageFormat)srcFormat, (ImageFormat)dstFormat); src, lite_dst, (ImageFormat)srcFormat, (ImageFormat)dstFormat);
t_convert.Stop(); t_convert.Stop();
// LOG(INFO) << "image resize saber compute"; // LOG(INFO) << "image resize saber compute";
t_resize.Start(); t_resize.Start();
// 方法一:image_preprocess.imageResize(lite_dst, resize_tmp); // 方法一:image_preprocess.image_resize(lite_dst, resize_tmp);
image_preprocess.imageResize(lite_dst, image_preprocess.image_resize(lite_dst,
resize_tmp, resize_tmp,
(ImageFormat)dstFormat, (ImageFormat)dstFormat,
srcw, srcw,
...@@ -312,8 +312,8 @@ void test_img(const std::vector<int>& cluster_id, ...@@ -312,8 +312,8 @@ void test_img(const std::vector<int>& cluster_id,
// LOG(INFO) << "image rotate saber compute"; // LOG(INFO) << "image rotate saber compute";
t_rotate.Start(); t_rotate.Start();
// 方法一: image_preprocess.imageRotate(resize_tmp, tv_out_ratote); // 方法一: image_preprocess.image_rotate(resize_tmp, tv_out_ratote);
image_preprocess.imageRotate(resize_tmp, image_preprocess.image_rotate(resize_tmp,
tv_out_ratote, tv_out_ratote,
(ImageFormat)dstFormat, (ImageFormat)dstFormat,
dstw, dstw,
...@@ -323,16 +323,16 @@ void test_img(const std::vector<int>& cluster_id, ...@@ -323,16 +323,16 @@ void test_img(const std::vector<int>& cluster_id,
// LOG(INFO) << "image flip saber compute"; // LOG(INFO) << "image flip saber compute";
t_flip.Start(); t_flip.Start();
// 方法一: image_preprocess.imageFlip(resize_tmp, tv_out_flip); // 方法一: image_preprocess.image_flip(resize_tmp, tv_out_flip);
image_preprocess.imageFlip( image_preprocess.image_flip(
resize_tmp, tv_out_flip, (ImageFormat)dstFormat, dstw, dsth, flip); resize_tmp, tv_out_flip, (ImageFormat)dstFormat, dstw, dsth, flip);
t_flip.Stop(); t_flip.Stop();
// LOG(INFO) << "image to tensor compute"; // LOG(INFO) << "image to tensor compute";
t_tensor.Start(); t_tensor.Start();
// 方法一: image_preprocess.image2Tensor( // 方法一: image_preprocess.image_to_tensor(
// resize_tmp, &dst_tensor, layout, means, scales); // resize_tmp, &dst_tensor, layout, means, scales);
image_preprocess.image2Tensor(resize_tmp, image_preprocess.image_to_tensor(resize_tmp,
&dst_tensor, &dst_tensor,
(ImageFormat)dstFormat, (ImageFormat)dstFormat,
dstw, dstw,
...@@ -680,7 +680,7 @@ void test_rotate(const std::vector<int>& cluster_id, ...@@ -680,7 +680,7 @@ void test_rotate(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) { for (int i = 0; i < test_iter; ++i) {
t_rotate.Start(); t_rotate.Start();
image_preprocess.imageRotate(src, lite_dst); image_preprocess.image_rotate(src, lite_dst);
t_rotate.Stop(); t_rotate.Stop();
} }
LOG(INFO) << "image rotate avg time : " << t_rotate.LapTimes().Avg() LOG(INFO) << "image rotate avg time : " << t_rotate.LapTimes().Avg()
...@@ -847,7 +847,7 @@ void test_flip(const std::vector<int>& cluster_id, ...@@ -847,7 +847,7 @@ void test_flip(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) { for (int i = 0; i < test_iter; ++i) {
t_rotate.Start(); t_rotate.Start();
image_preprocess.imageFlip(src, lite_dst); image_preprocess.image_flip(src, lite_dst);
t_rotate.Stop(); t_rotate.Stop();
} }
LOG(INFO) << "image flip avg time : " << t_rotate.LapTimes().Avg() LOG(INFO) << "image flip avg time : " << t_rotate.LapTimes().Avg()
...@@ -1016,7 +1016,7 @@ void test_resize(const std::vector<int>& cluster_id, ...@@ -1016,7 +1016,7 @@ void test_resize(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) { for (int i = 0; i < test_iter; ++i) {
t_rotate.Start(); t_rotate.Start();
image_preprocess.imageResize(src, lite_dst); image_preprocess.image_resize(src, lite_dst);
t_rotate.Stop(); t_rotate.Stop();
} }
LOG(INFO) << "image Resize avg time : " << t_rotate.LapTimes().Avg() LOG(INFO) << "image Resize avg time : " << t_rotate.LapTimes().Avg()
...@@ -1191,7 +1191,7 @@ void test_convert(const std::vector<int>& cluster_id, ...@@ -1191,7 +1191,7 @@ void test_convert(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) { for (int i = 0; i < test_iter; ++i) {
t_rotate.Start(); t_rotate.Start();
image_preprocess.imageConvert(src, lite_dst); image_preprocess.image_convert(src, lite_dst);
t_rotate.Stop(); t_rotate.Stop();
} }
LOG(INFO) << "image Convert avg time : " << t_rotate.LapTimes().Avg() LOG(INFO) << "image Convert avg time : " << t_rotate.LapTimes().Avg()
......
...@@ -163,7 +163,7 @@ void test_convert(const std::vector<int>& cluster_id, ...@@ -163,7 +163,7 @@ void test_convert(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) { for (int i = 0; i < test_iter; ++i) {
t_lite.Start(); t_lite.Start();
image_preprocess.imageConvert(src, lite_dst); image_preprocess.image_convert(src, lite_dst);
t_lite.Stop(); t_lite.Stop();
} }
LOG(INFO) << "image Convert avg time : " << t_lite.LapTimes().Avg() LOG(INFO) << "image Convert avg time : " << t_lite.LapTimes().Avg()
...@@ -284,7 +284,7 @@ void test_resize(const std::vector<int>& cluster_id, ...@@ -284,7 +284,7 @@ void test_resize(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) { for (int i = 0; i < test_iter; ++i) {
t_rotate.Start(); t_rotate.Start();
image_preprocess.imageResize(src, lite_dst); image_preprocess.image_resize(src, lite_dst);
t_rotate.Stop(); t_rotate.Stop();
} }
LOG(INFO) << "image Resize avg time : " << t_rotate.LapTimes().Avg() LOG(INFO) << "image Resize avg time : " << t_rotate.LapTimes().Avg()
...@@ -405,7 +405,7 @@ void test_flip(const std::vector<int>& cluster_id, ...@@ -405,7 +405,7 @@ void test_flip(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) { for (int i = 0; i < test_iter; ++i) {
t_lite.Start(); t_lite.Start();
image_preprocess.imageFlip(src, lite_dst); image_preprocess.image_flip(src, lite_dst);
t_lite.Stop(); t_lite.Stop();
} }
LOG(INFO) << "image flip avg time : " << t_lite.LapTimes().Avg() LOG(INFO) << "image flip avg time : " << t_lite.LapTimes().Avg()
...@@ -523,7 +523,7 @@ void test_rotate(const std::vector<int>& cluster_id, ...@@ -523,7 +523,7 @@ void test_rotate(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) { for (int i = 0; i < test_iter; ++i) {
t_lite.Start(); t_lite.Start();
image_preprocess.imageRotate(src, lite_dst); image_preprocess.image_rotate(src, lite_dst);
t_lite.Stop(); t_lite.Stop();
} }
LOG(INFO) << "image rotate avg time : " << t_lite.LapTimes().Avg() LOG(INFO) << "image rotate avg time : " << t_lite.LapTimes().Avg()
...@@ -667,7 +667,7 @@ void test_to_tensor(const std::vector<int>& cluster_id, ...@@ -667,7 +667,7 @@ void test_to_tensor(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) { for (int i = 0; i < test_iter; ++i) {
t_lite.Start(); t_lite.Start();
image_preprocess.image2Tensor(src, image_preprocess.image_to_tensor(src,
&dst_tensor, &dst_tensor,
(ImageFormat)dstFormat, (ImageFormat)dstFormat,
dstw, dstw,
......
...@@ -416,11 +416,6 @@ void TestInterpAlignMode(Place place, float abs_error = 2e-5) { ...@@ -416,11 +416,6 @@ void TestInterpAlignMode(Place place, float abs_error = 2e-5) {
for (auto x_dims : std::vector<std::vector<int64_t>>{{3, 4, 8, 9}}) { for (auto x_dims : std::vector<std::vector<int64_t>>{{3, 4, 8, 9}}) {
for (bool align_corners : {true, false}) { for (bool align_corners : {true, false}) {
for (int align_mode : {0, 1}) { for (int align_mode : {0, 1}) {
// may exist bug in arm kernel
if (place == TARGET(kARM) && align_mode == 1 && !align_corners) {
continue;
}
// align_mode = 0 && align_corners = false NOT supported in Huawei
// Ascend NPU DDK // Ascend NPU DDK
if (place == TARGET(kHuaweiAscendNPU) && align_mode == 0 && if (place == TARGET(kHuaweiAscendNPU) && align_mode == 0 &&
!align_corners) { !align_corners) {
......
...@@ -47,6 +47,7 @@ DEFINE_bool(basic_test, true, "do all tests"); ...@@ -47,6 +47,7 @@ DEFINE_bool(basic_test, true, "do all tests");
#else #else
DEFINE_bool(basic_test, false, "do all tests"); DEFINE_bool(basic_test, false, "do all tests");
#endif #endif
DEFINE_bool(check_result, true, "check the result"); DEFINE_bool(check_result, true, "check the result");
DEFINE_int32(M, 512, "gemm: M"); DEFINE_int32(M, 512, "gemm: M");
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册