From 7e2e776f7701eb02401e43cf12173a88eb8364ef Mon Sep 17 00:00:00 2001 From: liutuo Date: Tue, 23 Oct 2018 18:01:35 +0800 Subject: [PATCH] fix deconv cpu thread safety --- mace/kernels/arm/deconv_2d_neon_3x3.cc | 4 ++-- mace/kernels/arm/deconv_2d_neon_4x4.cc | 4 ++-- mace/kernels/deconv_2d.h | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/mace/kernels/arm/deconv_2d_neon_3x3.cc b/mace/kernels/arm/deconv_2d_neon_3x3.cc index c8f5006b..cdba42c0 100644 --- a/mace/kernels/arm/deconv_2d_neon_3x3.cc +++ b/mace/kernels/arm/deconv_2d_neon_3x3.cc @@ -293,11 +293,11 @@ void Deconv2dNeonK3x3S2(const float *input, const index_t outw = out_shape[3]; const index_t out_img_size = outh * outw; -#pragma omp parallel for collapse(3) +#pragma omp parallel for collapse(2) for (index_t b = 0; b < out_shape[0]; ++b) { for (index_t oc = 0; oc < outch; ++oc) { + float *out_base = output + (b * outch + oc) * out_img_size; for (index_t ic = 0; ic < inch; ++ic) { - float *out_base = output + (b * outch + oc) * out_img_size; const float *input_base = input + (b * inch + ic) * h * w; const float *kernel_base = filter + (oc * inch + ic) * 9; const float *in = input_base; diff --git a/mace/kernels/arm/deconv_2d_neon_4x4.cc b/mace/kernels/arm/deconv_2d_neon_4x4.cc index dd371ada..575a8494 100644 --- a/mace/kernels/arm/deconv_2d_neon_4x4.cc +++ b/mace/kernels/arm/deconv_2d_neon_4x4.cc @@ -386,11 +386,11 @@ void Deconv2dNeonK4x4S2(const float *input, const index_t outch = out_shape[1]; const index_t out_img_size = outh * outw; -#pragma omp parallel for collapse(3) +#pragma omp parallel for collapse(2) for (int b = 0; b < out_shape[0]; ++b) { for (int p = 0; p < outch; p++) { + float *out_base = output + (b * outch + p) * out_img_size; for (int q = 0; q < inch; q++) { - float *out_base = output + (b * outch + p) * out_img_size; const float *input_base = input + (b * inch + q) * h * w; const float *kernel_base = filter + (p * inch + q) * 16; const float *in = input_base; diff 
--git a/mace/kernels/deconv_2d.h b/mace/kernels/deconv_2d.h index ff1875fe..7e1ed460 100644 --- a/mace/kernels/deconv_2d.h +++ b/mace/kernels/deconv_2d.h @@ -214,13 +214,13 @@ struct Deconv2dFunctor: Deconv2dFunctorBase { const index_t out_channels = out_shape[1]; const index_t in_channels = in_shape[1]; -#pragma omp parallel for collapse(4) +#pragma omp parallel for collapse(2) for (int b = 0; b < batch; ++b) { for (int oc = 0; oc < out_channels; ++oc) { + float *out_base = + output + (b * out_channels + oc) * out_img_size; for (int i = 0; i < in_height; ++i) { for (int j = 0; j < in_width; ++j) { - float *out_base = - output + (b * out_channels + oc) * out_img_size; const index_t out_offset = i * strides[0] * out_width + j * strides[1]; for (int ic = 0; ic < in_channels; ++ic) { -- GitLab