From 582dd4ceb87cdf2575f1759bff9600d6b600fb9f Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Fri, 28 Oct 2022 16:53:36 +0800
Subject: [PATCH] fix(dnn/softmax): call cpu dispatch for softmax opr

GitOrigin-RevId: a606e66101614a4bf1135d047a163bd54ad7a650
---
 dnn/src/fallback/softmax/opr_impl.cpp         | 58 ++++++++++---------
 .../test/unit/functional/test_functional.py  | 15 +++++
 2 files changed, 47 insertions(+), 26 deletions(-)

diff --git a/dnn/src/fallback/softmax/opr_impl.cpp b/dnn/src/fallback/softmax/opr_impl.cpp
index 15623ad4e..85d050881 100644
--- a/dnn/src/fallback/softmax/opr_impl.cpp
+++ b/dnn/src/fallback/softmax/opr_impl.cpp
@@ -6,35 +6,19 @@ namespace megdnn {
 namespace fallback {
 
-void SoftmaxForwardImpl::exec(
-        _megdnn_tensor_in src, _megdnn_tensor_out dst, _megdnn_workspace workspace) {
-    auto axis = param().axis;
-    if (axis < 0)
-        axis += src.layout.ndim;
-    megdnn_assert(axis >= 0);
-    check_exec(src.layout, dst.layout, workspace.size);
-
-    if (!usable(src.layout)) {
-        naive::SoftmaxForwardImpl::exec(src, dst, workspace);
-        return;
-    }
-
-    typedef DTypeTrait<dtype::Float32>::ctype Float32;
-    auto sptr = src.ptr<Float32>();
-    auto dptr = dst.ptr<Float32>();
-
-    constexpr auto float_min = std::numeric_limits<Float32>::min();
-    constexpr auto step = GI_SIMD_LEN_BYTE / sizeof(Float32);
-
-    size_t A, B, C;
-    reduce::get_ABC(src.layout, A, B, C, axis);
+static void do_softmax(
+        const float* sptr, float* dptr, size_t A, size_t B, size_t C,
+        _megdnn_workspace workspace) {
+    constexpr auto float_min = std::numeric_limits<float>::min();
+    constexpr auto step = GI_SIMD_LEN_BYTE / sizeof(float);
 
     // TODO: When C=2,3,4..., src_ptr span is relatively large, the performance may
     // be poor
+
     if (C != 1) {
         WorkspaceBundle workspace_bundle{
-                workspace.raw_ptr, {A * C * sizeof(Float32), A * C * sizeof(Float32)}};
-        Float32* max = workspace_bundle.get_workspace(0).raw_ptr->as<Float32>();
+                workspace.raw_ptr, {A * C * sizeof(float), A * C * sizeof(float)}};
+        float* max = workspace_bundle.get_workspace(0).raw_ptr->as<float>();
         GI_FLOAT32_t v_max = GiBroadcastFloat32(float_min);
         size_t i = 0;
         for (; i + step <= A * C; i += step)
@@ -60,8 +44,8 @@ void SoftmaxForwardImpl::exec(
             }
         }
 
-        Float32* sum = workspace_bundle.get_workspace(1).raw_ptr->as<Float32>();
-        memset(sum, 0, A * C * sizeof(Float32));
+        float* sum = workspace_bundle.get_workspace(1).raw_ptr->as<float>();
+        memset(sum, 0, A * C * sizeof(float));
         for (size_t a = 0; a < A; a++) {
             for (size_t b = 0; b < B; b++) {
                 auto max_ptr = max + a * C;
@@ -157,6 +141,28 @@ void SoftmaxForwardImpl::exec(
     }
 }
 
+void SoftmaxForwardImpl::exec(
+        _megdnn_tensor_in src, _megdnn_tensor_out dst, _megdnn_workspace workspace) {
+    auto axis = param().axis;
+    if (axis < 0)
+        axis += src.layout.ndim;
+    megdnn_assert(axis >= 0);
+    check_exec(src.layout, dst.layout, workspace.size);
+
+    if (!usable(src.layout)) {
+        naive::SoftmaxForwardImpl::exec(src, dst, workspace);
+        return;
+    }
+
+    typedef DTypeTrait<dtype::Float32>::ctype Float32;
+    auto sptr = src.ptr<Float32>();
+    auto dptr = dst.ptr<Float32>();
+
+    size_t A, B, C;
+    reduce::get_ABC(src.layout, A, B, C, axis);
+    MEGDNN_DISPATCH_CPU_KERN_OPR(do_softmax(sptr, dptr, A, B, C, workspace));
+}
+
 } // namespace fallback
 } // namespace megdnn
diff --git a/imperative/python/test/unit/functional/test_functional.py b/imperative/python/test/unit/functional/test_functional.py
index e091c6abd..d2c2e8e68 100644
--- a/imperative/python/test/unit/functional/test_functional.py
+++ b/imperative/python/test/unit/functional/test_functional.py
@@ -1653,3 +1653,18 @@ def test_conv_transpose3d():
     np.testing.assert_equal(
         output_shape.numpy(), np.array([20, 33, 32, 96, 197], dtype=np.int32)
     )
+
+
+@pytest.mark.skip(reason="pytest aborted")
+def test_softmax():
+    def np_softmax(x):
+        return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)
+
+    data = (np.random.random(size=(1, 16, 224, 224)).astype(np.float32) - 0.5) * 100
+    desired = np_softmax(data[:, :3, 0, 0])
+
+    data = Tensor(data)
+    data = data[:, :3, 0, 0]
+    actual = F.softmax(data)
+
+    np.testing.assert_allclose(actual.numpy(), desired, rtol=1e-5)
-- 
GitLab
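
Note (not part of the patch): a minimal standalone sketch of the check the new test_softmax exercises, assuming a MegEngine build where megengine.Tensor and megengine.functional.softmax are importable and that softmax accepts an explicit axis argument. The reference here subtracts the per-row max before exponentiating, a numerically stable variant of the np_softmax helper used in the test.

    import numpy as np
    import megengine.functional as F
    from megengine import Tensor

    def np_softmax(x, axis=1):
        # stable reference: subtract the per-axis max before exponentiating
        x = x - x.max(axis=axis, keepdims=True)
        e = np.exp(x)
        return e / e.sum(axis=axis, keepdims=True)

    # small float32 input with a large dynamic range, as in the test
    data = (np.random.random(size=(4, 8)).astype(np.float32) - 0.5) * 100
    expected = np_softmax(data, axis=1)
    actual = F.softmax(Tensor(data), axis=1).numpy()
    np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=1e-6)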