From 9f352b1c45204f6a108fd2a4655cdb93bf2d6c66 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team <megengine@megvii.com>
Date: Fri, 5 Jun 2020 15:14:44 +0800
Subject: [PATCH] feat(megbrain/dnn): add indexing remap int32 for naive and cuda

GitOrigin-RevId: 5f66d51de4751d77fc05e2849388fc6dfbae4a53
---
 dnn/src/common/tensor_remap.cpp            |   6 +-
 dnn/src/cuda/tensor_remap/opr_impl.cpp     |  51 ++++--
 dnn/src/cuda/tensor_remap/tensor_remap.cu  | 173 +++++++++++----------
 dnn/src/cuda/tensor_remap/tensor_remap.cuh |  30 ++--
 dnn/src/naive/tensor_remap/opr_impl.cpp    | 144 ++++++++++-------
 dnn/test/cuda/tensor_remap.cpp             |  57 +++----
 6 files changed, 265 insertions(+), 196 deletions(-)

diff --git a/dnn/src/common/tensor_remap.cpp b/dnn/src/common/tensor_remap.cpp
index 903bbd792..eabbd6acb 100644
--- a/dnn/src/common/tensor_remap.cpp
+++ b/dnn/src/common/tensor_remap.cpp
@@ -32,9 +32,11 @@ void IndexingRemapBase::check_layout_fwd(const TensorLayout &src,
     }
     megdnn_assert(map.shape[dst.ndim] == src.ndim, "%s", errmsg_c);
-    megdnn_assert(src.dtype == dtype::Float32());
+    megdnn_assert(dst.dtype == src.dtype);
+    megdnn_assert(src.dtype == dtype::Float32() || src.dtype == dtype::Int32(),
+                  "indexing remap only supports float32/int32, got %s",
+                  src.dtype.name());
     megdnn_assert(map.dtype == dtype::Int32());
-    megdnn_assert(dst.dtype == dtype::Float32());
 }
 
 void IndexingRemapForward::deduce_layout(const TensorLayout &src,
diff --git a/dnn/src/cuda/tensor_remap/opr_impl.cpp b/dnn/src/cuda/tensor_remap/opr_impl.cpp
index 65feeb9dd..c1c8a9194 100644
--- a/dnn/src/cuda/tensor_remap/opr_impl.cpp
+++ b/dnn/src/cuda/tensor_remap/opr_impl.cpp
@@ -36,13 +36,23 @@ void IndexingRemapForwardImpl::exec(_megdnn_tensor_in src,
     for (size_t i = 0_z; i < dst.layout.ndim; ++i) {
         dshape.data[i] = dst.layout.shape[i];
     }
-    // Invoke kernel
-    tensor_remap::forward(src.ptr<dt_float32>(),
-            map.ptr<dt_int32>(),
-            dst.ptr<dt_float32>(),
-            src.layout.ndim, dst.layout.ndim,
-            sstride, dstride, dshape,
-            cuda_stream(handle()));
+    // Invoke kernel
+#define cb(dt)                                                               \
+    if (src.layout.dtype.enumv() == DTypeTrait<dt>::enumv) {                 \
+        using ctype = DTypeTrait<dt>::ctype;                                  \
+        tensor_remap::forward<ctype>(src.ptr<ctype>(), map.ptr<dt_int32>(),   \
+                                     dst.ptr<ctype>(), src.layout.ndim,       \
+                                     dst.layout.ndim, sstride, dstride,       \
+                                     dshape, cuda_stream(handle()));          \
+        return;                                                               \
+    }
+    cb(dtype::Float32)
+    cb(dtype::Int32)
+#undef cb
+    megdnn_throw(
+            ssprintf("cuda indexing remap forward only supports "
+                     "float32/int32 dtype, got %s",
+                     src.layout.dtype.name()));
 }
 
 void IndexingRemapBackwardImpl::exec(_megdnn_tensor_in diff,
@@ -69,18 +79,27 @@ void IndexingRemapBackwardImpl::exec(_megdnn_tensor_in diff,
     for (size_t i = 0_z; i < diff.layout.ndim; ++i) {
         dshape.data[i] = diff.layout.shape[i];
     }
-    // Invoke kernel
-    tensor_remap::backward(diff.ptr<dt_float32>(),
-            map.ptr<dt_int32>(),
-            grad.ptr<dt_float32>(),
-            grad.layout.ndim, diff.layout.ndim,
-            sstride, dstride, sshape, dshape,
-            param().is_non_overlapping,
-            cuda_stream(handle()));
+
+    // Invoke kernel
+#define cb(dt)                                                                \
+    if (diff.layout.dtype.enumv() == DTypeTrait<dt>::enumv) {                 \
+        using ctype = DTypeTrait<dt>::ctype;                                  \
+        tensor_remap::backward<ctype>(                                        \
+                diff.ptr<ctype>(), map.ptr<dt_int32>(), grad.ptr<ctype>(),    \
+                grad.layout.ndim, diff.layout.ndim, sstride, dstride, sshape, \
+                dshape, param().is_non_overlapping, cuda_stream(handle()));   \
+        return;                                                               \
+    }
+    cb(dtype::Float32)
+    cb(dtype::Int32)
+
+    megdnn_throw(
+            ssprintf("cuda indexing remap backward only supports "
+                     "float32/int32 dtype, got %s",
+                     diff.layout.dtype.name()));
 }
 
 } // namespace cuda
 } // namespace megdnn
 
 // vim: syntax=cpp.doxygen
-
diff --git a/dnn/src/cuda/tensor_remap/tensor_remap.cu b/dnn/src/cuda/tensor_remap/tensor_remap.cu
index 24aea89c8..0c79b28ed 100644
--- a/dnn/src/cuda/tensor_remap/tensor_remap.cu
+++ b/dnn/src/cuda/tensor_remap/tensor_remap.cu
@@ -6,28 +6,29 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
-#include "src/cuda/tensor_remap/tensor_remap.cuh"
 #include "src/cuda/query_blocksize.cuh"
+#include "src/cuda/tensor_remap/tensor_remap.cuh"
 
 namespace megdnn {
 namespace cuda {
-namespace tensor_remap {
+namespace {
 
-__global__ void forward_kernel(const float *src, const int *map, float *dst,
-                               uint32_t sdim, uint32_t ddim,
-                               array_wrapper<int, MEGDNN_MAX_NDIM> sstride,
-                               array_wrapper<int, MEGDNN_MAX_NDIM> dstride,
-                               array_wrapper<uint32_t, MEGDNN_MAX_NDIM> dshape,
-                               uint32_t total)
-{
+template <typename ctype>
+__global__ void forward_kernel(const ctype* src, const int* map, ctype* dst,
+                               uint32_t sdim, uint32_t ddim,
+                               array_wrapper<int, MEGDNN_MAX_NDIM> sstride,
+                               array_wrapper<int, MEGDNN_MAX_NDIM> dstride,
+                               array_wrapper<uint32_t, MEGDNN_MAX_NDIM> dshape,
+                               uint32_t total) {
     uint32_t didx_cont = threadIdx.x + blockIdx.x * blockDim.x;
     if (didx_cont < total) {
         uint32_t midx = didx_cont * sdim;
         uint32_t didx = 0u;
         for (uint32_t j = ddim; j > 0u; --j) {
-            uint32_t i = j-1u;
+            uint32_t i = j - 1u;
             uint32_t didx_cur = didx_cont % dshape.data[i];
             didx_cont /= dshape.data[i];
             didx += didx_cur * dstride.data[i];
@@ -41,34 +42,16 @@ __global__ void forward_kernel(const float *src, const int *map, float *dst,
     }
 }
 
-void forward(const float *src, const int *map, float *dst,
-             uint32_t sdim, uint32_t ddim,
-             const array_wrapper<int, MEGDNN_MAX_NDIM> &sstride,
-             const array_wrapper<int, MEGDNN_MAX_NDIM> &dstride,
-             const array_wrapper<uint32_t, MEGDNN_MAX_NDIM> &dshape,
-             cudaStream_t stream)
-{
-    uint32_t total = 1u;
-    for (uint32_t i = 0u; i < ddim; ++i) total *= dshape.data[i];
-    uint32_t threads = query_blocksize_for_kernel((void *)&forward_kernel);
-    uint32_t blocks = DIVUP(total, threads);
-    forward_kernel<<<blocks, threads, 0, stream>>>(src, map, dst,
-                                                   sdim, ddim,
-                                                   sstride, dstride, dshape,
-                                                   total);
-    after_kernel_launch();
-}
-
-__global__ void fill_zero_kernel(float *a, uint32_t dim,
-                                 array_wrapper<int, MEGDNN_MAX_NDIM> stride,
-                                 array_wrapper<uint32_t, MEGDNN_MAX_NDIM> shape,
-                                 uint32_t total)
-{
+template <typename ctype>
+__global__ void fill_zero_kernel(ctype* a, uint32_t dim,
+                                 array_wrapper<int, MEGDNN_MAX_NDIM> stride,
+                                 array_wrapper<uint32_t, MEGDNN_MAX_NDIM> shape,
+                                 uint32_t total) {
     uint32_t idx_cont = threadIdx.x + blockIdx.x * blockDim.x;
     if (idx_cont < total) {
         uint32_t idx = 0u;
         for (uint32_t j = dim; j > 0u; --j) {
-            uint32_t i = j-1u;
+            uint32_t i = j - 1u;
             uint32_t idx_cur = idx_cont % shape.data[i];
             idx_cont /= shape.data[i];
             idx += idx_cur * stride.data[i];
@@ -77,19 +60,19 @@ __global__ void fill_zero_kernel(float *a, uint32_t dim,
     }
 }
 
-__global__ void backward_kernel(const float *diff, const int *map, float *grad,
-                                uint32_t sdim, uint32_t ddim,
-                                array_wrapper<int, MEGDNN_MAX_NDIM> sstride,
-                                array_wrapper<int, MEGDNN_MAX_NDIM> dstride,
-                                array_wrapper<uint32_t, MEGDNN_MAX_NDIM> dshape,
-                                uint32_t total)
-{
+template <typename ctype>
+__global__ void backward_kernel(const ctype* diff, const int* map, ctype* grad,
+                                uint32_t sdim, uint32_t ddim,
+                                array_wrapper<int, MEGDNN_MAX_NDIM> sstride,
+                                array_wrapper<int, MEGDNN_MAX_NDIM> dstride,
+                                array_wrapper<uint32_t, MEGDNN_MAX_NDIM> dshape,
+                                uint32_t total) {
     uint32_t didx_cont = threadIdx.x + blockIdx.x * blockDim.x;
     if (didx_cont < total) {
         uint32_t midx = didx_cont * sdim;
         uint32_t didx = 0u;
         for (uint32_t j = ddim; j > 0u; --j) {
-            uint32_t i = j-1u;
+            uint32_t i = j - 1u;
             uint32_t didx_cur = didx_cont % dshape.data[i];
             didx_cont /= dshape.data[i];
             didx += didx_cur * dstride.data[i];
@@ -103,20 +86,18 @@ __global__ void backward_kernel(const float *diff, const int *map, float *grad,
     }
 }
 
+template <typename ctype>
 __global__ void backward_kernel_non_overlapping(
-        const float *diff, const int *map, float *grad,
-        uint32_t sdim, uint32_t ddim,
-        array_wrapper<int, MEGDNN_MAX_NDIM> sstride,
+        const ctype* diff, const int* map, ctype* grad, uint32_t sdim,
+        uint32_t ddim, array_wrapper<int, MEGDNN_MAX_NDIM> sstride,
         array_wrapper<int, MEGDNN_MAX_NDIM> dstride,
-        array_wrapper<uint32_t, MEGDNN_MAX_NDIM> dshape,
-        uint32_t total)
-{
+        array_wrapper<uint32_t, MEGDNN_MAX_NDIM> dshape, uint32_t total) {
     uint32_t didx_cont = threadIdx.x + blockIdx.x * blockDim.x;
     if (didx_cont < total) {
         uint32_t midx = didx_cont * sdim;
         uint32_t didx = 0u;
         for (uint32_t j = ddim; j > 0u; --j) {
-            uint32_t i = j-1u;
+            uint32_t i = j - 1u;
             uint32_t didx_cur = didx_cont % dshape.data[i];
             didx_cont /= dshape.data[i];
             didx += didx_cur * dstride.data[i];
@@ -130,55 +111,91 @@ __global__ void backward_kernel_non_overlapping(
     }
 }
 
-void backward(const float *diff, const int *map, float *grad,
-              uint32_t sdim, uint32_t ddim,
-              const array_wrapper<int, MEGDNN_MAX_NDIM> &sstride,
-              const array_wrapper<int, MEGDNN_MAX_NDIM> &dstride,
-              const array_wrapper<uint32_t, MEGDNN_MAX_NDIM> &sshape,
-              const array_wrapper<uint32_t, MEGDNN_MAX_NDIM> &dshape,
-              bool is_non_overlapping,
-              cudaStream_t stream)
-{
+}  // anonymous namespace
+
+namespace tensor_remap {
+template <typename ctype>
+void forward(const ctype* src, const int* map, ctype* dst, uint32_t sdim,
+             uint32_t ddim, const array_wrapper<int, MEGDNN_MAX_NDIM>& sstride,
+             const array_wrapper<int, MEGDNN_MAX_NDIM>& dstride,
+             const array_wrapper<uint32_t, MEGDNN_MAX_NDIM>& dshape,
+             cudaStream_t stream) {
+    uint32_t total = 1u;
+    for (uint32_t i = 0u; i < ddim; ++i)
+        total *= dshape.data[i];
+    uint32_t threads =
+            query_blocksize_for_kernel((void*)&forward_kernel<ctype>);
+    uint32_t blocks = DIVUP(total, threads);
+    forward_kernel<ctype><<<blocks, threads, 0, stream>>>(
+            src, map, dst, sdim, ddim, sstride, dstride, dshape, total);
+    after_kernel_launch();
+}
+
+template <typename ctype>
+void backward(const ctype* diff, const int* map, ctype* grad, uint32_t sdim,
+              uint32_t ddim, const array_wrapper<int, MEGDNN_MAX_NDIM>& sstride,
+              const array_wrapper<int, MEGDNN_MAX_NDIM>& dstride,
+              const array_wrapper<uint32_t, MEGDNN_MAX_NDIM>& sshape,
+              const array_wrapper<uint32_t, MEGDNN_MAX_NDIM>& dshape,
+              bool is_non_overlapping, cudaStream_t stream) {
     {
         // Fill grad with zeros.
         uint32_t total = 1u;
-        for (uint32_t i = 0u; i < sdim; ++i) total *= sshape.data[i];
-        uint32_t threads = query_blocksize_for_kernel((void *)&fill_zero_kernel);
+        for (uint32_t i = 0u; i < sdim; ++i)
+            total *= sshape.data[i];
+        uint32_t threads =
+                query_blocksize_for_kernel((void*)&fill_zero_kernel<ctype>);
         uint32_t blocks = DIVUP(total, threads);
-        fill_zero_kernel<<<blocks, threads, 0, stream>>>(
+        fill_zero_kernel<ctype><<<blocks, threads, 0, stream>>>(
                 grad, sdim, sstride, sshape, total);
         after_kernel_launch();
    }
    {
        // Update grad.
         uint32_t total = 1u;
-        for (uint32_t i = 0u; i < ddim; ++i) total *= dshape.data[i];
+        for (uint32_t i = 0u; i < ddim; ++i)
+            total *= dshape.data[i];
         if (is_non_overlapping) {
             uint32_t threads = query_blocksize_for_kernel(
-                    (void *)&backward_kernel_non_overlapping);
+                    (void*)&backward_kernel_non_overlapping<ctype>);
             uint32_t blocks = DIVUP(total, threads);
-            backward_kernel_non_overlapping<<<blocks, threads, 0, stream>>>(
-                    diff, map, grad,
-                    sdim, ddim,
-                    sstride, dstride, dshape,
-                    total);
+            backward_kernel_non_overlapping<ctype>
+                    <<<blocks, threads, 0, stream>>>(diff, map, grad, sdim,
+                                                     ddim, sstride, dstride,
+                                                     dshape, total);
         } else {
-            uint32_t threads = query_blocksize_for_kernel(
-                    (void *)&backward_kernel);
+            uint32_t threads =
+                    query_blocksize_for_kernel((void*)&backward_kernel<ctype>);
             uint32_t blocks = DIVUP(total, threads);
-            backward_kernel<<<blocks, threads, 0, stream>>>(diff, map, grad,
-                                                            sdim, ddim,
-                                                            sstride, dstride, dshape,
+            backward_kernel<ctype><<<blocks, threads, 0, stream>>>(
+                    diff, map, grad, sdim, ddim, sstride, dstride, dshape,
                     total);
         }
         after_kernel_launch();
     }
 }
-} // namespace tensor_remap
-} // namespace cuda
-} // namespace megdnn
+#define INST(T)                                                                \
+    template void forward<T>(                                                  \
+            const T* src, const int* map, T* dst, uint32_t sdim,               \
+            uint32_t ddim, const array_wrapper<int, MEGDNN_MAX_NDIM>& sstride, \
+            const array_wrapper<int, MEGDNN_MAX_NDIM>& dstride,                \
+            const array_wrapper<uint32_t, MEGDNN_MAX_NDIM>& dshape,            \
+            cudaStream_t stream);                                              \
+    template void backward<T>(                                                 \
+            const T* diff, const int* map, T* grad, uint32_t sdim,             \
+            uint32_t ddim, const array_wrapper<int, MEGDNN_MAX_NDIM>& sstride, \
+            const array_wrapper<int, MEGDNN_MAX_NDIM>& dstride,                \
+            const array_wrapper<uint32_t, MEGDNN_MAX_NDIM>& sshape,            \
+            const array_wrapper<uint32_t, MEGDNN_MAX_NDIM>& dshape,            \
+            bool is_non_overlapping, cudaStream_t stream);
+INST(dt_float32)
+INST(dt_int32)
 
-// vim: syntax=cpp.doxygen
+#undef INST
+}  // namespace tensor_remap
+}  // namespace cuda
+}  // namespace megdnn
+// vim: syntax=cpp.doxygen
diff --git a/dnn/src/cuda/tensor_remap/tensor_remap.cuh b/dnn/src/cuda/tensor_remap/tensor_remap.cuh
index 0e71be9e9..fa63c3f76 100644
--- a/dnn/src/cuda/tensor_remap/tensor_remap.cuh
+++ b/dnn/src/cuda/tensor_remap/tensor_remap.cuh
@@ -17,25 +17,23 @@ namespace megdnn {
 namespace cuda {
 namespace tensor_remap {
 
-void forward(const float *src, const int *map, float *dst,
-             uint32_t sdim, uint32_t ddim,
-             const array_wrapper<int, MEGDNN_MAX_NDIM> &sstride,
-             const array_wrapper<int, MEGDNN_MAX_NDIM> &dstride,
-             const array_wrapper<uint32_t, MEGDNN_MAX_NDIM> &dshape,
-             cudaStream_t stream);
+template <typename ctype>
+void forward(const ctype* src, const int* map, ctype* dst, uint32_t sdim,
+             uint32_t ddim, const array_wrapper<int, MEGDNN_MAX_NDIM>& sstride,
+             const array_wrapper<int, MEGDNN_MAX_NDIM>& dstride,
+             const array_wrapper<uint32_t, MEGDNN_MAX_NDIM>& dshape,
+             cudaStream_t stream);
 
-void backward(const float *diff, const int *map, float *grad,
-              uint32_t sdim, uint32_t ddim,
-              const array_wrapper<int, MEGDNN_MAX_NDIM> &sstride,
-              const array_wrapper<int, MEGDNN_MAX_NDIM> &dstride,
-              const array_wrapper<uint32_t, MEGDNN_MAX_NDIM> &sshape,
-              const array_wrapper<uint32_t, MEGDNN_MAX_NDIM> &dshape,
-              bool is_non_overlapping,
-              cudaStream_t stream);
+template <typename ctype>
+void backward(const ctype* diff, const int* map, ctype* grad, uint32_t sdim,
+              uint32_t ddim, const array_wrapper<int, MEGDNN_MAX_NDIM>& sstride,
+              const array_wrapper<int, MEGDNN_MAX_NDIM>& dstride,
+              const array_wrapper<uint32_t, MEGDNN_MAX_NDIM>& sshape,
+              const array_wrapper<uint32_t, MEGDNN_MAX_NDIM>& dshape,
+              bool is_non_overlapping, cudaStream_t stream);
 
-} // namespace tensor_remap
+}  // namespace tensor_remap
 } // namespace cuda
 } // namespace megdnn
 
 // vim: syntax=cpp.doxygen
-
diff --git a/dnn/src/naive/tensor_remap/opr_impl.cpp b/dnn/src/naive/tensor_remap/opr_impl.cpp
index d808fa34f..93c11e4d9 100644
--- a/dnn/src/naive/tensor_remap/opr_impl.cpp
+++ b/dnn/src/naive/tensor_remap/opr_impl.cpp
@@ -6,75 +6,107 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #include "src/naive/tensor_remap/opr_impl.h"
 #include "src/common/utils.h"
 #include "src/naive/handle.h"
 
-namespace megdnn {
-namespace naive {
+using namespace megdnn;
+using namespace naive;
+namespace {
+
+template <typename ctype>
+void forward(const TensorND& src, const TensorND& map, const TensorND& dst) {
+    auto&& sshape = src.layout;
+    auto&& mshape = map.layout;
+    auto&& dshape = dst.layout;
+    // Last element is zero to facilitate maddr calculation.
+    std::vector<size_t> didx(dshape.ndim + 1, 0_z);
+    do {
+        auto maddr = get_linear_addr(didx.data(), mshape.shape, mshape.ndim);
+        std::vector<size_t> sidx(sshape.ndim);
+        for (size_t i = 0_z; i < sshape.ndim; ++i) {
+            sidx[i] = map.ptr<dt_int32>()[maddr + i];
+        }
+        auto saddr = get_linear_addr_noncont(sidx.data(), src.layout);
+        auto daddr = get_linear_addr_noncont(didx.data(), dst.layout);
+        dst.ptr<ctype>()[daddr] = src.ptr<ctype>()[saddr];
+    } while (get_next_addr(didx.data(), dshape.shape, dshape.ndim));
+}
+
+template <typename ctype>
+void backward(const TensorND& diff, const TensorND& map, const TensorND& grad) {
+    auto&& sshape = grad.layout;
+    auto&& mshape = map.layout;
+    auto&& dshape = diff.layout;
+    std::vector<size_t> sidx(sshape.ndim, 0_z);
+    {
+        // Set grad to zero.
+        do {
+            auto saddr = get_linear_addr_noncont(sidx.data(), grad.layout);
+            grad.ptr<ctype>()[saddr] = 0.0f;
+        } while (get_next_addr(sidx.data(), sshape.shape, sshape.ndim));
+    }
+    std::vector<size_t> didx(dshape.ndim + 1, 0_z);
+    do {
+        auto maddr = get_linear_addr(didx.data(), mshape.shape, mshape.ndim);
+        std::vector<size_t> sidx(sshape.ndim);
+        for (size_t i = 0_z; i < sshape.ndim; ++i) {
+            sidx[i] = map.ptr<dt_int32>()[maddr + i];
+        }
+        auto saddr = get_linear_addr_noncont(sidx.data(), grad.layout);
+        auto daddr = get_linear_addr_noncont(didx.data(), diff.layout);
+        grad.ptr<ctype>()[saddr] += diff.ptr<ctype>()[daddr];
+    } while (get_next_addr(didx.data(), dshape.shape, dshape.ndim));
+}
+
+}  // anonymous namespace
 
 void IndexingRemapForwardImpl::exec(_megdnn_tensor_in src,
-                                    _megdnn_tensor_in map,
-                                    _megdnn_tensor_out dst,
-                                    _megdnn_workspace workspace)
-{
+                                    _megdnn_tensor_in map,
+                                    _megdnn_tensor_out dst,
+                                    _megdnn_workspace workspace) {
     check_exec(src.layout, map.layout, dst.layout, workspace.size);
-    auto kern = [=]() {
-        auto &&sshape = src.layout;
-        auto &&mshape = map.layout;
-        auto &&dshape = dst.layout;
-        // Last element is zero to facilitate maddr calculation.
-        std::vector<size_t> didx(dshape.ndim+1, 0_z);
-        do {
-            auto maddr = get_linear_addr(didx.data(), mshape.shape, mshape.ndim);
-            std::vector<size_t> sidx(sshape.ndim);
-            for (size_t i = 0_z; i < sshape.ndim; ++i) {
-                sidx[i] = map.ptr<dt_int32>()[maddr+i];
-            }
-            auto saddr = get_linear_addr_noncont(sidx.data(), src.layout);
-            auto daddr = get_linear_addr_noncont(didx.data(), dst.layout);
-            dst.ptr<dt_float32>()[daddr] = src.ptr<dt_float32>()[saddr];
-        } while (get_next_addr(didx.data(), dshape.shape, dshape.ndim));
-    };
-    MEGDNN_DISPATCH_CPU_KERN_OPR(kern());
+    switch (src.layout.dtype.enumv()) {
+#define cb(dt)                                                                \
+    case DTypeTrait<dt>::enumv:                                               \
+        MEGDNN_DISPATCH_CPU_KERN_OPR(                                         \
+                forward<DTypeTrait<dt>::ctype>(src, map, dst));               \
+        return;
+        cb(dtype::Float32)
+        cb(dtype::Int32)
+#undef cb
+
+        default:
+            megdnn_throw(
+                    ssprintf("unsupported dtype %s in indexing "
+                             "remap forward naive\n",
+                             src.layout.dtype.name()));
+    }
 }
 
 void IndexingRemapBackwardImpl::exec(_megdnn_tensor_in diff,
-                                     _megdnn_tensor_in map,
-                                     _megdnn_tensor_out grad,
-                                     _megdnn_workspace workspace)
-{
+                                     _megdnn_tensor_in map,
+                                     _megdnn_tensor_out grad,
+                                     _megdnn_workspace workspace) {
     check_exec(diff.layout, map.layout, grad.layout, workspace.size);
-    auto kern = [=]() {
-        auto &&sshape = grad.layout;
-        auto &&mshape = map.layout;
-        auto &&dshape = diff.layout;
-        std::vector<size_t> sidx(sshape.ndim, 0_z);
-        {
-            // Set grad to zero.
-            do {
-                auto saddr = get_linear_addr_noncont(sidx.data(), grad.layout);
-                grad.ptr<dt_float32>()[saddr] = 0.0f;
-            } while (get_next_addr(sidx.data(), sshape.shape, sshape.ndim));
-        }
-        std::vector<size_t> didx(dshape.ndim+1, 0_z);
-        do {
-            auto maddr = get_linear_addr(didx.data(), mshape.shape, mshape.ndim);
-            std::vector<size_t> sidx(sshape.ndim);
-            for (size_t i = 0_z; i < sshape.ndim; ++i) {
-                sidx[i] = map.ptr<dt_int32>()[maddr+i];
-            }
-            auto saddr = get_linear_addr_noncont(sidx.data(), grad.layout);
-            auto daddr = get_linear_addr_noncont(didx.data(), diff.layout);
-            grad.ptr<dt_float32>()[saddr] += diff.ptr<dt_float32>()[daddr];
-        } while (get_next_addr(didx.data(), dshape.shape, dshape.ndim));
-    };
-    MEGDNN_DISPATCH_CPU_KERN_OPR(kern());
+    switch (diff.layout.dtype.enumv()) {
+#define cb(dt)                                                                \
+    case DTypeTrait<dt>::enumv:                                               \
+        MEGDNN_DISPATCH_CPU_KERN_OPR(                                         \
+                backward<DTypeTrait<dt>::ctype>(diff, map, grad));            \
+        return;
+        cb(dtype::Float32)
+        cb(dtype::Int32)
+#undef cb
+        default:
+            megdnn_throw(ssprintf(
+                    "unsupported dtype %s in indexing remap backward naive\n",
+                    diff.layout.dtype.name()));
+    }
 }
-} // namespace naive
-} // namespace megdnn
 
 // vim: syntax=cpp.doxygen
diff --git a/dnn/test/cuda/tensor_remap.cpp b/dnn/test/cuda/tensor_remap.cpp
index 0f4bfd8ac..af3828bea 100644
--- a/dnn/test/cuda/tensor_remap.cpp
+++ b/dnn/test/cuda/tensor_remap.cpp
@@ -16,39 +16,42 @@
 namespace megdnn {
 namespace test {
 
-TEST_F(CUDA, TENSOR_REMAP_FORWARD)
-{
+TEST_F(CUDA, TENSOR_REMAP_FORWARD) {
     Checker<IndexingRemap> checker(handle_cuda());
+    TensorShape src{11, 13, 17}, map{3, 5, 7, 3}, dst{3, 5, 7};
     checker.set_dtype(1, dtype::Int32());
-    TensorShape src{11, 13, 17},
-        map{3, 5, 7, 3},
-        dst{3, 5, 7};
-    using namespace tensor_remap;
-    {
-        MapRNG rng(src);
-        checker.set_rng(1, &rng).execs({src, map, {}});
-    }
-    {
-        NonoverlappingMapRNG rng(src);
-        checker.set_rng(1, &rng).execs({src, map, {}});
+    for (auto dt : std::vector<DType>{dtype::Float32(), dtype::Int32()}) {
+        checker.set_dtype(0, dt);
+        checker.set_dtype(2, dt);
+        using namespace tensor_remap;
+        {
+            MapRNG rng(src);
+            checker.set_rng(1, &rng).execs({src, map, {}});
+        }
+        {
+            NonoverlappingMapRNG rng(src);
+            checker.set_rng(1, &rng).execs({src, map, {}});
+        }
     }
 }
 
-TEST_F(CUDA, TENSOR_REMAP_BACKWARD)
-{
+TEST_F(CUDA, TENSOR_REMAP_BACKWARD) {
     Checker<IndexingRemapBackward> checker(handle_cuda());
     checker.set_dtype(1, dtype::Int32());
-    TensorShape src{11, 13, 17},
-        map{3, 5, 7, 3},
-        dst{3, 5, 7};
-    using namespace tensor_remap;
-    {
-        MapRNG rng(src);
-        checker.set_rng(1, &rng).execs({dst, map, src});
-    }
-    {
-        NonoverlappingMapRNG rng(src);
-        checker.set_rng(1, &rng).execs({dst, map, src});
+    TensorShape src{11, 13, 17}, map{3, 5, 7, 3}, dst{3, 5, 7};
+    checker.set_dtype(1, dtype::Int32());
+    for (auto dt : std::vector<DType>{dtype::Float32(), dtype::Int32()}) {
+        checker.set_dtype(0, dt);
+        checker.set_dtype(2, dt);
+        using namespace tensor_remap;
+        {
+            MapRNG rng(src);
+            checker.set_rng(1, &rng).execs({dst, map, src});
+        }
+        {
+            NonoverlappingMapRNG rng(src);
+            checker.set_rng(1, &rng).execs({dst, map, src});
+        }
     }
 }
 
@@ -56,5 +59,3 @@ TEST_F(CUDA, TENSOR_REMAP_BACKWARD)
 } // namespace megdnn
 
 // vim: syntax=cpp.doxygen
-
-
-- 
GitLab