提交 07ab8cb6 编写于 作者: M Megvii Engine Team

feat(dnn): add param_pack for rocm

GitOrigin-RevId: 2180504c714a52974f9d96fe49597415a9f1c8fe
上级 6ac68090
......@@ -36,6 +36,7 @@
#include "src/rocm/argmxx/opr_impl.h"
#include "src/rocm/sleep/opr_impl.h"
#include "src/rocm/batch_normalization/opr_impl.h"
#include "src/rocm/param_pack/opr_impl.h"
#include <miopen/version.h>
#include <hip/hip_version.h>
......@@ -174,6 +175,7 @@ MEGDNN_SPECIALIZE_CREATE_OPERATOR(ArgminForward);
MEGDNN_SPECIALIZE_CREATE_OPERATOR(SleepForward);
MEGDNN_SPECIALIZE_CREATE_OPERATOR(BNForward);
MEGDNN_SPECIALIZE_CREATE_OPERATOR(BNBackward);
MEGDNN_SPECIALIZE_CREATE_OPERATOR(ParamPackConcat);
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpragmas"
......
/**
* \file dnn/src/rocm/param_pack/opr_impl.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "hcc_detail/hcc_defs_prologue.h"
#include "src/rocm/param_pack/opr_impl.h"
#include "src/rocm/param_pack/param_pack.h.hip"
#include "src/rocm/utils.h"
namespace megdnn {
namespace rocm {
size_t ParamPackConcatImpl::get_workspace_in_bytes(const TensorShapeArray& srcs,
                                                   const TensorShape&,
                                                   const TensorShape&) {
    // The workspace holds a device-side copy of the source-pointer table:
    // exec_internal() memcpys sizeof(const T*) * nr_srcs bytes into it.
    // Size it in pointer units (not sizeof(size_t)) so producer and
    // consumer agree even on an ABI where sizeof(size_t) != sizeof(void*).
    return sizeof(void*) * srcs.size();
}
template <typename T>
void ParamPackConcatImpl::exec_internal(_megdnn_tensor_in srcs,
                                        _megdnn_tensor_in offsets,
                                        _megdnn_tensor_out dst,
                                        _megdnn_workspace workspace) {
    // Number of source tensors and total element count of the packed output.
    size_t nr_srcs = srcs.layout.shape[0];
    size_t total_elems = dst.layout.total_nr_elems();
    auto stream = hip_stream(this->handle());

    // `srcs` carries a host-side table of device pointers, one per tensor.
    auto host_ptr_table = static_cast<const T**>(srcs.raw_ptr);
    megdnn_assert_internal(host_ptr_table);

    // Stage the pointer table into the device-visible workspace buffer so
    // the kernel can index it.
    auto dev_ptr_table = reinterpret_cast<const T**>(workspace.raw_ptr);
    hip_check(hipMemcpyAsync(dev_ptr_table, host_ptr_table,
                             sizeof(const T*) * nr_srcs,
                             hipMemcpyHostToDevice, stream));

    param_pack::concat_proxy<T>(dev_ptr_table, dst.ptr<T>(), nr_srcs,
                                total_elems, offsets.ptr<int32_t>(), stream);
}
void ParamPackConcatImpl::exec(_megdnn_tensor_in srcs,
                               _megdnn_tensor_in offsets,
                               _megdnn_tensor_out dst,
                               _megdnn_workspace workspace) {
    check_exec(dst.layout, offsets.layout, srcs.layout);
    // Dispatch on the element type of the packed destination tensor; the
    // first matching dtype runs the typed implementation and returns.
#define DISPATCH(DType)                                      \
    if (dst.layout.dtype == DType()) {                       \
        using ctype = typename DTypeTrait<DType>::ctype;     \
        exec_internal<ctype>(srcs, offsets, dst, workspace); \
        return;                                              \
    }
    MEGDNN_FOREACH_COMPUTING_DTYPE(DISPATCH)
#undef DISPATCH
    // No computing dtype matched.
    megdnn_throw("bad type");
}
} // namespace rocm
} // namespace megdnn
/**
* \file dnn/src/rocm/param_pack/opr_impl.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "megdnn/oprs.h"
namespace megdnn {
namespace rocm {
// ROCm implementation of ParamPackConcat: concatenates many parameter
// tensors into one contiguous device buffer.
class ParamPackConcatImpl final : public ParamPackConcat {
public:
    using ParamPackConcat::ParamPackConcat;

    // srcs: tensor whose raw_ptr is a host-side table of per-tensor device
    //       pointers (see exec_internal in opr_impl.cpp).
    // table: int32 offsets tensor; dst: packed output buffer.
    void exec(_megdnn_tensor_in srcs, _megdnn_tensor_in table,
              _megdnn_tensor_out dst, _megdnn_workspace workspace) override;

    // Workspace holds a device-side copy of the source pointer table
    // (one pointer per entry of `srcs`).
    size_t get_workspace_in_bytes(const TensorShapeArray& srcs,
                                  const TensorShape& table,
                                  const TensorShape& dst) override;

private:
    // Typed implementation, dispatched from exec() by dst's dtype.
    template <typename T>
    void exec_internal(_megdnn_tensor_in srcs, _megdnn_tensor_in table,
                       _megdnn_tensor_out dst, _megdnn_workspace workspace);
};
} // namespace rocm
} // namespace megdnn
/**
* \file dnn/src/rocm/param_pack/param_pack.cpp.hip
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "hcc_detail/hcc_defs_prologue.h"
#include "src/rocm/param_pack/param_pack.h.hip"
#include "megdnn/dtype.h"
#include "src/rocm/utils.h.hip"
namespace megdnn {
namespace rocm {
namespace param_pack {
// One thread per output element: binary-search which source tensor the
// element belongs to, then copy it (or zero-fill alignment gaps).
template <typename T>
__global__ void concat_kernel(const T** srcs, T* dst,
                              const int32_t* offsets,
                              size_t srcs_size,
                              size_t total_size) {
    size_t gid = threadIdx.x + blockIdx.x * blockDim.x;
    if (gid >= total_size)
        return;
    // offsets stores [start, end) pairs: offsets[2*i] / offsets[2*i+1].
    // Find the first tensor whose end offset lies beyond this element.
    size_t lo = 0, hi = srcs_size - 1;
    while (lo < hi) {
        size_t mid = (lo + hi) >> 1;
        if (offsets[(mid << 1) + 1] > gid)
            hi = mid;
        else
            lo = mid + 1;
    }
    if (gid < offsets[lo << 1]) {
        // Element falls in the padding gap before tensor `lo` starts.
        dst[gid] = 0;
    } else {
        dst[gid] = srcs[lo][gid - offsets[lo << 1]];
    }
}
// Host-side launcher: one thread per packed output element.
template <typename T>
void concat_proxy(const T** srcs, T* dst, size_t srcs_size, size_t total_size,
                  const int32_t* offsets,
                  hipStream_t stream) {
    size_t nr_blocks = DIVUP(total_size, NR_THREADS);
    hipLaunchKernelGGL(concat_kernel, nr_blocks, NR_THREADS, 0, stream, srcs,
                       dst, offsets, srcs_size, total_size);
    after_kernel_launch();
}
// Explicitly instantiate concat_proxy for every computing dtype so the
// definitions in this .hip translation unit are available to opr_impl.cpp.
#define INST(T)                                                   \
    template void concat_proxy<T>(const T**, T*, size_t, size_t,  \
                                  const int32_t*,                 \
                                  hipStream_t);
#define cb(DType) INST(typename DTypeTrait<DType>::ctype)
MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
#undef INST
} // namespace param_pack
} // namespace rocm
} // namespace megdnn
// vim: syntax=cpp.doxygen
/**
* \file dnn/src/rocm/param_pack/param_pack.h.hip
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "hip_header.h"
#include <stdint.h>
#include <stdio.h>
namespace megdnn {
namespace rocm {
namespace param_pack {
// Launches the concat kernel on `stream`: packs `srcs_size` device tensors
// (device pointers in `srcs`, [start, end) element offsets interleaved in
// `offsets`) into `dst` of `total_size` elements, zero-filling gaps.
template <typename T>
void concat_proxy(const T** srcs, T* dst, size_t srcs_size, size_t total_size,
                  const int32_t* offsets, hipStream_t stream);
} // namespace param_pack
} // namespace rocm
} // namespace megdnn
// vim: syntax=cpp.doxygen
/**
* \file dnn/test/rocm/param_pack.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "hcc_detail/hcc_defs_prologue.h"
#include "test/rocm/fixture.h"
#include "test/common/checker.h"
#include "test/common/utils.h"
using namespace megdnn;
using namespace test;
namespace {
// Computes the [start, end) element offsets of each tensor inside the
// packed buffer, aligning every start to `alignment` bytes.
// Returns 2*shapes.size() interleaved values: {start0, end0, start1, ...}.
template <class T>
std::vector<int32_t> create_offsets(const TensorShapeArray& shapes,
                                    size_t alignment) {
    size_t dtype_size = sizeof(T);
    if (alignment < dtype_size)
        alignment = dtype_size;
    // Convert byte alignment to element alignment.
    // NOTE(review): assumes alignment is a multiple of dtype_size and a
    // power of two — the bitmask rounding below is wrong otherwise.
    alignment /= dtype_size;
    auto get_aligned = [alignment](size_t v) {
        auto mod = v & (alignment - 1);
        return v + ((alignment - mod) & (alignment - 1));
    };
    // Use int32_t directly for consistency with the declared return type
    // (the original used the dt_int32 alias here).
    std::vector<int32_t> offsets(shapes.size() << 1);
    size_t offset = 0;
    for (size_t i = 0; i < shapes.size(); i++) {
        offset = get_aligned(offset);
        offsets[i << 1] = offset;
        offset += shapes[i].total_nr_elems();
        offsets[(i << 1) + 1] = offset;
    }
    return offsets;
}
// Builds the expected packed buffer on the host: copies each parameter's
// data into its [start, end) slot, leaving alignment gaps zeroed.
template <class T>
std::vector<T> create_pack(size_t pack_size,
                           const std::vector<int32_t>& offsets,
                           const std::vector<std::vector<T>>& ptr) {
    megdnn_assert(pack_size == static_cast<size_t>(offsets.back()));
    std::vector<T> packed(pack_size, 0);
    size_t nr_params = offsets.size() / 2;
    for (size_t idx = 0; idx < nr_params; ++idx) {
        size_t begin = offsets[idx * 2];
        size_t end = offsets[idx * 2 + 1];
        for (size_t k = begin; k < end; ++k)
            packed[k] = ptr[idx][k - begin];
    }
    return packed;
}
// Generates `nr_params` host-side parameter tensors filled with random
// values, one vector per shape.
template <class T>
std::vector<std::vector<T>> create_params(size_t nr_params,
                                          const TensorShapeArray& shapes) {
    std::vector<std::vector<T>> result;
    result.reserve(nr_params);
    for (size_t i = 0; i < nr_params; ++i) {
        std::vector<T> values(shapes[i].total_nr_elems());
        for (auto& v : values)
            v = rand();
        result.push_back(std::move(values));
    }
    return result;
}
// Allocates a device buffer of `size` elements on `handle`; when `data` is
// non-null, copies it host-to-device. Caller frees via test::megdnn_free.
template <class T>
T* create_device_data(Handle* handle, const T* data, size_t size) {
    auto* dev_buf =
            static_cast<T*>(test::megdnn_malloc(handle, size * sizeof(T)));
    if (data != nullptr)
        test::megdnn_memcpy_H2D(handle, dev_buf, data, size * sizeof(T));
    return dev_buf;
}
// End-to-end check of ParamPackConcat on one set of shapes: uploads random
// parameter tensors, runs the operator, and compares the packed result
// against a host-side reference built by create_pack.
template <class T>
void test_param_pack_concat(Handle* handle, const TensorShapeArray& shapes,
                            DType type) {
    auto concat = handle->create_operator<ParamPackConcat>();
    size_t nr_params = shapes.size();
    // Random host data, uploaded tensor-by-tensor to the device.
    std::vector<T*> param_ptrs;
    std::vector<std::vector<T>> params = create_params<T>(nr_params, shapes);
    for (size_t i = 0; i < nr_params; ++i) {
        param_ptrs.push_back(create_device_data<T>(handle, params[i].data(),
                                                   shapes[i].total_nr_elems()));
    }
    // Interleaved [start, end) offsets honoring the device's alignment.
    std::vector<int32_t> offsets =
            create_offsets<T>(shapes, handle->alignment_requirement());
    size_t pack_size = offsets.back();
    int32_t* offsets_gpu =
            create_device_data<int32_t>(handle, offsets.data(), offsets.size());
    std::vector<T> expected_pack = create_pack<T>(pack_size, offsets, params);
    // Uninitialized device output buffer (nullptr => allocate only).
    T* pack_gpu = create_device_data<T>(handle, nullptr, expected_pack.size());
    TensorLayout dst_layout({pack_size}, type);
    TensorND dst_tensor(pack_gpu, dst_layout);
    TensorLayout offsets_layout({offsets.size()}, dtype::Int32());
    TensorND offsets_tensor(offsets_gpu, offsets_layout);
    test::WorkspaceWrapper workspace(
            handle, concat->get_workspace_in_bytes(shapes, offsets_layout,
                                                   {pack_size}));
    // srcs tensor wraps the host-side table of device pointers; the Int32
    // layout here only conveys the element count, not the real dtype.
    TensorND src_tensor(param_ptrs.data(),
                        TensorLayout({nr_params}, dtype::Int32()));
    concat->exec(src_tensor, offsets_tensor, dst_tensor, workspace.workspace());
    // check: download the packed buffer and compare element-wise.
    T* actual_pack = static_cast<T*>(malloc(pack_size * sizeof(T)));
    test::megdnn_memcpy_D2H(handle, actual_pack, pack_gpu,
                            sizeof(T) * pack_size);
    for (size_t i = 0; i < pack_size; ++i) {
        ASSERT_EQ(actual_pack[i], expected_pack[i]);
    }
    // Release host and device buffers.
    free(actual_pack);
    test::megdnn_free(handle, pack_gpu);
    test::megdnn_free(handle, offsets_gpu);
    for (auto ptr : param_ptrs) {
        test::megdnn_free(handle, ptr);
    }
}
} // namespace
// Runs the concat check over shape sets ranging from a single scalar to a
// mix of large multi-dimensional tensors, for three computing dtypes.
TEST_F(ROCM, PARAM_PACK) {
    SmallVector<TensorShapeArray> shapes_vec;
    shapes_vec.push_back({{1}});
    shapes_vec.push_back({{129}, {21}});
    shapes_vec.push_back({{15}, {21}, {34}});
    shapes_vec.push_back({{1, 2}, {3, 5}, {5, 8}, {7, 11}, {9, 14}});
    shapes_vec.push_back({{1, 2},
                          {3, 5},
                          {1},
                          {3, 3, 3, 4},
                          {71},
                          {9, 14},
                          {111, 111, 111},
                          {128, 128, 128}});
    for (auto shapes : shapes_vec) {
        test_param_pack_concat<int32_t>(handle_rocm(), shapes, dtype::Int32());
        test_param_pack_concat<int16_t>(handle_rocm(), shapes, dtype::Int16());
        test_param_pack_concat<float>(handle_rocm(), shapes, dtype::Float32());
    }
}
// vim: syntax=cpp.doxygen
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册