relayout.cpp 9.0 KB
Newer Older
1 2 3 4
#include "hcc_detail/hcc_defs_prologue.h"

#include "megdnn/oprs.h"
#include "test/common/benchmarker.h"
M
Megvii Engine Team 已提交
5
#include "test/common/checker.h"
6 7
#include "test/common/relayout.h"
#include "test/rocm/benchmarker.h"
M
Megvii Engine Team 已提交
8
#include "test/rocm/fixture.h"
9 10 11 12 13

using namespace megdnn;
using namespace test;

namespace {
M
Megvii Engine Team 已提交
14 15
template <typename tag>
class ROCM_RELAYOUT : public ROCM {};
16
TYPED_TEST_SUITE(ROCM_RELAYOUT, relayout::test_types);
17 18 19
TYPED_TEST(ROCM_RELAYOUT, run) {
    relayout::run_test<TypeParam>(this->handle_rocm());
}
M
Megvii Engine Team 已提交
20
}  // namespace
21 22 23 24 25 26 27 28 29 30

//! Checks Relayout on two 1-D float32 cases of 51200 elements each:
//! a contiguous source and a source with element stride 9, both written to a
//! contiguous destination.
TEST_F(ROCM, RELAYOUT_MEMCPY_ASYNC) {
    //! one (source layout, destination layout) pair to feed to the checker
    struct Arg {
        TensorLayout src, dst;
        Arg(TensorLayout src, TensorLayout dst) : src(src), dst(dst) {}
    };

    Checker<Relayout> checker(handle_rocm());
    checker.set_epsilon(1e-3);

    TensorLayout dense({51200}, {1}, dtype::Float32());
    TensorLayout strided({51200}, {9}, dtype::Float32());

    std::vector<Arg> args;
    // test for contig
    args.emplace_back(dense, dense);
    // test for copy_2d
    args.emplace_back(strided, dense);

    for (auto&& test_case : args) {
        checker.set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .execl({test_case.src, test_case.dst});
    }
}

#if MEGDNN_WITH_BENCHMARK
//! Benchmarks RelayoutForward on ROCm when the source is an axis-permuted
//! view of a contiguous tensor and the destination is contiguous with the
//! permuted shape. One line with the measured throughput is printed per
//! layout.
TEST_F(ROCM, RELAYOUT_BENCHMARK) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), true);
    auto benchmarker =
            ROCMBenchmarker<RelayoutForward>(handle_rocm(), handle_naive(false));
    benchmarker.set_display(true);

    //! Runs src -> dst three times and reports the result of the last run;
    //! the first two runs only serve as warm-up.
    auto measure = [&](const TensorLayout& layout, const TensorLayout& src,
                       const TensorLayout& dst) {
        benchmarker.execl({src, dst});
        benchmarker.execl({src, dst});
        auto used = benchmarker.execl({src, dst});
        // The factor 2 counts every element once for the read and once for
        // the write.
        // NOTE(review): the label says "gbps/s", but the value is bytes
        // divided by 2^30 per second (presumably `used` is in milliseconds)
        // -- confirm before relying on the unit.
        printf("layout: %s bandwith: %f gbps/s\n", layout.to_string().c_str(),
               2 * layout.total_nr_elems() * layout.dtype.size() / used * 1000 /
                       (1024 * 1024 * 1024));
    };

    //! benchmark contiguous layout, such as (a, b, c) -> (b, a, c)
    //! just change the first two axis
    auto run = [&](const TensorLayoutArray& layouts) {
        for (auto&& layout : layouts) {
            TensorLayout src = layout.dimshuffle({1, 0, 2});
            TensorLayout dst = layout;
            std::swap(dst.shape[0], dst.shape[1]);
            dst.init_contiguous_stride();
            measure(layout, src, dst);
        }
    };

    TensorLayoutArray layouts_3d = {
            {{12, 23, 2}, dtype::Int32()},     {{12, 23, 8}, dtype::Int32()},
            {{12, 23, 17}, dtype::Int32()},    {{12, 23, 64}, dtype::Int32()},
            {{12, 23, 129}, dtype::Int32()},   {{12, 23, 256}, dtype::Int32()},
            {{12, 23, 1029}, dtype::Int32()},  {{12, 23, 4096}, dtype::Int32()},
            {{12, 23, 9143}, dtype::Int32()},  {{12, 23, 18284}, dtype::Int32()},
            {{2, 2, 1000000}, dtype::Int32()},
    };
    run(layouts_3d);

    //! same idea for (a, b, c, d) -> (a, c, b, d): swap the two middle axes
    auto run2 = [&](const TensorLayoutArray& layouts) {
        for (auto&& layout : layouts) {
            TensorLayout src = layout.dimshuffle({0, 2, 1, 3});
            TensorLayout dst = layout;
            // dst must carry the same permuted shape as src; swapping axes
            // 0 and 1 here (as in `run`) would give dst a different shape
            // than the dimshuffled source.
            std::swap(dst.shape[1], dst.shape[2]);
            dst.init_contiguous_stride();
            measure(layout, src, dst);
        }
    };

    TensorLayoutArray layouts_4d = {
            {{3, 12, 24, 100}, dtype::Int32()},
            {{3, 12, 24, 1029}, dtype::Int32()},
            {{3, 4, 24, 9143}, dtype::Int32()},
            {{3, 4, 24, 18284}, dtype::Int32()},
    };
    run2(layouts_4d);
}

//! Benchmarks RelayoutForward on ROCm for a single float32 (5, 5, 100000)
//! case in which only the last axis is contiguous: source and destination
//! differ in their outermost stride (800000 vs 700000).
TEST_F(ROCM, RELAYOUT_LAST_CONTIG_BENCHMARK) {
    megdnn::rocm::enable_miopen_algo_search(handle_rocm(), true);
    ROCMBenchmarker<RelayoutForward> benchmarker(
            handle_rocm(), handle_naive(false));
    benchmarker.set_display(true);

    TensorLayout src({5, 5, 100000}, {800000, 100000, 1}, dtype::Float32());
    TensorLayout dst({5, 5, 100000}, {700000, 100000, 1}, dtype::Float32());

    // Two warm-up runs; only the third run is reported.
    benchmarker.execl({src, dst});
    benchmarker.execl({src, dst});
    auto used = benchmarker.execl({src, dst});

    // Every element is counted twice: once for the read, once for the write.
    auto bytes_moved = 2 * src.total_nr_elems() * src.dtype.size();
    printf("src: %s dst: %s bandwith: %f gbps/s\n", src.to_string().c_str(),
           dst.to_string().c_str(),
           bytes_moved / used * 1000 / (1024 * 1024 * 1024));
}
#endif

//! Checks Relayout on ROCm against a hand-written list of (src, dst) strided
//! layout pairs: transposed and strided 3-D/4-D tensors (float16 when enabled,
//! and int32), followed by int32 1-D, 2-D, 3-D and partially collapsible 5-D
//! cases.
TEST_F(ROCM, RELAYOUT) {
    //! one (source layout, destination layout) pair
    struct Case {
        TensorLayout src, dst;
        Case(TensorLayout src, TensorLayout dst) : src(src), dst(dst) {}
    };
    std::vector<Case> cases;
    auto add_case = [&cases](const TensorLayout& src, const TensorLayout& dst) {
        cases.emplace_back(src, dst);
    };

#if !MEGDNN_DISABLE_FLOAT16
    // float16: transposed view <-> contiguous strides
    add_case(
            TensorLayout({4, 3, 2}, {2, 8, 1}, dtype::Float16()),
            TensorLayout({4, 3, 2}, {6, 2, 1}, dtype::Float16()));
    add_case(
            TensorLayout({4, 3, 2}, {6, 2, 1}, dtype::Float16()),
            TensorLayout({4, 3, 2}, {2, 8, 1}, dtype::Float16()));
    add_case(
            TensorLayout({2, 4, 3, 5}, {60, 5, 20, 1}, dtype::Float16()),
            TensorLayout({2, 4, 3, 5}, {60, 15, 5, 1}, dtype::Float16()));
    // float16: every stride scaled by 1, 2 or 3
    add_case(
            TensorLayout({2, 3, 4, 5}, {60, 20, 5, 1}, dtype::Float16()),
            TensorLayout({2, 3, 4, 5}, {120, 40, 10, 2}, dtype::Float16()));
    add_case(
            TensorLayout({2, 3, 4, 5}, {120, 40, 10, 2}, dtype::Float16()),
            TensorLayout({2, 3, 4, 5}, {60, 20, 5, 1}, dtype::Float16()));
    add_case(
            TensorLayout({2, 3, 4, 5}, {120, 40, 10, 2}, dtype::Float16()),
            TensorLayout({2, 3, 4, 5}, {180, 60, 15, 3}, dtype::Float16()));
#endif

    // int32: every stride scaled by 1, 2 or 3
    add_case(
            TensorLayout({2, 3, 4, 5}, {60, 20, 5, 1}, dtype::Int32()),
            TensorLayout({2, 3, 4, 5}, {120, 40, 10, 2}, dtype::Int32()));
    add_case(
            TensorLayout({2, 3, 4, 5}, {120, 40, 10, 2}, dtype::Int32()),
            TensorLayout({2, 3, 4, 5}, {60, 20, 5, 1}, dtype::Int32()));
    add_case(
            TensorLayout({2, 3, 4, 5}, {120, 40, 10, 2}, dtype::Int32()),
            TensorLayout({2, 3, 4, 5}, {180, 60, 15, 3}, dtype::Int32()));

    {
        // 1d: all four combinations of element stride 1 and 2
        size_t len = 10000;
        TensorLayout dense({len}, {1}, dtype::Int32());
        TensorLayout sparse({len}, {2}, dtype::Int32());
        add_case(dense, dense);
        add_case(dense, sparse);
        add_case(sparse, dense);
        add_case(sparse, sparse);
    }
    {
        // 2d: the small stride is 2 in the first round and 1 in the second;
        // the large stride is pitch for sources and pitch + 1 for
        // destinations, and either axis may carry the large stride.
        size_t rows = 200, cols = 300;
        ptrdiff_t pitch = 400 * 2;
        for (ptrdiff_t step : {2, 1}) {
            TensorLayout src_row({rows, cols}, {pitch, step}, dtype::Int32());
            TensorLayout src_col({rows, cols}, {step, pitch}, dtype::Int32());
            TensorLayout dst_row({rows, cols}, {pitch + 1, step}, dtype::Int32());
            TensorLayout dst_col({rows, cols}, {step, pitch + 1}, dtype::Int32());
            add_case(src_row, dst_row);
            add_case(src_col, dst_col);
            add_case(src_col, dst_row);
            add_case(src_row, dst_col);
        }
    }
    {
        // 3d: same shape and inner strides, much larger outermost stride on
        // the destination
        size_t m = 20, n = 30, k = 40;
        ptrdiff_t k2 = k;
        TensorLayout src({m, n, k}, {k2 * k2 * 4, k2 * 3, 2}, dtype::Int32());
        TensorLayout dst(
                {m, n, k}, {2 * k2 * k2 * k2 * 4, k2 * 3, 2}, dtype::Int32());
        add_case(src, dst);
    }
    {
        // simplify_layout
        // src: axes (2,3,4) are mutually contiguous, as are (5,6) -> 234..56
        // dst: axis 2 stands alone, axes (3,4,5,6) are contiguous -> 2..3456
        TensorLayout src(
                {2, 3, 4, 5, 6},
                {2 * 3 * 4 * 5 * 6, 2 * 4 * 5 * 6, 2 * 5 * 6, 6, 1},
                dtype::Int32());
        TensorLayout dst(
                {2, 3, 4, 5, 6}, {4 * 3 * 4 * 5 * 6, 4 * 5 * 6, 5 * 6, 6, 1},
                dtype::Int32());
        add_case(src, dst);
    }

    Checker<Relayout> checker(handle_rocm());
    for (auto&& test_case : cases) {
        checker.exec(TensorLayoutArray{test_case.src, test_case.dst});
    }
}

// vim: syntax=cpp.doxygen