tensorrt_runtime.cpp 9.7 KB
Newer Older
1 2 3 4
/**
 * \file src/tensorrt/test/tensorrt_runtime.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */

#include "megbrain/comp_node_env.h"
M
Megvii Engine Team 已提交
14
#include "megbrain/opr/basic_arith.h"
15 16 17 18 19 20 21
#include "megbrain/test/autocheck.h"
#include "megbrain/test/helper.h"
#include "megbrain/test/megdnn_helper.h"
#include "megbrain/utils/debug.h"

#if MGB_ENABLE_TENSOR_RT

22
#include "make_trt_net.h"
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
#include "megbrain/tensorrt/tensorrt_opr.h"
#include "megbrain/tensorrt/tensorrt_runtime_opr.h"

#include <random>

using namespace mgb;
using namespace nvinfer1;

template <typename T>
using TensorRTUniquePtr = intl::TensorRTUniquePtr<T>;

// Builds a TensorRT engine from SimpleTensorRTNetwork, serializes it, loads the
// serialized blob through TensorRTRuntimeOpr and checks that the runtime
// operator's output matches net.y within 5e-4.
TEST(TestOprTensorRT, RuntimeBasic) {
    REQUIRE_GPU(1);
    intl::SimpleTensorRTNetwork net;

    // Returns the first output var of a TensorRTRuntimeOpr created from the
    // serialized engine; every TensorRT object is released when the lambda
    // returns, so the operator only sees the serialized bytes.
    auto make_trt = [&net]() {
        auto p = net.create_trt_network(false);
        TensorRTUniquePtr<INetworkDefinition> trt_net{p.second, {}};
        TensorRTUniquePtr<IBuilder> builder{p.first, {}};
        builder->setMaxBatchSize(5);
#if NV_TENSOR_RT_VERSION >= 6001
        TensorRTUniquePtr<IBuilderConfig> build_config{builder->createBuilderConfig()};
        TensorRTUniquePtr<ICudaEngine> cuda_engine{
                builder->buildEngineWithConfig(*trt_net, *build_config)};
#else
        TensorRTUniquePtr<ICudaEngine> cuda_engine{builder->buildCudaEngine(*trt_net)};
#endif
        TensorRTUniquePtr<IHostMemory> mem{cuda_engine->serialize(), {}};
        return TensorRTRuntimeOpr::make(mem->data(), mem->size(), {net.x})[0];
    };
    auto y2 = make_trt();

    // host_z1 receives net.y, host_z2 receives the TensorRT runtime output.
    HostTensorND host_z1;
    HostTensorND host_z2;
    auto func = net.graph->compile(
            {make_callback_copy(net.y, host_z1), make_callback_copy(y2, host_z2)});
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 5e-4);
}

62 63 64 65 66 67 68 69 70
// Same round trip as RuntimeBasic but for BatchedTensorRTNetwork: the runtime
// operator is fed net.x broadcast to a 4-d shape with a leading dimension of 1,
// and its output is compared against net.y within 5e-4.
TEST(TestOprTensorRT, RuntimeBasicBatched) {
    REQUIRE_GPU(1);
    intl::BatchedTensorRTNetwork net;

    // Builds and serializes the engine, then creates the runtime operator from
    // the serialized bytes and returns its first output var.
    auto make_trt = [&net]() {
        auto p = net.create_trt_network(false);
        TensorRTUniquePtr<INetworkDefinition> trt_net{p.second, {}};
        TensorRTUniquePtr<IBuilder> builder{p.first, {}};
        builder->setMaxBatchSize(5);
#if NV_TENSOR_RT_VERSION >= 6001
        TensorRTUniquePtr<IBuilderConfig> build_config{builder->createBuilderConfig()};
        TensorRTUniquePtr<ICudaEngine> cuda_engine{
                builder->buildEngineWithConfig(*trt_net, *build_config)};
#else
        TensorRTUniquePtr<ICudaEngine> cuda_engine{builder->buildCudaEngine(*trt_net)};
#endif
        TensorRTUniquePtr<IHostMemory> mem{cuda_engine->serialize(), {}};
        // Prepend a size-1 leading dimension to the three dimensions of net.x.
        auto nx = opr::Broadcast::make(
                net.x, {1, net.x.shape()[0], net.x.shape()[1], net.x.shape()[2]});
        return TensorRTRuntimeOpr::make(mem->data(), mem->size(), {nx})[0];
    };
    auto y2 = make_trt();

    // host_z1 receives net.y, host_z2 receives the TensorRT runtime output.
    HostTensorND host_z1;
    HostTensorND host_z2;
    auto func = net.graph->compile(
            {make_callback_copy(net.y, host_z1), make_callback_copy(y2, host_z2)});
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 5e-4);
}

92 93 94 95 96 97 98 99 100 101 102
// Builds an engine from ConcatConvTensorRTNetwork, dumps its serialized form to
// the "trt_cuda_engine" output file, and creates the runtime operator directly
// from the in-memory engine (not from the serialized bytes). The operator's
// output is compared against net.y within 1e-4.
TEST(TestOprTensorRT, ConcatRuntimeBasic) {
    REQUIRE_GPU(1);
    intl::ConcatConvTensorRTNetwork net;

    SymbolVar y2;
    {
        auto p = net.create_trt_network(false);
        TensorRTUniquePtr<INetworkDefinition> trt_net{p.second, {}};
        TensorRTUniquePtr<IBuilder> builder{p.first, {}};
        builder->setMaxBatchSize(5);
#if NV_TENSOR_RT_VERSION >= 6001
        TensorRTUniquePtr<IBuilderConfig> build_config{builder->createBuilderConfig()};
        auto cuda_engine = builder->buildEngineWithConfig(*trt_net, *build_config);
#else
        auto cuda_engine = builder->buildCudaEngine(*trt_net);
#endif
        TensorRTUniquePtr<IHostMemory> mem{cuda_engine->serialize(), {}};

        // Write the serialized engine to disk. The handle must be validated
        // first: passing a null FILE* to fwrite/fclose is undefined behaviour.
        const auto engine_path = output_file("trt_cuda_engine");
        FILE* fout = fopen(engine_path.c_str(), "wb");
        mgb_assert(
                fout != nullptr, "failed to open %s for writing",
                engine_path.c_str());
        auto wr = fwrite(mem->data(), 1, mem->size(), fout);
        fclose(fout);
        mgb_assert(wr == mem->size());

        // The raw engine pointer is handed to to_shared_ptr_engine here, so it
        // is intentionally not wrapped in a TensorRTUniquePtr above.
        y2 = TensorRTRuntimeOpr::make(
                TensorRTRuntimeOpr::to_shared_ptr_engine(cuda_engine), {},
                {net.x0, net.x1})[0];
    }

    // host_z1 receives net.y, host_z2 receives the TensorRT runtime output.
    HostTensorND host_z1;
    HostTensorND host_z2;
    auto func = net.graph->compile(
            {make_callback_copy(net.y, host_z1), make_callback_copy(y2, host_z2)});
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
}

// Checks that one compiled function keeps producing matching results when the
// input batch size changes between executions. The engine is built with a max
// batch size of 10; after the initial run the input is regenerated with batch
// sizes 1 and 10 and the comparison against net.y is repeated each time.
TEST(TestOprTensorRT, RuntimeChangeBatchSize) {
    REQUIRE_GPU(1);
    intl::SimpleTensorRTNetwork net;

    // Builds and serializes the engine, then creates the runtime operator from
    // the serialized bytes and returns its first output var.
    auto make_trt = [&net]() {
        auto p = net.create_trt_network(false);
        TensorRTUniquePtr<INetworkDefinition> trt_net{p.second, {}};
        TensorRTUniquePtr<IBuilder> builder{p.first, {}};
        builder->setMaxBatchSize(10);
#if NV_TENSOR_RT_VERSION >= 6001
        TensorRTUniquePtr<IBuilderConfig> build_config{builder->createBuilderConfig()};
        TensorRTUniquePtr<ICudaEngine> cuda_engine{
                builder->buildEngineWithConfig(*trt_net, *build_config)};
#else
        TensorRTUniquePtr<ICudaEngine> cuda_engine{builder->buildCudaEngine(*trt_net)};
#endif
        TensorRTUniquePtr<IHostMemory> mem{cuda_engine->serialize(), {}};
        return TensorRTRuntimeOpr::make(mem->data(), mem->size(), {net.x})[0];
    };
    auto y2 = make_trt();

    // host_z1 receives net.y, host_z2 receives the TensorRT runtime output.
    HostTensorND host_z1;
    HostTensorND host_z2;
    auto func = net.graph->compile(
            {make_callback_copy(net.y, host_z1), make_callback_copy(y2, host_z2)});

    // Initial run with the input the network was created with.
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 5e-4);

    // Re-run with batch size 1.
    *net.host_x = *net.gen({1, 23, 28, 28});
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 5e-4);

    // Re-run with batch size 10, the max batch size set on the builder.
    *net.host_x = *net.gen({10, 23, 28, 28});
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 5e-4);
}

162 163 164 165 166 167 168 169 170 171
#if NV_TENSOR_RT_VERSION >= 6001
// Builds an explicit-batch TensorRT network whose INT8 input is restricted to
// the kCHW4 tensor format and whose FLOAT output is restricted to kLINEAR. The
// network subtracts a constant "mean" tensor from the input. The same
// subtraction is done with MegBrain operators, and the two results are
// required to be exactly equal.
TEST(TestOprTensorRT, IOFormatFree) {
    size_t N = 1, C = 3, H = 7, W = 7;
    REQUIRE_GPU(1);

    // ---- TensorRT side: data(int8, CHW4) - mean(float) -> float, linear ----
    TensorRTUniquePtr<IBuilder> builder{
            createInferBuilder(TensorRTOpr::Logger::instance()), {}};
    nvinfer1::NetworkDefinitionCreationFlags flags =
            1 << static_cast<int>(
                    nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    TensorRTUniquePtr<INetworkDefinition> network{builder->createNetworkV2(flags), {}};
    auto cast = [](size_t i) { return static_cast<int>(i); };
    ITensor* data = network->addInput(
            "data", DataType::kINT8, Dims4{cast(N), cast(C), cast(H), cast(W)});
    TensorFormats formats = 1 << static_cast<int>(nvinfer1::TensorFormat::kCHW4);
    data->setAllowedFormats(formats);
    data->setDynamicRange(-127.f * 1.2f, 127.f * 1.2f);

    // Random float tensor used as the constant operand; the Weights struct only
    // points at the host buffer owned by `mean`.
    HostTensorGenerator<> fgen;
    auto mean = fgen({N, C, H, W});
    Weights mean_weights{DataType::kFLOAT, nullptr, 0};
    mean_weights.values = mean->raw_ptr();
    mean_weights.count = N * C * H * W;
    auto constant = network->addConstant(
            Dims4{cast(N), cast(C), cast(H), cast(W)}, mean_weights);
    auto out = network->addElementWise(
            *network->getInput(0), *constant->getOutput(0), ElementWiseOperation::kSUB);
    out->getOutput(0)->setDynamicRange(-127.f * 2.3f, 127.f * 2.3f);
    network->markOutput(*out->getOutput(0));
    network->getInput(0)->setType(DataType::kINT8);
    network->getOutput(0)->setType(DataType::kFLOAT);
    network->getOutput(0)->setAllowedFormats(
            1 << static_cast<int>(nvinfer1::TensorFormat::kLINEAR));

    TensorRTUniquePtr<IBuilderConfig> build_config{builder->createBuilderConfig()};
    build_config->setFlag(BuilderFlag::kINT8);
    build_config->setFlag(BuilderFlag::kSTRICT_TYPES);
    TensorRTUniquePtr<ICudaEngine> cuda_engine{
            builder->buildEngineWithConfig(*network, *build_config)};
    TensorRTUniquePtr<IHostMemory> mem{cuda_engine->serialize(), {}};

    // ---- MegBrain reference: TypeCvt(x, float32) - mean ----
    HostTensorGenerator<dtype::Int8> gen;
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;
    // Creates a named host-to-device input filled by `gen` and converts it to
    // the requested dtype.
    auto mkvar = [&](const char* name, const TensorShape& shp, const DType& dtype) {
        return opr::TypeCvt::make(
                opr::Host2DeviceCopy::make(*graph, gen(shp)).rename(name), dtype);
    };
    auto x = mkvar("x", {N, C, H, W}, dtype::QuantizedS8(1.2f));
    auto fx = opr::TypeCvt::make(x, dtype::Float32());
    auto wval = opr::SharedDeviceTensor::make(*graph, *mean).rename("mean");
    auto z = fx - wval;
    HostTensorND y1;
    auto func1 = graph->compile({make_callback_copy(z, y1)});
    func1->execute();

    // ---- Feed the TensorRT runtime operator ----
    // Append one all-zero channel along axis 1 so the channel count becomes
    // C + 1 = 4, which the reshape below divides by 4.
    TensorShape shp{N, 1, H, W};
    auto host =
            std::make_shared<HostTensorND>(x.node()->comp_node(), x.node()->dtype());
    host->resize(shp);
    auto ptr = host->raw_ptr();
    size_t size_bytes = TensorLayout{shp, x.node()->dtype()}.span().dist_byte();
    std::memset(ptr, 0, size_bytes);
    auto padding = opr::ImmutableTensor::make(*graph, *host);
    x = opr::Concat::make({x, padding}, 1);

    // Rearranges a 4-d (d0, d1, d2, d3) var into (d0, d1 / 4, d2, d3, 4):
    // reshape to (d0, d1 / 4, 4, d2, d3), then move the size-4 axis last.
    auto nchw2nchw4 = [](SymbolVar x) {
        auto xshp = opr::GetVarShape::make(x);

        auto cv = [&x](int v) { return x.make_scalar(v); };
        auto sub = [&xshp, &cv](int idx) {
            return opr::IndexAt::make(xshp, {{0, cv(idx)}});
        };
        auto tshp = opr::Concat::make({sub(0), sub(1) / 4, cv(4), sub(2), sub(3)}, 0);
        auto y0 = opr::Reshape::make(x, tshp);
        auto y1 = opr::Dimshuffle::make(y0, {0, 1, 3, 4, 2});
        return y1;
    };
    x = nchw2nchw4(x);
    auto trt = TensorRTRuntimeOpr::make(mem->data(), mem->size(), {x})[0];
    HostTensorND y2;
    auto func2 = graph->compile({make_callback_copy(trt, y2)});
    func2->execute();
    MGB_ASSERT_TENSOR_EQ(y1, y2);
}
#endif

247 248 249
#endif  // MGB_ENABLE_TENSOR_RT

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}