user_allocator.cpp 2.9 KB
Newer Older
1
/**
2
 * \file example/cpp_example/user_allocator.cpp
3
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
4
 *
5
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
6
 *
7 8 9
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
 */

#include "../example.h"
#if LITE_BUILD_WITH_MGE
using namespace lite;
using namespace example;

namespace {
//! Example user-defined allocator: it ignores the device arguments and serves
//! every request with the platform's aligned heap-allocation routine.
class CheckAllocator : public lite::Allocator {
public:
    //! Allocate \p size bytes aligned to \p align. The device type and the
    //! device id arguments are unused.
    //! \return the allocated pointer; on the posix_memalign path, nullptr is
    //! returned (after printing a message) when the allocation fails.
    void* allocate(LiteDeviceType, int, size_t size, size_t align) override {
#ifdef WIN32
        return _aligned_malloc(size, align);
#elif defined(__ANDROID__) || defined(ANDROID)
        return memalign(align, size);
#else
        void* ptr = nullptr;
        // posix_memalign returns 0 on success and an error number on failure,
        // so a non-zero value is the failure case.
        const int err = posix_memalign(&ptr, align, size);
        if (err != 0) {
            printf("failed to malloc %zu bytes with align %zu", size, align);
            // the content of ptr is unspecified after a failed call
            return nullptr;
        }
        return ptr;
#endif
    }

    //! Release memory previously returned by allocate(). The device type and
    //! the device id arguments are unused.
    void free(LiteDeviceType, int, void* ptr) override {
#ifdef WIN32
        // memory obtained from _aligned_malloc must go to _aligned_free
        _aligned_free(ptr);
#else
        ::free(ptr);
#endif
    }
};
}  // namespace

//! Example: run one inference on a Network whose memory allocator is a
//! CheckAllocator, then print the maximum and the sum of output tensor 0
//! (read as float). Always returns true.
bool lite::example::config_user_allocator(const Args& args) {
    std::string model_file = args.model_path;
    std::string npy_file = args.input_path;

    //! build the network, register the custom allocator on it, then load
    //! the model
    auto user_allocator = std::make_shared<CheckAllocator>();
    std::shared_ptr<Network> network = std::make_shared<Network>();
    Runtime::set_memory_allocator(network, user_allocator);
    network->load_model(model_file);

    //! fill input tensor 0 with the raw bytes parsed from the npy file
    std::shared_ptr<Tensor> in_tensor = network->get_input_tensor(0);
    size_t length = in_tensor->get_tensor_total_size_in_byte();
    void* in_buffer = in_tensor->get_memory_ptr();
    auto npy_tensor = parse_npy(npy_file);
    void* npy_buffer = npy_tensor->get_memory_ptr();
    //! the copy size is the byte size of the network input tensor
    memcpy(in_buffer, npy_buffer, length);

    //! run the network and wait for it to finish
    network->forward();
    network->wait();

    //! read output tensor 0 and reduce it to its max and its sum
    std::shared_ptr<Tensor> out_tensor = network->get_output_tensor(0);
    void* out_buffer = out_tensor->get_memory_ptr();
    size_t byte_count = out_tensor->get_tensor_total_size_in_byte();
    size_t elem_bytes = out_tensor->get_layout().get_elem_size();
    size_t elem_count = byte_count / elem_bytes;
    //! the value printed here is the input byte size computed above
    printf("length=%zu\n", length);

    float* values = static_cast<float*>(out_buffer);
    float peak = -1.0f;
    float total = 0.0f;
    for (size_t idx = 0; idx < elem_count; ++idx) {
        const float value = values[idx];
        total += value;
        peak = (peak < value) ? value : peak;
    }
    printf("max=%e, sum=%e\n", peak, total);
    return true;
}
#endif
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}