/**
 * \file example/cpp_example/user_allocator.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */

#include "../example.h"
#if LITE_BUILD_WITH_MGE

using namespace lite;
using namespace example;

namespace {
//! User-provided allocator that forwards every request to the platform's
//! aligned allocation routine. The device type and device id arguments are
//! accepted to satisfy the lite::Allocator interface but are not used.
class CheckAllocator : public lite::Allocator {
public:
    //! allocate memory of size in the given device with the given align
    //! \return the aligned pointer; on the posix_memalign path nullptr is
    //! returned (after printing a message) when the allocation fails
    void* allocate(LiteDeviceType, int, size_t size, size_t align) override {
#ifdef WIN32
        return _aligned_malloc(size, align);
#elif defined(__ANDROID__) || defined(ANDROID)
        return memalign(align, size);
#else
        void* ptr = nullptr;
        auto err = posix_memalign(&ptr, align, size);
        //! posix_memalign reports success with 0 and failure with a non-zero
        //! error number, so only a non-zero value is an allocation failure
        if (err) {
            printf("failed to malloc %zu bytes with align %zu", size, align);
            return nullptr;
        }
        return ptr;
#endif
    }

    //! free the memory pointed by ptr in the given device
    //! the release routine is paired with the one used in allocate()
    void free(LiteDeviceType, int, void* ptr) override {
#ifdef WIN32
        _aligned_free(ptr);
#else
        ::free(ptr);
#endif
    }
};
}  // namespace

//! Run one inference with a network whose memory allocator is replaced by
//! CheckAllocator.
//! \param args provides model_path (the model to load) and input_path (an
//! npy file used as the first input)
//! \return true after the output statistics were printed, false when the
//! npy data is too small to fill the input tensor
bool lite::example::config_user_allocator(const Args& args) {
    std::string network_path = args.model_path;
    std::string input_path = args.input_path;

    auto allocator = std::make_shared<CheckAllocator>();

    //! create and load the network; the allocator is installed before the
    //! model is loaded
    std::shared_ptr<Network> network = std::make_shared<Network>();
    Runtime::set_memory_allocator(network, allocator);
    network->load_model(network_path);

    //! set input data to input tensor
    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);

    //! copy or forward data to network
    size_t length = input_tensor->get_tensor_total_size_in_byte();
    void* dst_ptr = input_tensor->get_memory_ptr();
    auto src_tensor = parse_npy(input_path);
    //! memcpy below reads `length` bytes from the source, so a smaller source
    //! buffer would be read out of bounds
    //! NOTE(review): assumes parse_npy returns a Tensor handle -- confirm
    if (src_tensor->get_tensor_total_size_in_byte() < length) {
        printf("input data is smaller than the input tensor (%zu bytes)\n",
               length);
        return false;
    }
    void* src = src_tensor->get_memory_ptr();
    memcpy(dst_ptr, src, length);

    //! forward
    network->forward();
    network->wait();

    //! get the output data or read tensor set in network_in
    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
    void* out_data = output_tensor->get_memory_ptr();
    //! element count = total bytes / bytes per element
    size_t out_length = output_tensor->get_tensor_total_size_in_byte() /
                        output_tensor->get_layout().get_elem_size();
    //! NOTE(review): this prints the input size in bytes, not out_length --
    //! confirm whether that is intended
    printf("length=%zu\n", length);

    //! the output buffer is read as float values
    //! NOTE(review): presumably the model output dtype is float -- confirm
    float max = -1.0f;
    float sum = 0.0f;
    for (size_t i = 0; i < out_length; i++) {
        float data = static_cast<float*>(out_data)[i];
        sum += data;
        if (max < data)
            max = data;
    }
    printf("max=%e, sum=%e\n", max, sum);
    return true;
}
#endif

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}