diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b13f7122f65beefa99a71d1849b8a569978ccad3..9fd34aa90e8e3f939a5bc2351a021f8c43f6b3ca 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -9,7 +9,7 @@ cpplint: - master script: - curl -o cpplint.py https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py - - python cpplint.py --root=mace --linelength=80 --counting=detailed $(find mace -name *.h -or -name *.cc) + - python cpplint.py --linelength=80 --counting=detailed $(find mace -name *.h -or -name *.cc) ops_test: stage: ops_test diff --git a/WORKSPACE b/WORKSPACE index 783ddc7acf36eea8ebeeafd8ea063e03958e663d..1f22f3276fe2cc74282ce2acf438d341298a4170 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -55,7 +55,7 @@ new_git_repository( name = "opencl_clhpp", build_file = "mace/third_party/opencl-clhpp.BUILD", commit = "4c6f7d56271727e37fb19a9b47649dd175df2b12", - remote = "https://github.com/KhronosGroup/OpenCL-CLHPP.git", + remote = "http://v9.git.n.xiaomi.com/deep-computing/OpenCL-CLHPP-Mirror.git", ) new_git_repository( diff --git a/docker/Dockerfile b/docker/Dockerfile index 498d1ea006d692ac10ec211fb9e99c6b25b4986d..6d83d0e417214f99a00d7c101c07016b89c7b798 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -117,9 +117,3 @@ RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com # Download tensorflow tools RUN wget http://cnbj1-inner-fds.api.xiaomi.net/mace/tool/transform_graph && \ chmod +x transform_graph - -# Install gitlab runner -RUN curl -L https://packages.gitlab.com/install/repositories/runner/gitlab-ci-multi-runner/script.deb.sh | bash -RUN apt-get install gitlab-ci-multi-runner - -ENTRYPOINT gitlab-runner run diff --git a/docker/gitlab-runner/Dockerfile b/docker/gitlab-runner/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..13984e4cde4c1caea4f96e3b0c36b4f8d1f9e60e --- /dev/null +++ b/docker/gitlab-runner/Dockerfile @@ -0,0 +1,12 @@ +FROM cr.d.xiaomi.net/mace/mace-dev:latest + +# 
Update source +# Looks like mirrors.163.com does not work in xiaomi network +# RUN sed -i 's/http:\/\/archive\.ubuntu\.com\/ubuntu\//http:\/\/mirrors\.163\.com\/ubuntu\//g' /etc/apt/sources.list +RUN apt-get update -y + +# Install gitlab runner +RUN curl -L https://packages.gitlab.com/install/repositories/runner/gitlab-ci-multi-runner/script.deb.sh | bash +RUN apt-get install gitlab-ci-multi-runner + +ENTRYPOINT gitlab-runner run diff --git a/mace/benchmark/BUILD b/mace/benchmark/BUILD index 2cdc00e180175be1237545a8b1d4e7cbfb876633..9334e7473acb0e706cdd34840228d16cc38578b3 100644 --- a/mace/benchmark/BUILD +++ b/mace/benchmark/BUILD @@ -16,7 +16,8 @@ cc_library( hdrs = ["stat_summarizer.h"], linkstatic = 1, deps = [ - "//mace/core", + "//mace/public", + "//mace/utils", ], ) diff --git a/mace/core/allocator.h b/mace/core/allocator.h index eebbb32bec2edf4424eeff552aa21e69123a80ae..a9f76fcab7d91d8c81a76d181f9515998fb2f72a 100644 --- a/mace/core/allocator.h +++ b/mace/core/allocator.h @@ -7,6 +7,9 @@ #define MACE_CORE_ALLOCATOR_H_ #include +#include +#include +#include #include "mace/core/registry.h" #include "mace/core/types.h" @@ -81,7 +84,7 @@ class CPUAllocator : public Allocator { free(data); }; void *Map(void *buffer, size_t offset, size_t nbytes) const override { - return (char *)buffer + offset; + return reinterpret_cast(buffer) + offset; } void *MapImage(void *buffer, const std::vector &image_shape, diff --git a/mace/core/arg_helper.cc b/mace/core/arg_helper.cc index 8b6d57fbba00ce55bd0a9bcace65fb9714379e25..207d2de91acc2a5b3efc55df95ff2b94a89c9f04 100644 --- a/mace/core/arg_helper.cc +++ b/mace/core/arg_helper.cc @@ -2,6 +2,9 @@ // Copyright (c) 2017 XiaoMi All rights reserved. 
// +#include +#include + #include "mace/core/arg_helper.h" #include "mace/utils/logging.h" diff --git a/mace/core/arg_helper.h b/mace/core/arg_helper.h index 25137804016c442c96dd88f683f810ff4288e5b9..ab8e14b99bf9baa078dd37129a3a089d4fb6096f 100644 --- a/mace/core/arg_helper.h +++ b/mace/core/arg_helper.h @@ -5,6 +5,8 @@ #ifndef MACE_CORE_ARG_HELPER_H_ #define MACE_CORE_ARG_HELPER_H_ +#include +#include #include #include "mace/public/mace.h" diff --git a/mace/core/buffer.h b/mace/core/buffer.h index 38c577a582e4f2dcf668cad26fd54b82967836c7..e6d433e62dae922d62a4f554fb87b1fcfb2cb08f 100644 --- a/mace/core/buffer.h +++ b/mace/core/buffer.h @@ -6,6 +6,8 @@ #define MACE_CORE_BUFFER_H_ #include +#include + #include "mace/core/allocator.h" #include "mace/core/types.h" @@ -14,7 +16,7 @@ namespace mace { class BufferBase { public: BufferBase() : size_(0) {} - BufferBase(index_t size) : size_(size) {} + explicit BufferBase(index_t size) : size_(size) {} virtual ~BufferBase() {} virtual void *buffer() = 0; @@ -39,7 +41,7 @@ class BufferBase { virtual bool OnHost() const = 0; - virtual index_t offset() const { return 0; }; + virtual index_t offset() const { return 0; } template const T *data() const { @@ -59,7 +61,7 @@ class BufferBase { class Buffer : public BufferBase { public: - Buffer(Allocator *allocator) + explicit Buffer(Allocator *allocator) : BufferBase(0), allocator_(allocator), buf_(nullptr), @@ -93,7 +95,7 @@ class Buffer : public BufferBase { void *buffer() { MACE_CHECK_NOTNULL(buf_); return buf_; - }; + } const void *raw_data() const { if (OnHost()) { @@ -129,7 +131,7 @@ class Buffer : public BufferBase { void Map(std::vector *pitch) { MACE_CHECK(mapped_buf_ == nullptr, "buf has been already mapped"); mapped_buf_ = Map(0, size_, pitch); - }; + } void UnMap() { UnMap(mapped_buf_); @@ -151,7 +153,7 @@ class Buffer : public BufferBase { void Copy(void *src, index_t offset, index_t length) { MACE_CHECK_NOTNULL(mapped_buf_); MACE_CHECK(length <= size_, "out of 
buffer"); - memcpy(mapped_buf_, (char *)src + offset, length); + memcpy(mapped_buf_, reinterpret_cast(src) + offset, length); } bool OnHost() const { return allocator_->OnHost(); } @@ -197,7 +199,7 @@ class Image : public BufferBase { void *buffer() { MACE_CHECK_NOTNULL(buf_); return buf_; - }; + } const void *raw_data() const { MACE_CHECK_NOTNULL(mapped_buf_); @@ -227,12 +229,12 @@ class Image : public BufferBase { MACE_CHECK(mapped_buf_ == nullptr, "buf has been already mapped"); MACE_CHECK_NOTNULL(pitch); mapped_buf_ = allocator_->MapImage(buf_, shape_, pitch); - }; + } void UnMap() { UnMap(mapped_buf_); mapped_buf_ = nullptr; - }; + } void Resize(index_t size) { MACE_NOT_IMPLEMENTED; } @@ -276,12 +278,12 @@ class BufferSlice : public BufferBase { void *buffer() { MACE_CHECK_NOTNULL(buffer_); return buffer_->buffer(); - }; + } const void *raw_data() const { if (OnHost()) { MACE_CHECK_NOTNULL(buffer_); - return (char *)buffer_->raw_data() + offset_; + return reinterpret_cast(buffer_->raw_data()) + offset_; } else { MACE_CHECK_NOTNULL(mapped_buf_); return mapped_buf_; @@ -304,13 +306,13 @@ class BufferSlice : public BufferBase { MACE_CHECK_NOTNULL(buffer_); MACE_CHECK(mapped_buf_ == nullptr, "mapped buf is not null"); mapped_buf_ = buffer_->Map(offset_, length_, pitch); - }; + } void UnMap() { MACE_CHECK_NOTNULL(mapped_buf_); buffer_->UnMap(mapped_buf_); mapped_buf_ = nullptr; - }; + } void Resize(index_t size) { MACE_NOT_IMPLEMENTED; } @@ -326,6 +328,6 @@ class BufferSlice : public BufferBase { index_t offset_; index_t length_; }; -} +} // namespace mace #endif // MACE_CORE_BUFFER_H_ diff --git a/mace/core/mace.cc b/mace/core/mace.cc index e4d25c7d114dc5c9a28e457a6bb0888a887aa92a..f1f0d59a4716f738ee07c11598d68e4490ca0c31 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -459,7 +459,7 @@ MaceEngine::~MaceEngine() { MACE_CHECK(hexagon_controller_->TeardownGraph(), "hexagon teardown error"); MACE_CHECK(hexagon_controller_->Finalize(), "hexagon finalize 
error"); } -}; +} bool MaceEngine::Run(const float *input, const std::vector &input_shape, @@ -493,7 +493,6 @@ bool MaceEngine::Run(const float *input, auto shape = output_tensor->shape(); int64_t output_size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); - // TODO: check for overflow exception. std::memcpy(output, output_tensor->data(), output_size * sizeof(float)); return true; @@ -530,7 +529,6 @@ bool MaceEngine::Run(const std::vector &inputs, int64_t output_size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); MACE_CHECK(!shape.empty()) << "Output's shape must greater than 0"; - // TODO: check for overflow exception. std::memcpy(output.second, output_tensor->data(), output_size * sizeof(float)); } else { diff --git a/mace/core/net.cc b/mace/core/net.cc index 2439a67fee31f1e402efe7aff4f1c87daf5ad6b2..02efc1a41ebcdde25df4b91ebe7e679d8e4434a1 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -2,6 +2,8 @@ // Copyright (c) 2017 XiaoMi All rights reserved. 
// +#include + #include "mace/core/net.h" #include "mace/utils/memory_logging.h" #include "mace/utils/timer.h" diff --git a/mace/core/net.h b/mace/core/net.h index 3b625393cf6fef8fd8a6a8336a60233b2c5bf087..e14297222a933f73640fff3736664d8c0f1b1f84 100644 --- a/mace/core/net.h +++ b/mace/core/net.h @@ -5,6 +5,10 @@ #ifndef MACE_CORE_NET_H_ #define MACE_CORE_NET_H_ +#include +#include +#include + #include "mace/core/operator.h" #include "mace/public/mace.h" diff --git a/mace/core/operator.cc b/mace/core/operator.cc index c670d9aa729dc575a204eacf3789fd56675df4a9..ae6ca107e1f13e72958f401e88cdde5af6005d98 100644 --- a/mace/core/operator.cc +++ b/mace/core/operator.cc @@ -3,6 +3,9 @@ // #include +#include +#include +#include #include "mace/core/operator.h" diff --git a/mace/core/operator.h b/mace/core/operator.h index a163c0c89b5e4da2feba23232027474e8930f3a4..27afdadd3a3b4c8bfbbe613c36ca558025f7c606 100644 --- a/mace/core/operator.h +++ b/mace/core/operator.h @@ -2,8 +2,13 @@ // Copyright (c) 2017 XiaoMi All rights reserved. 
// -#ifndef MACE_CORE_OPERATOR_H -#define MACE_CORE_OPERATOR_H +#ifndef MACE_CORE_OPERATOR_H_ +#define MACE_CORE_OPERATOR_H_ + +#include +#include +#include +#include #include "mace/core/arg_helper.h" #include "mace/core/future.h" @@ -100,7 +105,7 @@ class Operator : public OperatorBase { } } } - virtual bool Run(StatsFuture *future) override = 0; + bool Run(StatsFuture *future) override = 0; ~Operator() noexcept override {} }; @@ -150,7 +155,7 @@ class OperatorRegistry { RegistryType; OperatorRegistry(); ~OperatorRegistry() = default; - RegistryType *registry() { return ®istry_; }; + RegistryType *registry() { return ®istry_; } std::unique_ptr CreateOperator(const OperatorDef &operator_def, Workspace *ws, DeviceType type, @@ -171,4 +176,4 @@ MACE_DECLARE_REGISTRY(OpRegistry, } // namespace mace -#endif // MACE_CORE_OPERATOR_H +#endif // MACE_CORE_OPERATOR_H_ diff --git a/mace/core/preallocated_pooled_allocator.h b/mace/core/preallocated_pooled_allocator.h index 0299d2f4cf83d89f4871806e6b504985a345e374..80e7fa659bb3c566a47e4afea87dc40d114fba22 100644 --- a/mace/core/preallocated_pooled_allocator.h +++ b/mace/core/preallocated_pooled_allocator.h @@ -5,7 +5,10 @@ #ifndef MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_ #define MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_ +#include +#include #include + #include "mace/core/allocator.h" #include "mace/core/buffer.h" diff --git a/mace/core/registry.h b/mace/core/registry.h index 07eaa01b76b1ff2df005eadf46cce7c055b79c49..0e1e85d2e6c3d9bcfb6663ff82001206dc3de9e2 100644 --- a/mace/core/registry.h +++ b/mace/core/registry.h @@ -7,7 +7,7 @@ #include #include -#include +#include // NOLINT(build/c++11) #include #include diff --git a/mace/core/runtime/cpu/cpu_runtime.cc b/mace/core/runtime/cpu/cpu_runtime.cc index 4c2cd851d4bd502a1096b8523a85d9e3cf61fcfd..555372ab8da152690237e12dd7f26a20c3db8468 100644 --- a/mace/core/runtime/cpu/cpu_runtime.cc +++ b/mace/core/runtime/cpu/cpu_runtime.cc @@ -2,19 +2,21 @@ // Copyright (c) 2017 XiaoMi All 
rights reserved. // -#include "mace/public/mace.h" -#include "mace/utils/logging.h" #include #include #include +#include +#include "mace/core/runtime/cpu/cpu_runtime.h" +#include "mace/public/mace.h" +#include "mace/utils/logging.h" namespace mace { namespace { -static int GetCPUMaxFreq(int cpu_id) { +int GetCPUMaxFreq(int cpu_id) { char path[64]; - sprintf(path, + snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpu_id); FILE *fp = fopen(path, "rb"); @@ -26,24 +28,25 @@ static int GetCPUMaxFreq(int cpu_id) { return freq; } -static void SortCPUIdsByMaxFreqAsc(std::vector &cpu_ids) { - int cpu_count = cpu_ids.size(); +void SortCPUIdsByMaxFreqAsc(std::vector *cpu_ids) { + MACE_CHECK_NOTNULL(cpu_ids); + int cpu_count = cpu_ids->size(); std::vector cpu_max_freq; cpu_max_freq.resize(cpu_count); // set cpu max frequency for (int i = 0; i < cpu_count; ++i) { cpu_max_freq[i] = GetCPUMaxFreq(i); - cpu_ids[i] = i; + (*cpu_ids)[i] = i; } // sort cpu ids by max frequency asc, bubble sort for (int i = 0; i < cpu_count - 1; ++i) { for (int j = i + 1; j < cpu_count; ++j) { if (cpu_max_freq[i] > cpu_max_freq[j]) { - int tmp = cpu_ids[i]; - cpu_ids[i] = cpu_ids[j]; - cpu_ids[j] = tmp; + int tmp = (*cpu_ids)[i]; + (*cpu_ids)[i] = (*cpu_ids)[j]; + (*cpu_ids)[j] = tmp; tmp = cpu_max_freq[i]; cpu_max_freq[i] = cpu_max_freq[j]; @@ -53,11 +56,12 @@ static void SortCPUIdsByMaxFreqAsc(std::vector &cpu_ids) { } } -static void SetThreadAffinity(cpu_set_t mask) { +void SetThreadAffinity(cpu_set_t mask) { int sys_call_res; pid_t pid = gettid(); - // TODO: when set omp num threads to 1, sometiomes return EINVAL(22) error + // TODO(chenghui): when set omp num threads to 1, + // sometiomes return EINVAL(22) error. 
// https://linux.die.net/man/2/sched_setaffinity sys_call_res = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask); if (sys_call_res != 0) { @@ -68,12 +72,11 @@ static void SetThreadAffinity(cpu_set_t mask) { } // namespace void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option) { - int cpu_count = omp_get_num_procs(); LOG(INFO) << "cpu_count: " << cpu_count; std::vector sorted_cpu_ids; sorted_cpu_ids.resize(cpu_count); - SortCPUIdsByMaxFreqAsc(sorted_cpu_ids); + SortCPUIdsByMaxFreqAsc(&sorted_cpu_ids); std::vector use_cpu_ids; if (power_option == CPUPowerOption::DEFAULT || omp_num_threads >= cpu_count) { @@ -92,7 +95,7 @@ void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option) { // compute mask cpu_set_t mask; CPU_ZERO(&mask); - for (auto cpu_id: use_cpu_ids) { + for (auto cpu_id : use_cpu_ids) { CPU_SET(cpu_id, &mask); } LOG(INFO) << "use cpus mask: " << mask.__bits[0]; diff --git a/mace/core/runtime/cpu/cpu_runtime.h b/mace/core/runtime/cpu/cpu_runtime.h index 13dfd680236663e5af516b004306f864013fe3e4..f80ca1b824bcc097b2276ae253ca2b4d702fab66 100644 --- a/mace/core/runtime/cpu/cpu_runtime.h +++ b/mace/core/runtime/cpu/cpu_runtime.h @@ -3,8 +3,8 @@ // -#ifndef MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H -#define MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H +#ifndef MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_ +#define MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_ #include "mace/public/mace.h" @@ -14,4 +14,4 @@ void SetCPURuntime(int omp_num_threads, CPUPowerOption power_option); } -#endif //MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H +#endif // MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_ diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc index 2828ffa0e2056c180abbf93b6f35bb36edfcc851..138fd933a0848abb6b5300a5c97b0cc62e659f90 100644 --- a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc +++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc @@ -3,8 +3,11 @@ // #include -#include +#include // 
NOLINT(build/c++11) #include +#include +#include +#include #include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" #include "mace/core/runtime/hexagon/hexagon_nn_ops.h" @@ -324,7 +327,7 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor, MACE_ASSERT(output_bytes == output_tensor->raw_size(), "wrong output bytes inferred."); return res == 0; -}; +} bool HexagonControlWrapper::ExecuteGraphNew( const std::vector &input_tensors, @@ -374,7 +377,7 @@ bool HexagonControlWrapper::ExecuteGraphNew( delete[] inputs; delete[] outputs; return res == 0; -}; +} bool HexagonControlWrapper::ExecuteGraphPreQuantize(const Tensor &input_tensor, Tensor *output_tensor) { diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.h b/mace/core/runtime/hexagon/hexagon_control_wrapper.h index 8cb3b359010b18e3afc350179d38060d7aa07b76..01f69749fcf050c83b953213f76122d676bde99c 100644 --- a/mace/core/runtime/hexagon/hexagon_control_wrapper.h +++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.h @@ -2,8 +2,8 @@ // Copyright (c) 2017 XiaoMi All rights reserved. 
// -#ifndef MACE_DSP_HEXAGON_CONTROL_WRAPPER_H_ -#define MACE_DSP_HEXAGON_CONTROL_WRAPPER_H_ +#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROL_WRAPPER_H_ +#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROL_WRAPPER_H_ #include @@ -16,7 +16,7 @@ namespace mace { class HexagonControlWrapper { public: - HexagonControlWrapper(){}; + HexagonControlWrapper() {} int GetVersion(); bool Config(); bool Init(); @@ -53,6 +53,6 @@ class HexagonControlWrapper { DISABLE_COPY_AND_ASSIGN(HexagonControlWrapper); }; -} +} // namespace mace -#endif // MACE_DSP_HEXAGON_CONTROL_WRAPPER_H_ +#endif // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROL_WRAPPER_H_ diff --git a/mace/core/runtime/hexagon/hexagon_controller.h b/mace/core/runtime/hexagon/hexagon_controller.h index 0e7d7596ca6e36218e7d6ed7e82112e63607dd97..11f77c1d834e7a6de42574e8853fb54e6c078381 100644 --- a/mace/core/runtime/hexagon/hexagon_controller.h +++ b/mace/core/runtime/hexagon/hexagon_controller.h @@ -1,5 +1,9 @@ -#ifndef MACE_DSP_HEXAGON_DSP_CONTROLLER_H_ -#define MACE_DSP_HEXAGON_DSP_CONTROLLER_H_ +// +// Copyright (c) 2017 XiaoMi All rights reserved. +// + +#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROLLER_H_ +#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROLLER_H_ #include "mace/core/runtime/hexagon/hexagon_nn.h" @@ -18,4 +22,5 @@ int hexagon_controller_DeInitHexagon(); } #endif // __cplusplus -#endif // MACE_DSP_HEXAGON_DSP_CONTROLLER_H_ \ No newline at end of file +#endif // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_CONTROLLER_H_ + diff --git a/mace/core/runtime/hexagon/hexagon_nn.h b/mace/core/runtime/hexagon/hexagon_nn.h index 0baafd8cc7956fed751120504a876b4da4c06b56..7bb5bdefbe2fef4dfd13283969c69f4fd66eff2a 100644 --- a/mace/core/runtime/hexagon/hexagon_nn.h +++ b/mace/core/runtime/hexagon/hexagon_nn.h @@ -1,8 +1,43 @@ -#ifndef _HEXAGON_NN_H -#define _HEXAGON_NN_H +/* + * Copyright (c) 2016-2017, The Linux Foundation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted (subject to the limitations in the + * disclaimer below) provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * * Neither the name of The Linux Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE + * GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT + * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_H_ +#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_H_ #ifndef __QAIC_HEADER #define __QAIC_HEADER(ff) ff -#endif //__QAIC_HEADER +#endif // __QAIC_HEADER #ifndef __QAIC_HEADER_EXPORT #define __QAIC_HEADER_EXPORT @@ -14,7 +49,7 @@ #ifndef __QAIC_IMPL #define __QAIC_IMPL(ff) ff -#endif //__QAIC_IMPL +#endif // __QAIC_IMPL #ifndef __QAIC_IMPL_EXPORT #define __QAIC_IMPL_EXPORT @@ -186,4 +221,4 @@ __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute_new)( #ifdef __cplusplus } #endif -#endif //_HEXAGON_NN_H +#endif // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_H_ diff --git a/mace/core/runtime/hexagon/hexagon_nn_ops.h b/mace/core/runtime/hexagon/hexagon_nn_ops.h index 8704ce8068d9b38de4bc43a815a700b55eb2b480..faad9c4d0f256084ca3b651c96feab9b7767467e 100644 --- a/mace/core/runtime/hexagon/hexagon_nn_ops.h +++ b/mace/core/runtime/hexagon/hexagon_nn_ops.h @@ -2,10 +2,12 @@ // Copyright (c) 2018 XiaoMi All rights reserved. // -#ifndef LIBMACE_HEXAGON_NN_OPS_H -#define LIBMACE_HEXAGON_NN_OPS_H +#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_OPS_H_ +#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_OPS_H_ +#include #include + #include "mace/utils/logging.h" namespace mace { @@ -15,7 +17,7 @@ namespace mace { typedef enum op_type_enum { #define DEF_OP(NAME, ...) 
OP_##NAME, -#include "mace/core/runtime/hexagon/ops.h" +#include "mace/core/runtime/hexagon/ops.h" // NOLINT(build/include) NN_OPS_MAX #undef DEF_OP @@ -26,7 +28,7 @@ class OpMap { void Init() { #define DEF_OP(NAME) op_map_[#NAME] = OP_##NAME; -#include "mace/core/runtime/hexagon/ops.h" +#include "mace/core/runtime/hexagon/ops.h" // NOLINT(build/include) #undef DEF_OP } @@ -45,4 +47,4 @@ class OpMap { }; } // namespace mace -#endif // LIBMACE_HEXAGON_NN_OPS_H +#endif // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_NN_OPS_H_ diff --git a/mace/core/runtime/hexagon/ops.h b/mace/core/runtime/hexagon/ops.h index 55b40413fdf4cbbca9ae69b93991aed9e5fe5a89..a01e71acab6cdae2fd96020d844767554307fe3d 100644 --- a/mace/core/runtime/hexagon/ops.h +++ b/mace/core/runtime/hexagon/ops.h @@ -1,3 +1,38 @@ +/* + * Copyright (c) 2016-2017, The Linux Foundation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted (subject to the limitations in the + * disclaimer below) provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * * Neither the name of The Linux Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE + * GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT + * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + /* * You probably want to * @@ -42,6 +77,8 @@ * * otherwise the interface becomes incompatible. */ +// NOLINT(build/header_guard) + DEF_OP(INPUT) DEF_OP(OUTPUT) DEF_OP(Nop) diff --git a/mace/core/runtime/hexagon/quantize.cc b/mace/core/runtime/hexagon/quantize.cc index c4548bcbc4902d03ce51acd79648c6299772dcfe..7330424aab6d893c84cee0915078995148c93ba9 100644 --- a/mace/core/runtime/hexagon/quantize.cc +++ b/mace/core/runtime/hexagon/quantize.cc @@ -2,6 +2,8 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // +#include + #include "mace/core/runtime/hexagon/quantize.h" namespace mace { @@ -93,4 +95,5 @@ void Quantizer::DeQuantize(const Tensor &in_tensor, } } -} // namespace mace \ No newline at end of file + +} // namespace mace diff --git a/mace/core/runtime/hexagon/quantize.h b/mace/core/runtime/hexagon/quantize.h index 216e0c6b5fecf766b03ecfe2bfa57a951f0d671e..8e98f3beddf801dfa9ec0b38a547d93761f6ec7f 100644 --- a/mace/core/runtime/hexagon/quantize.h +++ b/mace/core/runtime/hexagon/quantize.h @@ -2,8 +2,8 @@ // Copyright (c) 2017 XiaoMi All rights reserved. 
// -#ifndef MACE_DSP_UTIL_QUANTIZE_H_ -#define MACE_DSP_UTIL_QUANTIZE_H_ +#ifndef MACE_CORE_RUNTIME_HEXAGON_QUANTIZE_H_ +#define MACE_CORE_RUNTIME_HEXAGON_QUANTIZE_H_ #include "mace/core/tensor.h" @@ -40,6 +40,6 @@ class Quantizer { DISABLE_COPY_AND_ASSIGN(Quantizer); }; -} // mace +} // namespace mace -#endif // MACE_DSP_UTIL_QUANTIZE_H_ +#endif // MACE_CORE_RUNTIME_HEXAGON_QUANTIZE_H_ diff --git a/mace/core/tensor.h b/mace/core/tensor.h index 66c9c19571b24f4a6d85422ee1e73bfc52c7cd39..1d7d5debf9dfefaeab59205d6de67d29867d2c35 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -5,6 +5,10 @@ #ifndef MACE_CORE_TENSOR_H_ #define MACE_CORE_TENSOR_H_ +#include +#include +#include + #include "mace/core/buffer.h" #include "mace/core/preallocated_pooled_allocator.h" #include "mace/core/runtime/opencl/cl2_header.h" @@ -60,7 +64,7 @@ inline std::ostream &operator<<(std::ostream &os, signed char c) { inline std::ostream &operator<<(std::ostream &os, unsigned char c) { return os << static_cast(c); } -} +} // namespace numerical_chars class Tensor { public: @@ -69,7 +73,7 @@ class Tensor { dtype_(type), buffer_(nullptr), is_buffer_owner_(true), - name_(""){}; + name_("") {} Tensor(BufferBase *buffer, DataType dtype) : dtype_(dtype), @@ -240,7 +244,7 @@ class Tensor { inline void SetSourceOpName(const std::string name) { name_ = name; } inline void DebugPrint() const { - using namespace numerical_chars; + using namespace numerical_chars; // NOLINT(build/namespaces) std::stringstream os; for (index_t i : shape_) { os << i << ", "; @@ -262,7 +266,7 @@ class Tensor { class MappingGuard { public: - MappingGuard(const Tensor *tensor) : tensor_(tensor) { + explicit MappingGuard(const Tensor *tensor) : tensor_(tensor) { if (tensor_ != nullptr) { tensor_->buffer_->Map(&mapped_image_pitch_); } @@ -301,6 +305,6 @@ class Tensor { DISABLE_COPY_AND_ASSIGN(Tensor); }; -} // namespace tensor +} // namespace mace #endif // MACE_CORE_TENSOR_H_ diff --git 
a/mace/core/testing/test_benchmark.cc b/mace/core/testing/test_benchmark.cc index 7dcf2a272ee342e0903b54da2f870b59d4ab8110..4a894a1d4ff39d421bc05ff1630759f4bf1bb4d7 100644 --- a/mace/core/testing/test_benchmark.cc +++ b/mace/core/testing/test_benchmark.cc @@ -6,7 +6,7 @@ #include #include -#include +#include // NOLINT(build/c++11) #include #include "mace/core/testing/test_benchmark.h" diff --git a/mace/core/testing/test_benchmark_main.cc b/mace/core/testing/test_benchmark_main.cc index 91302d7832f983f9801ccc129060176885d9fdda..76b7d15f8f25191d71c0052e5572a631eb7b6782 100644 --- a/mace/core/testing/test_benchmark_main.cc +++ b/mace/core/testing/test_benchmark_main.cc @@ -14,7 +14,6 @@ int main(int argc, char **argv) { mace::ConfigOpenCLRuntime(mace::GPUType::ADRENO, mace::GPUPerfHint::PERF_HIGH, mace::GPUPriorityHint::PRIORITY_HIGH); - // TODO Use gflags if (argc == 2) { mace::testing::Benchmark::Run(argv[1]); } else { diff --git a/mace/core/types.h b/mace/core/types.h index 5eb7b536a5784df4160bed080f48feaa30efb4cc..e7a078f625fbaf869cdfbae50dcaf0be7b3b9054 100644 --- a/mace/core/types.h +++ b/mace/core/types.h @@ -6,6 +6,7 @@ #define MACE_CORE_TYPES_H_ #include +#include #include "mace/public/mace.h" #include "include/half.hpp" diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index 2cb5e237ab7c81e72c01df0f4850a9d3c5583389..1aabb5de70177036c054e4a2f675a0c2abfa42cb 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -4,6 +4,7 @@ #include #include +#include #include "mace/core/arg_helper.h" #include "mace/core/workspace.h" @@ -52,16 +53,16 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { unsigned char *model_data_ptr = nullptr; for (auto &const_tensor : net_def.tensors()) { if (model_data_ptr == nullptr || - reinterpret_cast(const_tensor.data()) < - reinterpret_cast(model_data_ptr)) { + reinterpret_cast(const_tensor.data()) < + reinterpret_cast(model_data_ptr)) { model_data_ptr = const_cast(const_tensor.data()); } 
} for (auto &const_tensor : net_def.tensors()) { model_data_size = std::max( model_data_size, - static_cast((reinterpret_cast(const_tensor.data()) - - reinterpret_cast(model_data_ptr)) + + static_cast((reinterpret_cast(const_tensor.data()) - + reinterpret_cast(model_data_ptr)) + const_tensor.data_size() * GetEnumTypeSize(const_tensor.data_type()))); } @@ -89,7 +90,8 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { dims.push_back(d); } - index_t offset = (long long)const_tensor.data() - (long long)model_data_ptr; + index_t offset = reinterpret_cast(const_tensor.data()) + - reinterpret_cast(model_data_ptr); std::unique_ptr tensor( new Tensor(BufferSlice(tensor_buffer_.get(), offset, const_tensor.data_size() * @@ -116,7 +118,7 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) { // As DSP may have different data output type for each op, // we stick to the same concept. for (auto &op : net_def.op()) { - if (! op.mem_id().empty()){ + if (!op.mem_id().empty()) { const DataType op_dtype = static_cast( ArgumentHelper::GetSingleArgument( op, "T", static_cast(DT_FLOAT))); @@ -142,11 +144,14 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) { std::unique_ptr tensor (new Tensor(preallocated_allocator_.GetBuffer(mem_ids[i]), dtype)); tensor->SetSourceOpName(op.name()); - VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")" << "; Mem: " - << mem_ids[i] << "; Image shape: " - << dynamic_cast(tensor->UnderlyingBuffer())->image_shape()[0] + VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")" + << " Mem: " << mem_ids[i] + << " Image shape: " + << dynamic_cast(tensor->UnderlyingBuffer()) + ->image_shape()[0] << ", " - << dynamic_cast(tensor->UnderlyingBuffer())->image_shape()[1]; + << dynamic_cast(tensor->UnderlyingBuffer()) + ->image_shape()[1]; tensor_map_[op.output(i)] = std::move(tensor); } } diff --git a/mace/core/workspace.h b/mace/core/workspace.h index 
5e990d8210321d829f18551bd4fbe63c70e750b3..1e1012672c30d388fe34ff645b50ed36a292c16b 100644 --- a/mace/core/workspace.h +++ b/mace/core/workspace.h @@ -5,6 +5,11 @@ #ifndef MACE_CORE_WORKSPACE_H_ #define MACE_CORE_WORKSPACE_H_ +#include +#include +#include +#include + #include "mace/core/preallocated_pooled_allocator.h" #include "mace/core/tensor.h" #include "mace/public/mace.h" diff --git a/mace/python/tools/memory_optimizer.py b/mace/python/tools/memory_optimizer.py index 2e5716976b5a8cafdd22dceee0785b88a199bc11..8753b2700157416f9f078a8ee6a14b64c8fec718 100644 --- a/mace/python/tools/memory_optimizer.py +++ b/mace/python/tools/memory_optimizer.py @@ -43,6 +43,9 @@ class MemoryOptimizer(object): mem_size[1] = output_shape[0] * output_shape[1] return mem_size + def mem_area(self, memory_size): + return memory_size[0] * memory_size[1] + def optimize(self): for op in self.net_def.op: if self.is_buffer_image_op(op): @@ -54,22 +57,34 @@ class MemoryOptimizer(object): print('WARNING: the number of output shape is not equal to the number of output.') return for i in range(len(op.output)): - if len(self.idle_mem) == 0: - # allocate new mem + op_mem_size = self.get_mem_size(op.type, op.output_shape[i].dims) + mem_id = -1 + if len(self.idle_mem) > 0: + best_mem_candidate_id = -1 + best_mem_candidate_delta_area = sys.maxint + best_mem_candidate_shape = [] + for mid in self.idle_mem: + reuse_mem_size = self.mem_block[mid] + resize_mem_size = [max(reuse_mem_size[0], op_mem_size[0]), max(reuse_mem_size[1], op_mem_size[1])] + delta_mem_area = self.mem_area(resize_mem_size) - self.mem_area(reuse_mem_size) + if delta_mem_area < best_mem_candidate_delta_area: + best_mem_candidate_id = mid + best_mem_candidate_delta_area = delta_mem_area + best_mem_candidate_shape = resize_mem_size + + if best_mem_candidate_delta_area <= self.mem_area(op_mem_size): + # reuse + self.mem_block[best_mem_candidate_id] = best_mem_candidate_shape + mem_id = best_mem_candidate_id + 
self.idle_mem.remove(mem_id) + + if mem_id == -1: mem_id = self.total_mem_count self.total_mem_count += 1 - else: - # reuse mem - mem_id = self.idle_mem.pop() + self.mem_block[mem_id] = op_mem_size op.mem_id.extend([mem_id]) self.op_mem[op.output[i]] = mem_id - if mem_id not in self.mem_block: - self.mem_block[mem_id] = [0, 0] - mem_size = self.mem_block[mem_id] - op_mem_size = self.get_mem_size(op.type, op.output_shape[i].dims) - mem_size[0] = max(mem_size[0], op_mem_size[0]) - mem_size[1] = max(mem_size[1], op_mem_size[1]) # de-ref input tensor mem for ipt in op.input: diff --git a/tools/benchmark.sh b/tools/benchmark.sh index e050f5b42ebe0ceb036e8699f7fa0dde69578eb1..dbcc862c431a188b8bd55cd4fe93ba0703a9f296 100644 --- a/tools/benchmark.sh +++ b/tools/benchmark.sh @@ -29,7 +29,8 @@ if [ "$EMBED_MODEL_DATA" = 0 ]; then fi if [ x"$TARGET_ABI" == x"host" ]; then - bazel build --verbose_failures -c opt --strip always //mace/benchmark:benchmark_model \ + bazel build --verbose_failures -c opt --strip always \ + //mace/benchmark:benchmark_model \ --copt="-std=c++11" \ --copt="-D_GLIBCXX_USE_C99_MATH_TR1" \ --copt="-Werror=return-type" \ @@ -52,16 +53,18 @@ if [ x"$TARGET_ABI" == x"host" ]; then $OPTION_ARGS || exit 1 else - bazel build --verbose_failures -c opt --strip always //mace/benchmark:benchmark_model \ + bazel build --verbose_failures -c opt --strip always \ + //mace/benchmark:benchmark_model \ --crosstool_top=//external:android/crosstool \ --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ --cpu=${TARGET_ABI} \ --copt="-std=c++11" \ --copt="-D_GLIBCXX_USE_C99_MATH_TR1" \ --copt="-Werror=return-type" \ + --copt="-DMACE_OBFUSCATE_LITERALS" \ --copt="-DMACE_MODEL_TAG=${MODEL_TAG}" \ - --copt="-O3" \ --define openmp=true \ + --copt="-O3" \ --define production=true || exit 1 cp bazel-bin/mace/benchmark/benchmark_model $MODEL_OUTPUT_DIR @@ -70,11 +73,14 @@ else IFS=',' read -r -a INPUT_NAMES <<< "${INPUT_NODES}" for NAME in "${INPUT_NAMES[@]}";do 
FORMATTED_NAME=$(sed s/[^[:alnum:]]/_/g <<< ${NAME}) - adb -s $DEVICE_ID push ${MODEL_OUTPUT_DIR}/${INPUT_FILE_NAME}_${FORMATTED_NAME} ${PHONE_DATA_DIR} || exit 1 + adb -s $DEVICE_ID push ${MODEL_OUTPUT_DIR}/${INPUT_FILE_NAME}_${FORMATTED_NAME} \ + ${PHONE_DATA_DIR} > /dev/null || exit 1 done - adb -s $DEVICE_ID push ${MODEL_OUTPUT_DIR}/benchmark_model ${PHONE_DATA_DIR} || exit 1 + adb -s $DEVICE_ID push ${MODEL_OUTPUT_DIR}/benchmark_model \ + ${PHONE_DATA_DIR} > /dev/null || exit 1 if [ "$EMBED_MODEL_DATA" = 0 ]; then - adb -s $DEVICE_ID push ${MODEL_OUTPUT_DIR}/${MODEL_TAG}.data ${PHONE_DATA_DIR} || exit 1 + adb -s $DEVICE_ID push ${MODEL_OUTPUT_DIR}/${MODEL_TAG}.data \ + ${PHONE_DATA_DIR} > /dev/null || exit 1 fi adb -s $DEVICE_ID