diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2532ecf24367c0efd8cc6bda90209e77008a4a54..4ccf73763c08a748b53027d7f4a0f254774a1843 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,8 +1,8 @@
-cmake_minimum_required(VERSION 3.0)
+cmake_minimum_required(VERSION 3.6)
 project(paddle-mobile)
 
 option(DEBUGING "enable debug mode" ON)
-option(USE_OPENMP "openmp support" ON)
+option(USE_OPENMP "openmp support" OFF)
 option(USE_EXCEPTION "use std exception" ON)
 option(LOG_PROFILE "log profile" ON)
 # select the platform to build
@@ -15,7 +15,7 @@ file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
 include_directories(src/)
 
 if(IS_IOS)
-    set(CMAKE_CXX_FLAGS "-fobjc-abi-version=2 -fobjc-arc -std=gnu++11 -stdlib=libc++ -O3 -s -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}")
+    set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc -std=gnu++11 -stdlib=libc++ -O3 -s -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}")
 else()
     set(CMAKE_CXX_FLAGS "-std=c++14 -O3 -s ${CMAKE_CXX_FLAGS}")
 endif()
@@ -43,7 +43,7 @@ if (LOG_PROFILE)
     add_definitions(-DPADDLE_MOBILE_PROFILE)
 endif()
 
-if(USE_OPENMP)
+if(USE_OPENMP AND NOT IS_IOS)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
     add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
 endif()
@@ -104,12 +104,21 @@ else()
     foreach(f ${_tmp_list_h})
         list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
     endforeach()
-endif()
 
+    file(GLOB_RECURSE _tmp_list src/fpga/*.cpp src/fpga/*.cc)
+    foreach(f ${_tmp_list})
+        list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
+    endforeach()
+
+    file(GLOB_RECURSE _tmp_list_h src/fpga/*.h)
+    foreach(f ${_tmp_list_h})
+        list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
+    endforeach()
+endif()
+
 if (ANDROID_NDK_TOOLCHAIN_INCLUDED)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -llog")
-    add_definitions(-DARMV7)
 else()
     list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/jni/paddle_mobile_jni.h)
     list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/jni/paddle_mobile_jni.cpp)
@@ -130,8 +139,8 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY build)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
 
 # NET default
-set(NET "defult" CACHE STRING "select net type")
-set_property(CACHE NET PROPERTY STRINGS "defult" "googlenet" "mobilenet" "yolo" "squeezenet")
+set(NET "default" CACHE STRING "select net type")
+set_property(CACHE NET PROPERTY STRINGS "default" "googlenet" "mobilenet" "yolo" "squeezenet" "FPGAnets")
 
 include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake")
 
@@ -153,3 +162,4 @@ if(DEBUGING)
     endif()
 endif()
 
+
diff --git a/README.md b/README.md
index 69362734116fd8af78442a07dd31600aa46b7935..59ef597dd749ea16658977cd6d548cedaa90d166 100644
--- a/README.md
+++ b/README.md
@@ -27,10 +27,10 @@ Paddle-Mobile is a project under the PaddlePaddle organization, dedicated to embedded platforms.
 
 - **ARM CPU**
 
-![](http://7xop3k.com1.z0.glb.clouddn.com/15312108766575.jpg)
+![](http://mms-graph.bj.bcebos.com/paddle-mobile%2F2018_07_29.png)
 
   The ARM CPU is paddle-mobile's primary focus, and the CPU's generality has always been its strength. Embedded deep learning requires a large amount of hand-written CPU assembly. We are coding at full speed to squeeze every bit of acceleration out of the hardware.
 
-  ARM CPU optimization is still in progress; so far only conventional CPU optimizations are applied. On an ARM A73, paddle-mobile arm-v7 currently runs one single-core pass of MobileNet 1.0 in 120+ ms. This is clearly not our final goal: we are rewriting hot paths in assembly, so there is still large headroom. Only armv7 is supported for now; armv8 support will follow.
+  ARM CPU optimization is still in progress; so far only conventional CPU optimizations are applied. On an ARM A73, paddle-mobile arm-v7 currently runs one single-core pass of MobileNet 1.0 in 110+ ms. This is clearly not our final goal: we are rewriting hot paths in assembly, so there is still large headroom. Only armv7 is supported for now; armv8 support will follow.
 
 - **Mali GPU**
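With USE_OPENMP now defaulting to OFF and explicitly disabled on iOS, threading code must tolerate builds without OpenMP. A minimal sketch of the guard this implies; PADDLE_MOBILE_USE_OPENMP is the macro defined by the CMake block above, while the function name here is purely illustrative:

```c++
#ifdef PADDLE_MOBILE_USE_OPENMP
#include <omp.h>
#endif

// Forward the requested thread count to OpenMP when the library was built
// with USE_OPENMP; otherwise silently stay single-threaded.
void SetThreadNumSketch(int num) {
#ifdef PADDLE_MOBILE_USE_OPENMP
  omp_set_num_threads(num);
#else
  (void)num;  // no OpenMP runtime: the request is a no-op
#endif
}
```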
diff --git a/doc/quantification.md b/doc/quantification.md
new file mode 100644
index 0000000000000000000000000000000000000000..04a93116a08c094ef71861cec1bb3262304c4cb7
--- /dev/null
+++ b/doc/quantification.md
@@ -0,0 +1,39 @@
+# Quantification: model quantization and dequantization
+
+## Background
+Models trained from some networks, such as AlexNet, are large and therefore ill-suited to mobile devices.
+
+
+## Ways to shrink an oversized model
+1. Pick a model architecture designed for mobile, such as mobilenet, googlenet, yolo or squeezenet;
+2. Use the quantization tool we provide, which shrinks a float32 model to about 1/4 of its original size with almost no loss of accuracy;
+
+- - - - -
+## The quantization tool
+
+### Where the conversion tool lives:
+
+- [Quantization tool directory](https://github.com/PaddlePaddle/paddle-mobile/tree/develop/tools/quantification)
+
+- [Model conversion tool](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/tools/quantification/convert.cpp)
+
+#### Usage
+- [Tool usage](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/tools/quantification/README.md)
+
+## How to load a quantized model
+A quantification parameter (default false) was added to the Load method; pass true when loading a quantized model.
+
+[Source code](https://github.com/PaddlePaddle/paddle-mobile/blob/55302b33ea3bd68c9797d8f65e527544792b8095/src/io/paddle_mobile.h)
+
+```c++
+bool Load(const std::string &dirname, bool optimize = false,
+          bool quantification = false, int batch_size = 1);
+```
+
+- - - - -
+
+
+
+
+
+
diff --git a/src/common/types.cpp b/src/common/types.cpp
index 9bc594c7533b980626d8d07e89fc3ccf649a127f..2f366eb9e5a10ea11e3153e6e32b18204c6dd9cd 100644
--- a/src/common/types.cpp
+++ b/src/common/types.cpp
@@ -17,38 +17,46 @@ limitations under the License. */
 
 namespace paddle_mobile {
 
-const std::string G_OP_TYPE_CONV = "conv2d";
-const std::string G_OP_TYPE_BATCHNORM = "batch_norm";
-const std::string G_OP_TYPE_BOX_CODER = "box_coder";
-const std::string G_OP_TYPE_CONCAT = "concat";
-const std::string G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add";
-const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU = "fusion_conv_add_relu";
-const std::string G_OP_TYPE_FUSION_CONV_ADD_BN_RELU = "fusion_conv_add_bn_relu";
-const std::string G_OP_TYPE_FUSION_DWCONV_BN_RELU = "fusion_dwconv_bn_relu";
-
-const std::string G_OP_TYPE_FC = "fusion_fc";
-const std::string G_OP_TYPE_FUSION_CONV_ADD = "fusion_conv_add";
-const std::string G_OP_TYPE_LRN = "lrn";
-const std::string G_OP_TYPE_MUL = "mul";
-const std::string G_OP_TYPE_MULTICLASS_NMS = "multiclass_nms";
-const std::string G_OP_TYPE_POOL2D = "pool2d";
-const std::string G_OP_TYPE_PRIOR_BOX = "prior_box";
-const std::string G_OP_TYPE_RELU = "relu";
-const std::string G_OP_TYPE_RESHAPE = "reshape";
-const std::string G_OP_TYPE_SIGMOID = "sigmoid";
-const std::string G_OP_TYPE_SOFTMAX = "softmax";
-const std::string G_OP_TYPE_TRANSPOSE = "transpose";
-const std::string G_OP_TYPE_SPLIT = "split";
-const std::string G_OP_TYPE_FEED = "feed";
-const std::string G_OP_TYPE_FETCH = "fetch";
-const std::string G_OP_TYPE_DEPTHWISE_CONV = "depthwise_conv2d";
-const std::string G_OP_TYPE_IM2SEQUENCE = "im2sequence";
-const std::string G_OP_TYPE_DROPOUT = "dropout";
+const char *G_OP_TYPE_CONV = "conv2d";
+const char *G_OP_TYPE_BATCHNORM = "batch_norm";
+const char *G_OP_TYPE_BOX_CODER = "box_coder";
+const char *G_OP_TYPE_CONCAT = "concat";
+const char *G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add";
+const char *G_OP_TYPE_FUSION_CONV_ADD_RELU = "fusion_conv_add_relu";
+const char *G_OP_TYPE_FUSION_CONV_ADD_BN_RELU = "fusion_conv_add_bn_relu";
+const char *G_OP_TYPE_FUSION_DWCONV_BN_RELU = "fusion_dwconv_bn_relu";
+const char *G_OP_TYPE_FUSION_CONV_BN_RELU = "fusion_conv_bn_relu";
+const char *G_OP_TYPE_FC = "fusion_fc";
+const char *G_OP_TYPE_FUSION_CONV_ADD = "fusion_conv_add";
+const char *G_OP_TYPE_LRN = "lrn";
+const char *G_OP_TYPE_MUL = "mul";
+const char *G_OP_TYPE_MULTICLASS_NMS = "multiclass_nms";
+const char *G_OP_TYPE_POOL2D = "pool2d";
+const char *G_OP_TYPE_PRIOR_BOX = "prior_box";
+const char *G_OP_TYPE_RELU = "relu";
+const char *G_OP_TYPE_RESHAPE = "reshape";
+const char 
*G_OP_TYPE_SIGMOID = "sigmoid";
+const char *G_OP_TYPE_SOFTMAX = "softmax";
+const char *G_OP_TYPE_TRANSPOSE = "transpose";
+const char *G_OP_TYPE_SPLIT = "split";
+const char *G_OP_TYPE_FEED = "feed";
+const char *G_OP_TYPE_FETCH = "fetch";
+const char *G_OP_TYPE_DEPTHWISE_CONV = "depthwise_conv2d";
+const char *G_OP_TYPE_IM2SEQUENCE = "im2sequence";
+const char *G_OP_TYPE_DROPOUT = "dropout";
+const char *G_OP_TYPE_FUSION_CONV_ADD_BN = "fusion_conv_add_bn";
+const char *G_OP_TYPE_FUSION_POOL_BN = "fusion_pool_bn";
+const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU =
+    "fusion_elementwise_add_relu";
+const char *G_OP_TYPE_FUSION_FC_RELU = "fusion_fc_relu";
+const char *G_OP_TYPE_REGION = "region";
 
 std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
     op_input_output_key = {
         {G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
+        {G_OP_TYPE_FUSION_DWCONV_BN_RELU, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_CONV_BN_RELU, {{"Input"}, {"Out"}}},
         {G_OP_TYPE_FUSION_CONV_ADD, {{"Input"}, {"Out"}}},
         {G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
         {G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
@@ -72,6 +80,11 @@ std::unordered_map<
         {G_OP_TYPE_DEPTHWISE_CONV, {{"Input"}, {"Output"}}},
         {G_OP_TYPE_FUSION_CONV_ADD_RELU, {{"Input"}, {"Out"}}},
         {G_OP_TYPE_IM2SEQUENCE, {{"X"}, {"Out"}}},
-        {G_OP_TYPE_DROPOUT, {{"X"}, {"Out"}}}};
+        {G_OP_TYPE_DROPOUT, {{"X"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_CONV_ADD_BN, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_POOL_BN, {{"X"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU, {{"X", "Y"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_FC_RELU, {{"X", "Y", "Z"}, {"Out"}}},
+        {G_OP_TYPE_REGION, {{"X"}, {"Out"}}}};
 
 }  // namespace paddle_mobile
diff --git a/src/common/types.h b/src/common/types.h
index 1daf9c9b7bccfc8bcb584e5a37f920539736a911..7745f80a9ca2ef6f0258f6f2eacf45761d29a00e 100644
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include <string>
 #include <unordered_map>
+#include <utility>
 #include <vector>
 
 namespace paddle_mobile {
@@ -72,33 +73,40 @@ enum PMStatus {
   PMWrongDevice = 0x08 /*!< un-correct device. 
*/ }; -extern const std::string G_OP_TYPE_CONV; -extern const std::string G_OP_TYPE_BATCHNORM; -extern const std::string G_OP_TYPE_BOX_CODER; -extern const std::string G_OP_TYPE_CONCAT; -extern const std::string G_OP_TYPE_ELEMENTWISE_ADD; -extern const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU; -extern const std::string G_OP_TYPE_FC; -extern const std::string G_OP_TYPE_FUSION_CONV_ADD; -extern const std::string G_OP_TYPE_FUSION_CONV_ADD_BN_RELU; -extern const std::string G_OP_TYPE_FUSION_DWCONV_BN_RELU; - -extern const std::string G_OP_TYPE_LRN; -extern const std::string G_OP_TYPE_MUL; -extern const std::string G_OP_TYPE_MULTICLASS_NMS; -extern const std::string G_OP_TYPE_POOL2D; -extern const std::string G_OP_TYPE_PRIOR_BOX; -extern const std::string G_OP_TYPE_RELU; -extern const std::string G_OP_TYPE_RESHAPE; -extern const std::string G_OP_TYPE_SIGMOID; -extern const std::string G_OP_TYPE_SOFTMAX; -extern const std::string G_OP_TYPE_TRANSPOSE; -extern const std::string G_OP_TYPE_SPLIT; -extern const std::string G_OP_TYPE_FEED; -extern const std::string G_OP_TYPE_FETCH; -extern const std::string G_OP_TYPE_DEPTHWISE_CONV; -extern const std::string G_OP_TYPE_IM2SEQUENCE; -extern const std::string G_OP_TYPE_DROPOUT; +extern const char *G_OP_TYPE_CONV; +extern const char *G_OP_TYPE_BATCHNORM; +extern const char *G_OP_TYPE_BOX_CODER; +extern const char *G_OP_TYPE_CONCAT; +extern const char *G_OP_TYPE_ELEMENTWISE_ADD; +extern const char *G_OP_TYPE_FUSION_CONV_ADD_RELU; +extern const char *G_OP_TYPE_FC; +extern const char *G_OP_TYPE_FUSION_CONV_ADD; +extern const char *G_OP_TYPE_FUSION_CONV_ADD_BN_RELU; +extern const char *G_OP_TYPE_FUSION_DWCONV_BN_RELU; +extern const char *G_OP_TYPE_FUSION_CONV_BN_RELU; + +extern const char *G_OP_TYPE_LRN; +extern const char *G_OP_TYPE_MUL; +extern const char *G_OP_TYPE_MULTICLASS_NMS; +extern const char *G_OP_TYPE_POOL2D; +extern const char *G_OP_TYPE_PRIOR_BOX; +extern const char *G_OP_TYPE_RELU; +extern const char *G_OP_TYPE_RESHAPE; +extern const char *G_OP_TYPE_SIGMOID; +extern const char *G_OP_TYPE_SOFTMAX; +extern const char *G_OP_TYPE_TRANSPOSE; +extern const char *G_OP_TYPE_SPLIT; +extern const char *G_OP_TYPE_FEED; +extern const char *G_OP_TYPE_FETCH; +extern const char *G_OP_TYPE_DEPTHWISE_CONV; +extern const char *G_OP_TYPE_IM2SEQUENCE; +extern const char *G_OP_TYPE_DROPOUT; + +extern const char *G_OP_TYPE_FUSION_CONV_ADD_BN; +extern const char *G_OP_TYPE_FUSION_POOL_BN; +extern const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU; +extern const char *G_OP_TYPE_FUSION_FC_RELU; +extern const char *G_OP_TYPE_REGION; extern std::unordered_map< std::string, std::pair, std::vector>> diff --git a/src/common/variant.h b/src/common/variant.h index 9d0aa3019fbfdd5acbaed8a1140bc58c33f7f438..00b8eb985d8f7fc22bb93a3e229aa387c358e257 100644 --- a/src/common/variant.h +++ b/src/common/variant.h @@ -84,7 +84,7 @@ struct Variant { if (type_id == typeid(T).hash_code()) { return *const_cast(reinterpret_cast(&data)); } else { - PADDLE_MOBILE_THROW_EXCEPTION(" bad cast in variant "); + PADDLE_MOBILE_THROW_EXCEPTION(" bad cast in variant"); exit(0); } } diff --git a/src/fpga/api/fpga_api.cpp b/src/fpga/api/fpga_api.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a913d6e39cddda97b347c0675717c265dfa89d18 --- /dev/null +++ b/src/fpga/api/fpga_api.cpp @@ -0,0 +1,68 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include "fpga/api/fpga_api.h"
+
+namespace paddle {
+namespace mobile {
+namespace fpga {
+namespace api {
+
+static int fd = -1;
+static const char *device_path = "/dev/fpgadrv0";
+
+static inline int do_ioctl(int req, void *arg) { return ioctl(fd, req, arg); }
+
+int open_device() {
+  if (fd == -1) {
+    fd = open(device_path, O_RDWR);
+  }
+  return fd;
+}
+
+// memory management;
+void *fpga_malloc(size_t size) {
+  return reinterpret_cast<void *>(mmap64(NULL, size, PROT_READ | PROT_WRITE,
+                                         MAP_SHARED, fd, 0));
+}
+
+// NOTE: munmap expects the real mapping length; 0 is a placeholder until
+// allocation sizes are tracked.
+void fpga_free(void *ptr) { munmap(ptr, 0); }
+
+void fpga_copy(void *dest, const void *src, size_t num) {
+  memcpy(dest, src, num);
+}
+
+int ComputeFpgaConv(struct FpgaConvArgs) { return 0; }
+int ComputeFpgaPool(struct FpgaPoolArgs) { return 0; }
+int ComputeFpgaEWAdd(struct FpgaEWAddArgs) { return 0; }
+
+}  // namespace api
+}  // namespace fpga
+}  // namespace mobile
+}  // namespace paddle
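A sketch of how a caller is expected to drive this API: open the device once, stage buffers through the mmap-backed allocator, then copy data in and out. The buffer size and payload below are made up for illustration, and error handling is mostly elided:

```c++
#include "fpga/api/fpga_api.h"

int main() {
  namespace api = paddle::mobile::fpga::api;
  if (api::open_device() < 0) return 1;  // requires /dev/fpgadrv0

  // Shared CPU/FPGA buffer backed by the driver's mmap region.
  const size_t kBytes = 1024;            // illustrative size
  void *buf = api::fpga_malloc(kBytes);

  float weights[256] = {0};              // illustrative payload
  api::fpga_copy(buf, weights, sizeof(weights));

  api::fpga_free(buf);
  return 0;
}
```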
diff --git a/src/fpga/api/fpga_api.h b/src/fpga/api/fpga_api.h
new file mode 100644
index 0000000000000000000000000000000000000000..2dfc285af4506c055f6780d7b3d393433c0904a8
--- /dev/null
+++ b/src/fpga/api/fpga_api.h
@@ -0,0 +1,88 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+
+// memory management;
+
+namespace paddle {
+namespace mobile {
+namespace fpga {
+namespace api {
+
+int open_device();
+int close_device();
+
+void *fpga_malloc(size_t size);
+void fpga_free(void *ptr);
+void fpga_copy(void *dst, const void *src, size_t num);
+
+struct FpgaVersionArgs {
+  void *buf;
+};
+
+struct MemoryToPhysicalArgs {
+  const void *src;
+  uint64_t physical;
+};
+
+struct MemoryCopyArgs {
+  void *src;
+  void *dst;
+  size_t size;
+};
+
+struct FpgaQuantArgs {
+  float scale;
+};
+
+struct FpgaBNArgs {};
+
+struct FpgaConvArgs {
+  bool enable_BN = false;
+  bool enable_Relu = false;
+  struct FpgaBNArgs bn_parm;
+};
+
+struct FpgaPoolArgs {
+  bool enable_BN = false;
+  struct FpgaBNArgs bn_parm;
+};
+
+struct FpgaEWAddArgs {  // only support X + Y
+  bool enable_Relu = false;
+};
+
+int ComputeFpgaConv(struct FpgaConvArgs);
+int ComputeFpgaPool(struct FpgaPoolArgs);
+int ComputeFpgaEWAdd(struct FpgaEWAddArgs);
+
+#define IOCTL_FPGA_MAGIC 'FPGA'
+#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
+#define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
+#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs)
+#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
+#define IOCTL_MEM_TOPHY _IOW(IOCTL_FPGA_MAGIC, 12, struct MemoryToPhysicalArgs)
+#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
+#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs)
+#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)
+
+}  // namespace api
+}  // namespace fpga
+}  // namespace mobile
+}  // namespace paddle
diff --git a/src/framework/operator.cpp b/src/framework/operator.cpp
index 36b4663cb603d29bb60cfc297899d1c300e8ca91..765103c241a82ac224d707340f8b66ace827e335 100644
--- a/src/framework/operator.cpp
+++ b/src/framework/operator.cpp
@@ -28,6 +28,16 @@ vector<string> OperatorBase<Dtype>::GetOutKeys() const {
   return it->second.second;
 }
 
+template <typename Dtype>
+vector<string> OperatorBase<Dtype>::GetInputKeys() const {
+  auto it = op_input_output_key.find(type_);
+  if (it == op_input_output_key.end()) {
+    DLOG << type_ << " has no inputs";
+    return {};
+  }
+  return it->second.first;
+}
+
 template <typename Dtype>
 OperatorBase<Dtype>::OperatorBase(const std::string &type,
                                   const VariableNameMap &inputs,
@@ -49,6 +59,11 @@ template <typename Dtype>
 void OperatorBase<Dtype>::Run() const {
   RunImpl();
 #ifdef PADDLE_MOBILE_DEBUG
+  vector<string> input_keys = GetInputKeys();
+  for (const auto key : input_keys) {
+    Tensor *input = GetVarValue<framework::LoDTensor>(key, inputs_, *scope_);
+    DLOG << type_ << " input- " << key << "=" << *input;
+  }
   vector<string> output_keys = GetOutKeys();
   for (const auto key : output_keys) {
     Tensor *out_ = GetVarValue<framework::LoDTensor>(key, outputs_, *scope_);
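The new input-side logging resolves an op's variable names through op_input_output_key (see src/common/types.cpp above), which maps each op type to its canonical input and output keys. A self-contained sketch of that lookup, with a stub table standing in for the real one:

```c++
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using KeyTable = std::unordered_map<
    std::string,
    std::pair<std::vector<std::string>, std::vector<std::string>>>;

int main() {
  // Stub of op_input_output_key; the real table lives in src/common/types.cpp.
  KeyTable table = {{"conv2d", {{"Input"}, {"Output"}}},
                    {"fusion_fc", {{"X", "Y", "Z"}, {"Out"}}}};

  auto it = table.find("fusion_fc");
  if (it == table.end()) return 1;  // unknown op type: nothing to log
  for (const auto &key : it->second.first) {
    std::cout << "input key: " << key << "\n";  // prints X, Y, Z
  }
  return 0;
}
```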
diff --git a/src/framework/operator.h b/src/framework/operator.h
index 793551b0cd3eea290243c156c27616a34c37a3d2..084ac3c81185fe489fe1ca67589c1e8edb1d4fdf 100644
--- a/src/framework/operator.h
+++ b/src/framework/operator.h
@@ -61,6 +61,7 @@ class OperatorBase {
   virtual ~OperatorBase() {}
   void Run() const;
   std::vector<string> GetOutKeys() const;
+  std::vector<string> GetInputKeys() const;
   virtual void RunImpl() const = 0;
 
   virtual void Init() = 0;
@@ -118,6 +119,10 @@ class OperatorWithKernel : public OperatorBase<DeviceType> {
   virtual void InferShape() const = 0;
 
   void Init() {
+    //    for (auto i : this->inputs_) {
+    //      DLOG << i.first;
+    //      DLOG << i.second;
+    //    }
     PADDLE_MOBILE_ENFORCE(kernel_.Init(&param_), "  %s kernel init failed",
                           this->type_.c_str());
   }
@@ -146,7 +151,7 @@ class OpKernelBase {
   }
 #endif
   virtual void Compute(const P &para) const = 0;
-  virtual bool Init(P *para) { return true; };
+  virtual bool Init(P *para) { return true; }
   virtual ~OpKernelBase() = default;
 
  private:
diff --git a/src/framework/program/program-optimize/fusion_op_register.h b/src/framework/program/program-optimize/fusion_op_register.h
index 1cd6b1dd779f9bc9ff0f5be5513c4fa716d80b10..f16a65c28fb47e1cf4139588742ebe1073c3f3e6 100644
--- a/src/framework/program/program-optimize/fusion_op_register.h
+++ b/src/framework/program/program-optimize/fusion_op_register.h
@@ -42,8 +42,17 @@ class FusionOpRegister {
     matchers_[matcher->Type()] = shared_matcher;
   }
 
-  const std::map<std::string, std::shared_ptr<FusionOpMatcher>> Matchers() {
-    return matchers_;
+  const std::vector<std::shared_ptr<FusionOpMatcher>> Matchers() {
+    std::vector<std::shared_ptr<FusionOpMatcher>> matchers;
+    for (const auto& match : matchers_) {
+      matchers.push_back(match.second);
+    }
+    std::sort(matchers.begin(), matchers.end(),
+              [](std::shared_ptr<FusionOpMatcher> first,
+                 std::shared_ptr<FusionOpMatcher> second) {
+                return first->BeginNode().Depth() > second->BeginNode().Depth();
+              });
+    return matchers;
   }
 
  private:
diff --git a/src/framework/program/program-optimize/node.cpp b/src/framework/program/program-optimize/node.cpp
index e635e07eaf4484c3e390101c3b43fdaf24bbd2c6..a4e1db506da362df4fb61b39827d5e77ebc425eb 100644
--- a/src/framework/program/program-optimize/node.cpp
+++ b/src/framework/program/program-optimize/node.cpp
@@ -44,23 +44,6 @@ bool Node::operator==(const Node &in) {
   return true;
 }
 
-std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs(int size) {
-  std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
-  OpDescs(size - 1, &op_descs);
-  return op_descs;
-}
-
-void Node::OpDescs(int index,
-                   std::vector<std::shared_ptr<framework::OpDesc>> *op_desc) {
-  if (index == 0) {
-    return;
-  }
-  op_desc->push_back(this->op_desc_);
-  for (auto &output : outputs_) {
-    output->OpDescs(index, op_desc);
-  }
-}
-
 std::shared_ptr<Node> Node::To(int size) {
   std::shared_ptr<Node> node = std::make_shared<Node>();
   this->To(size - 1, node);
diff --git a/src/framework/program/program-optimize/node.h b/src/framework/program/program-optimize/node.h
index 88bf1e16ed2a5fb3a038eadd546d63ffb3916f68..7eb179c243c28fe2668c3cf2f8f28f81312c0988 100644
--- a/src/framework/program/program-optimize/node.h
+++ b/src/framework/program/program-optimize/node.h
@@ -47,13 +47,10 @@ class Node {
       std::map<std::string, std::vector<std::pair<std::string, std::string>>>
           change,
       std::vector<std::shared_ptr<Node>> *removed_nodes);
-  std::vector<std::shared_ptr<framework::OpDesc>> OpDescs(int size);
   std::shared_ptr<framework::OpDesc> OpDescOfNode() { return op_desc_; }
   std::string Type() { return type_; }
 
  private:
-  void OpDescs(int size,
-               std::vector<std::shared_ptr<framework::OpDesc>> *op_desc);
   void To(int index, std::shared_ptr<Node>);
   void Folder(
       std::shared_ptr<framework::OpDesc> op_desc,
diff --git a/src/framework/program/program-optimize/program_optimize.cpp b/src/framework/program/program-optimize/program_optimize.cpp
index 3619bc79f576651245aa322992df9d318c810cd4..82d33bc65d864e010fbe41b270b71ed98a21b33e 100644
--- a/src/framework/program/program-optimize/program_optimize.cpp
+++ b/src/framework/program/program-optimize/program_optimize.cpp
@@ -78,9 +78,8 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FusionOptimize(
   }
 
   for (auto &registed : FusionOpRegister::Instance()->Matchers()) {
-    std::string fusion_type = registed.first;
-    std::shared_ptr<FusionOpMatcher> matcher = registed.second;
-    //  DLOG << " registed node \n " << matcher->BeginNode();
+    std::string fusion_type = registed->Type();
+    std::shared_ptr<FusionOpMatcher> matcher = registed;
 
     auto match_vector = type_map[matcher->BeginType()];
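Matchers() now hands back the registered matchers sorted by descending begin-node depth, so the longest fusion pattern is tried first (e.g. conv+add+bn+relu before plain conv+add on the same subgraph). A toy illustration of that ordering, with a plain int standing in for BeginNode().Depth():

```c++
#include <algorithm>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Stand-in for FusionOpMatcher: only the fields the sort cares about.
struct ToyMatcher {
  std::string type;
  int depth;  // stand-in for BeginNode().Depth()
};

int main() {
  std::vector<std::shared_ptr<ToyMatcher>> matchers = {
      std::make_shared<ToyMatcher>(ToyMatcher{"fusion_conv_add", 2}),
      std::make_shared<ToyMatcher>(ToyMatcher{"fusion_conv_add_bn_relu", 4}),
  };
  // Deeper (longer) patterns first, mirroring FusionOpRegister::Matchers().
  std::sort(matchers.begin(), matchers.end(),
            [](const std::shared_ptr<ToyMatcher> &a,
               const std::shared_ptr<ToyMatcher> &b) {
              return a->depth > b->depth;
            });
  for (const auto &m : matchers) std::cout << m->type << "\n";
  return 0;  // prints fusion_conv_add_bn_relu before fusion_conv_add
}
```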
diff --git a/src/framework/program/program.h b/src/framework/program/program.h
index 5760efc826667d805695118b12e41efa0305553b..e500d500344d83204bf388401541259b90ea2f78 100644
--- a/src/framework/program/program.h
+++ b/src/framework/program/program.h
@@ -30,6 +30,7 @@ class Program {
   std::string model_path;
   std::string para_path;
   bool combined = false;
+  bool quantification = false;
 
  private:
 };
diff --git a/src/io/executor.cpp b/src/io/executor.cpp
index 480f48290cc1bbf4888832d76187a13a4915ec40..65f019d1e3c3f6f6bdb8a18a9ff99bb7ecb2012c 100644
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -154,7 +154,7 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
 
   tensor->Resize(framework::make_ddim(desc.Dims()));
 
-  void *memory = tensor;
+  void *memory = nullptr;
   int type_size = 0;
   switch (desc.DataType()) {
     case framework::VARTYPE_TYPE_FP16:
@@ -179,11 +179,25 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
     default:
       break;
   }
-
-  for (int n = 0; n < memory_size * type_size; ++n) {
-    static_cast<char *>(memory)[n] = (*data)[n];
+  if (program_.quantification) {
+    float min_value;
+    float max_value;
+
+    memcpy(&min_value, *data, sizeof(float));
+    memcpy(&max_value, *data + sizeof(float), sizeof(float));
+    *data += 2 * sizeof(float);
+    const float factor = (max_value - min_value) / 255.0;
+    uint8_t *uint8_data = (uint8_t *)(*data);
+    for (int k = 0; k < memory_size; ++k) {
+      static_cast<float *>(memory)[k] = uint8_data[k] * factor + min_value;
+    }
+    *data += (memory_size * sizeof(uint8_t));
+  } else {
+    for (int n = 0; n < memory_size * type_size; ++n) {
+      static_cast<char *>(memory)[n] = (*data)[n];
+    }
+    (*data) += (sizeof(char) * memory_size * type_size);
   }
-  (*data) += (sizeof(char) * memory_size * type_size);
 }
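LoadMemory above implies the on-disk layout of a quantized parameter: two float32 values (min, max) followed by memory_size uint8 codes, each decoded as code * (max - min) / 255 + min. A standalone sketch of the round trip, with made-up weights:

```c++
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Encode: map [min, max] linearly onto the 256 uint8 codes.
  std::vector<float> weights = {-1.5f, 0.0f, 0.25f, 2.0f};  // made-up values
  const float min_v = -1.5f, max_v = 2.0f;
  const float factor = (max_v - min_v) / 255.0f;
  std::vector<uint8_t> codes;
  for (float w : weights) {
    codes.push_back(static_cast<uint8_t>((w - min_v) / factor + 0.5f));
  }
  // Decode, mirroring Executor::LoadMemory: code * factor + min.
  for (uint8_t c : codes) {
    std::printf("%f\n", c * factor + min_v);  // within one step of the original
  }
  return 0;
}
```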
diff --git a/src/io/loader.cpp b/src/io/loader.cpp
index 51e007a6ab4bce415628649a40f711903bceee92..9ed877d05d51dfbe7139ea2289fdb6480c62f88f 100644
--- a/src/io/loader.cpp
+++ b/src/io/loader.cpp
@@ -44,26 +44,29 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
 
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
-    const std::string &dirname, bool optimize, bool can_add_split) {
-  auto program =
-      this->LoadProgram(dirname + "/__model__", optimize, can_add_split);
+    const std::string &dirname, bool optimize, bool quantification,
+    bool can_add_split) {
+  auto program = this->LoadProgram(dirname + "/__model__", optimize,
+                                   quantification, can_add_split);
   program.model_path = dirname;
   return program;
 }
 
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
-    const std::string &model_path, const std::string &para_path,
-    bool optimize) {
+    const std::string &model_path, const std::string &para_path, bool optimize,
+    bool quantification) {
   auto program = this->LoadProgram(model_path, optimize);
 
   program.para_path = para_path;
   program.combined = true;
+  program.quantification = quantification;
   return program;
 }
 
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
-    const std::string &model_path, bool optimize, bool can_add_split) {
+    const std::string &model_path, bool optimize, bool quantification,
+    bool can_add_split) {
   std::string model_filename = model_path;
   PaddleMobile__Framework__Proto__ProgramDesc *c_program;
   uint8_t *buf = NULL;
@@ -82,6 +85,7 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
 
   framework::Program<Dtype, P> program;
   program.originProgram = originProgramDesc;
+  program.quantification = quantification;
 
   auto scope = std::make_shared<framework::Scope>();
   program.scope = scope;
diff --git a/src/io/loader.h b/src/io/loader.h
index 5e3c53dc9db858f506a13d2105339038340344a6..512cee831f0a09f8223c07c531eb9d1c74e75d92 100644
--- a/src/io/loader.h
+++ b/src/io/loader.h
@@ -30,6 +30,7 @@ class Loader {
    * */
   const framework::Program<Dtype, P> Load(const std::string &dirname,
                                           bool optimize = false,
+                                          bool quantification = false,
                                           bool can_add_split = false);
 
   /*
@@ -38,11 +39,13 @@ class Loader {
    * */
   const framework::Program<Dtype, P> Load(const std::string &model_path,
                                           const std::string &para_path,
-                                          bool optimize = false);
+                                          bool optimize = false,
+                                          bool quantification = false);
 
  private:
   const framework::Program<Dtype, P> LoadProgram(const std::string &model_path,
                                                  bool optimize = false,
+                                                 bool quantification = false,
                                                  bool can_add_split = false);
 };
diff --git a/src/io/paddle_mobile.cpp b/src/io/paddle_mobile.cpp
index cabdd799a0e7d561d8bc56c0913f1389c38f8907..5e2e209d64aa7a00b56a5bdbbff88cb3097b7b94 100644
--- a/src/io/paddle_mobile.cpp
+++ b/src/io/paddle_mobile.cpp
@@ -26,7 +26,7 @@ void PaddleMobile<Dtype, P>::SetThreadNum(int num) {
 
 template <typename Dtype, Precision P>
 bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
-                                  int batch_size) {
+                                  bool quantification, int batch_size) {
   if (loader_.get() == nullptr) {
     loader_ = std::make_shared<Loader<Dtype, P>>();
   } else {
@@ -35,7 +35,7 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
 
   if (executor_.get() == nullptr) {
     executor_ = std::make_shared<Executor<Dtype, P>>(
-        loader_->Load(dirname, optimize), batch_size, optimize);
+        loader_->Load(dirname, optimize, quantification), batch_size, optimize);
   } else {
     LOG(kLOG_INFO) << "executor inited";
   }
@@ -46,7 +46,7 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
 template <typename Dtype, Precision P>
 bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
                                   const std::string &para_path, bool optimize,
-                                  int batch_size) {
+                                  bool quantification, int batch_size) {
   if (loader_.get() == nullptr) {
     loader_ = std::make_shared<Loader<Dtype, P>>();
   } else {
@@ -55,7 +55,8 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
 
   if (executor_.get() == nullptr) {
     executor_ = std::make_shared<Executor<Dtype, P>>(
-        loader_->Load(model_path, para_path, optimize), batch_size, optimize);
+        loader_->Load(model_path, para_path, optimize, quantification),
+        batch_size, optimize);
   } else {
     LOG(kLOG_INFO) << "executor inited";
   }
diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h
index 74c11471566c3db8a37ea2d62e0496e5d40cb3b7..5dc3ccb21dd7e67fbe9b5032d01046b12728dc64 100644
--- a/src/io/paddle_mobile.h
+++ b/src/io/paddle_mobile.h
@@ -39,14 +39,18 @@ class PaddleMobile {
    * @b load a fluid model stored as separate files
    * */
   bool Load(const std::string &dirname, bool optimize = false,
-            int batch_size = 1);
+            bool quantification = false, int batch_size = 1);
 
   /*
    * @b load combine format fluid mode
    * @b load a model stored in the combined format
    * */
   bool Load(const std::string &model_path, const std::string &para_path,
-            bool optimize = false, int batch_size = 1);
+            bool optimize = false, bool quantification = false,
+            int batch_size = 1);
+  /*
+   * @b set the thread count; takes effect when openmp is enabled in cmake
+   * */
   void SetThreadNum(int num);
 
   /*
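Putting the new flags together, loading a quantized combined-format model could look like the following; the paths are placeholders and the default template arguments of PaddleMobile are assumed:

```c++
#include "io/paddle_mobile.h"

int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> pm;
  pm.SetThreadNum(4);  // a no-op unless built with USE_OPENMP

  // optimize=true enables op fusion; quantification=true expects the
  // min/max + uint8 parameter layout produced by tools/quantification.
  bool ok = pm.Load("/sdcard/model/__model__", "/sdcard/model/params",
                    /*optimize=*/true, /*quantification=*/true);
  return ok ? 0 : 1;
}
```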
diff --git a/src/memory/t_malloc.cpp b/src/memory/t_malloc.cpp
index 0252f3c07c06487720586b0f650e2179d247234f..178541953323b6ffd1a3339f8209c2839b37a784 100644
--- a/src/memory/t_malloc.cpp
+++ b/src/memory/t_malloc.cpp
@@ -16,10 +16,32 @@ limitations under the License. */
 #include "memory/t_malloc.h"
 #include <cstring>
 
+#ifdef PADDLE_MOBILE_FPGA
+
+#include "fpga/api/fpga_api.h"
+
+#endif
+
 namespace paddle_mobile {
 namespace memory {
 const int MALLOC_ALIGN = 64;
 
+#ifdef PADDLE_MOBILE_FPGA
+namespace api = paddle::mobile::fpga::api;
+
+void Copy(void *dst, const void *src, size_t num) {
+  std::memcpy(dst, src, num);
+}
+
+void *Alloc(size_t size) { return api::fpga_malloc(size); }
+
+void Free(void *ptr) {
+  if (ptr) {
+    api::fpga_free(ptr);
+  }
+}
+
+#else
 
 void Copy(void *dst, const void *src, size_t num) {
   std::memcpy(dst, src, num);
 }
@@ -42,5 +64,7 @@ void Free(void *ptr) {
   }
 }
 
+#endif
+
 }  // namespace memory
 }  // namespace paddle_mobile
diff --git a/src/operators/batchnorm_op.cpp b/src/operators/batchnorm_op.cpp
index 644a27c586375bc66d327e18ac5182e8fce2893b..f820908404ea637d9680c32d5c4b5568e191dd7e 100644
--- a/src/operators/batchnorm_op.cpp
+++ b/src/operators/batchnorm_op.cpp
@@ -26,7 +26,7 @@ void BatchNormOp<Dtype, T>::InferShape() const {
   auto x_dims = this->param_.InputX()->dims();
   this->param_.OutputY()->Resize(x_dims);
 }
-template class BatchNormOp<CPU, float>;
+
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/box_coder_op.cpp b/src/operators/box_coder_op.cpp
index dece07d5efcfae9629842aead04d0274b9d82c93..9e57c9021dac1b6857752989727c1c86051e33f7 100644
--- a/src/operators/box_coder_op.cpp
+++ b/src/operators/box_coder_op.cpp
@@ -47,7 +47,7 @@ void BoxCoderOp<Dtype, T>::InferShape() const {
   this->param_.OutputBox()->Resize(framework::make_ddim(
       {input_targetbox_dims[0], input_priorbox_dims[0], 4}));
 }
-template class BoxCoderOp<CPU, float>;
+
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/concat_op.cpp b/src/operators/concat_op.cpp
index 9c524df351549fd0141294be805d77b3f1057362..19d771ddd5884412624a0720368ecc80f92678ea 100644
--- a/src/operators/concat_op.cpp
+++ b/src/operators/concat_op.cpp
@@ -56,7 +56,6 @@ void ConcatOp<Dtype, T>::InferShape() const {
   this->param_.Out()->Resize(out_dims);
 }
 
-template class ConcatOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/conv_op.cpp b/src/operators/conv_op.cpp
index 1b00ed06eee2b1676667b9c54b8601c8872b6699..c4601995219b32db75f22c7c2ed959e18af85f36 100644
--- a/src/operators/conv_op.cpp
+++ b/src/operators/conv_op.cpp
@@ -48,8 +48,6 @@ void ConvOp<Dtype, T>::InferShape() const {
   this->param_.Output()->Resize(ddim);
 }
 
-template class ConvOp<CPU, float>;
-
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/depthwise_conv_op.cpp b/src/operators/depthwise_conv_op.cpp
index bee90781cd2de9d65bbbee3193cc922e743706de..8d6b6a143c37537be6de1e60cc095f1052136e26 100644
--- a/src/operators/depthwise_conv_op.cpp
+++ b/src/operators/depthwise_conv_op.cpp
@@ -49,8 +49,6 @@ void DepthwiseConvOp<Dtype, T>::InferShape() const {
   this->param_.Output()->Resize(ddim);
 }
 
-template class DepthwiseConvOp<CPU, float>;
-
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/dropout_op.cpp b/src/operators/dropout_op.cpp
index f7f5ca2475171f5756ee8cf4f13754d07df8fe01..a632aa0c52b19c591467f94afb216245a596680b 100644
--- a/src/operators/dropout_op.cpp
+++ b/src/operators/dropout_op.cpp
@@ -22,7 +22,7 @@ void DropoutOp<Dtype, T>::InferShape() const {
   auto input_dims = this->param_.InputX()->dims();
   this->param_.Out()->Resize(input_dims);
 }
-template class DropoutOp<CPU, float>;
+
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/elementwise_add_op.cpp b/src/operators/elementwise_add_op.cpp
index 369589574139c7bc68debb7c55836926a3d5f6b2..49885f783417d61c6348fc4563e7306036994f17 100644
--- 
a/src/operators/elementwise_add_op.cpp +++ b/src/operators/elementwise_add_op.cpp @@ -24,7 +24,7 @@ void ElementwiseAddOp::InferShape() const { auto x_dim = this->param_.InputX()->dims(); this->param_.Out()->Resize(x_dim); } -template class ElementwiseAddOp; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/feed_op.cpp b/src/operators/feed_op.cpp index c4357d7993cd91a306fec5856eaa6839e9ab6a6e..4447f2c699fc929805f15a265440803e6ff34b56 100644 --- a/src/operators/feed_op.cpp +++ b/src/operators/feed_op.cpp @@ -14,10 +14,7 @@ limitations under the License. */ #include "feed_op.h" namespace paddle_mobile { -namespace operators { - -template class FeedOp; -} +namespace operators {} } // namespace paddle_mobile namespace ops = paddle_mobile::operators; diff --git a/src/operators/fetch_op.cpp b/src/operators/fetch_op.cpp index cdbe413c955b931a16e716aa2e18d2a018a53bab..adbd61d5ec364a40b565059ceb5d5d49999c8436 100644 --- a/src/operators/fetch_op.cpp +++ b/src/operators/fetch_op.cpp @@ -14,10 +14,7 @@ limitations under the License. */ #include "fetch_op.h" namespace paddle_mobile { -namespace operators { - -template class FetchOp; -} +namespace operators {} } // namespace paddle_mobile namespace ops = paddle_mobile::operators; diff --git a/src/operators/fusion_conv_add.cpp b/src/operators/fusion_conv_add.cpp index b1dba23be0d8ea010b38844b1897381fbf578617..cdd6a6db2bb11ebf8dce2aca85630aa8805adf3e 100644 --- a/src/operators/fusion_conv_add.cpp +++ b/src/operators/fusion_conv_add.cpp @@ -45,7 +45,6 @@ void FusionConvAddOp::InferShape() const { this->param_.Output()->Resize(ddim); } -template class FusionConvAddOp; } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/fusion_conv_add.h b/src/operators/fusion_conv_add.h index d23c99e5cd5e9cc50dc77672d9aaaf54dfdc545c..170df9ce33e4ab90297664fbc81d723e7c246f83 100644 --- a/src/operators/fusion_conv_add.h +++ b/src/operators/fusion_conv_add.h @@ -36,8 +36,6 @@ class FusionConvAddMatcher : public framework::FusionOpMatcher { void FolderNodes( framework::Node *node, std::vector> *removed_nodes) { - vector> origin_descs = - node->OpDescs(node_.Depth()); node->Folder(node_.Depth(), Type(), {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}}}, removed_nodes); } @@ -68,11 +66,11 @@ class FusionConvAddOp : public framework::OperatorWithKernel< #ifdef PADDLE_MOBILE_CPU -//#ifndef CONV_ADD_REGISTER -// static framework::FusionOpRegistrar convadd_registrar( -// new FusionConvAddMatcher()); -//#define CONV_ADD_REGISTER -//#endif +#ifndef CONV_ADD_REGISTER +static framework::FusionOpRegistrar convadd_registrar( + new FusionConvAddMatcher()); +#define CONV_ADD_REGISTER +#endif #endif diff --git a/src/operators/fusion_conv_add_bn_relu_op.cpp b/src/operators/fusion_conv_add_bn_relu_op.cpp index 62839c1a5acaf89a3efef39bbe4a67c675da393b..16f4650a64ec0c363d5fa94ee27c15c73cf58a70 100644 --- a/src/operators/fusion_conv_add_bn_relu_op.cpp +++ b/src/operators/fusion_conv_add_bn_relu_op.cpp @@ -44,7 +44,7 @@ void FusionConvAddBNReluOp::InferShape() const { framework::DDim ddim = framework::make_ddim(output_shape); this->param_.Output()->Resize(ddim); } -template class FusionConvAddBNReluOp; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/fusion_conv_add_bn_relu_op.h b/src/operators/fusion_conv_add_bn_relu_op.h index 389c76cc83a532fe706d911903a8412bb8bfb4ca..19e33465c06921e9a6a7beb77053f05a03a6c760 100644 --- a/src/operators/fusion_conv_add_bn_relu_op.h +++ 
b/src/operators/fusion_conv_add_bn_relu_op.h @@ -39,8 +39,6 @@ class FusionConvAddBNReluMatcher : public framework::FusionOpMatcher { void FolderNodes( framework::Node *node, std::vector> *removed_nodes) { - vector> origin_descs = - node->OpDescs(node_.Depth()); node->Folder(node_.Depth(), Type(), {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}}, {G_OP_TYPE_BATCHNORM, diff --git a/src/operators/fusion_conv_add_relu_op.cpp b/src/operators/fusion_conv_add_relu_op.cpp index 5575b52ce9866901a13c630a7509c7e5ec5401cb..18618886cccba08c7502b3e1d75fbba9b6916f56 100644 --- a/src/operators/fusion_conv_add_relu_op.cpp +++ b/src/operators/fusion_conv_add_relu_op.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifdef CONVADDRELU_OP +#ifdef FUSION_CONVADDRELU_OP #include "fusion_conv_add_relu_op.h" #include "operators/math/conv_func.h" diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h index cf68fac8cf6dad4eb8469a543656311e5cedc9e7..50a4a2c7c64526c9a5dc1057829ed14f09357780 100644 --- a/src/operators/fusion_conv_add_relu_op.h +++ b/src/operators/fusion_conv_add_relu_op.h @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifdef CONVADDRELU_OP +#ifdef FUSION_CONVADDRELU_OP #pragma once diff --git a/src/operators/fusion_conv_bn_relu_op.cpp b/src/operators/fusion_conv_bn_relu_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..49fe9c933a5a9695f2c18bd0921c2d36063dc065 --- /dev/null +++ b/src/operators/fusion_conv_bn_relu_op.cpp @@ -0,0 +1,60 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/
+
+#ifdef FUSION_CONVBNRELU_OP
+
+#include "operators/fusion_conv_bn_relu_op.h"
+#include "operators/math/conv_func.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <typename Dtype, typename T>
+void FusionConvBNReluOp<Dtype, T>::InferShape() const {
+  auto in_dims = this->param_.Input()->dims();
+  auto filter_dims = this->param_.Filter()->dims();
+  const std::vector<int> &strides = this->param_.Strides();
+  std::vector<int> paddings = this->param_.Paddings();
+  int groups = this->param_.Groups();
+  std::vector<int> dilations = this->param_.Dilations();
+
+  PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
+                         dilations.size() == paddings.size() &&
+                         paddings.size() == strides.size()),
+                        "ConvParam is not suitable");
+
+  std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
+  for (size_t i = 0; i < strides.size(); ++i) {
+    output_shape.push_back(
+        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
+                             paddings[i], strides[i]));
+  }
+
+  framework::DDim ddim = framework::make_ddim(output_shape);
+  this->param_.Output()->Resize(ddim);
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(fusion_conv_bn_relu, ops::FusionConvBNReluOp);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+#endif
+
+#endif
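InferShape above delegates the size arithmetic to math::ConvOutputSize. A hedged restatement of that formula, assuming the conventional dilated-convolution definition:

```c++
#include <cstdio>

// Standard conv output-size arithmetic: the effective kernel spans
// dilation * (kernel - 1) + 1 input elements once dilation is applied.
int ConvOutputSizeSketch(int input, int kernel, int dilation, int padding,
                         int stride) {
  int dkernel = dilation * (kernel - 1) + 1;
  return (input + 2 * padding - dkernel) / stride + 1;
}

int main() {
  // A 3x3, stride-2, pad-1 convolution over a 224x224 input -> 112x112.
  std::printf("%d\n", ConvOutputSizeSketch(224, 3, 1, 1, 2));
  return 0;
}
```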
diff --git a/src/operators/fusion_conv_bn_relu_op.h b/src/operators/fusion_conv_bn_relu_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..4c2c1033ac0a4d6c8e3bc3f188a66884dd9e0642
--- /dev/null
+++ b/src/operators/fusion_conv_bn_relu_op.h
@@ -0,0 +1,103 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVBNRELU_OP
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "framework/operator.h"
+#include "framework/program/program-optimize/fusion_op_register.h"
+#include "operators/kernel/conv_bn_relu_kernel.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+using std::string;
+using std::vector;
+class FusionConvBNReluMatcher : public framework::FusionOpMatcher {
+ public:
+  FusionConvBNReluMatcher() {
+    node_ = framework::Node(G_OP_TYPE_CONV);
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_BATCHNORM) >
+        std::make_shared<framework::Node>(G_OP_TYPE_RELU);
+  }
+
+  void FolderNodes(
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    node->Folder(node_.Depth(), Type(),
+                 {{G_OP_TYPE_BATCHNORM,
+                   {{"Scale", "Scale"},
+                    {"Mean", "Mean"},
+                    {"Bias", "Bias"},
+                    {"Variance", "Variance"}}}},
+                 removed_nodes);
+  }
+
+  std::string Type() { return G_OP_TYPE_FUSION_CONV_BN_RELU; }
+};
+
+template <typename DeviceType, typename T>
+class FusionConvBNReluOp : public framework::OperatorWithKernel<
+                               DeviceType, FusionConvBNReluParam,
+                               operators::ConvBNReluKernel<DeviceType, T>> {
+ public:
+  FusionConvBNReluOp(const string &type, const VariableNameMap &inputs,
+                     const VariableNameMap &outputs,
+                     const framework::AttributeMap &attrs,
+                     std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<
+            DeviceType, FusionConvBNReluParam,
+            operators::ConvBNReluKernel<DeviceType, T>>(type, inputs, outputs,
+                                                        attrs, scope) {}
+
+  using framework::OperatorWithKernel<
+      DeviceType, FusionConvBNReluParam,
+      operators::ConvBNReluKernel<DeviceType, T>>::OperatorWithKernel;
+  void InferShape() const override;
+
+ protected:
+};
+
+#ifdef PADDLE_MOBILE_CPU
+
+#ifndef FUSION_CONV_BN_RELU_REGISTER
+static framework::FusionOpRegistrar fusion_conv_bn_relu_registrar(
+    new FusionConvBNReluMatcher());
+#define FUSION_CONV_BN_RELU_REGISTER
+#endif
+
+#endif
+
+#ifdef PADDLE_MOBILE_MALI_GPU
+
+#endif
+
+#ifdef PADDLE_MOBILE_FPGA
+#endif
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(fusion_conv_bn_relu);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+#endif
+
+#endif
diff --git a/src/operators/fusion_dwconv_bn_relu_op.cpp b/src/operators/fusion_dwconv_bn_relu_op.cpp
index ba03a436c37cc8f1dcba94036fd6a3fbbd8fcaf3..e55295830e19b5b39a5ae2501e30170ffb1a7854 100644
--- a/src/operators/fusion_dwconv_bn_relu_op.cpp
+++ b/src/operators/fusion_dwconv_bn_relu_op.cpp
@@ -44,7 +44,7 @@ void FusionDWConvBNReluOp<Dtype, T>::InferShape() const {
   framework::DDim ddim = framework::make_ddim(output_shape);
   this->param_.Output()->Resize(ddim);
 }
-template class FusionDWConvBNReluOp<CPU, float>;
+
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/fusion_dwconv_bn_relu_op.h b/src/operators/fusion_dwconv_bn_relu_op.h
index bf95b51da43b8e9c0cec102876d48828b3749575..6f9f03e4936e082de802ced385060fecb9cc27a9 100644
--- a/src/operators/fusion_dwconv_bn_relu_op.h
+++ b/src/operators/fusion_dwconv_bn_relu_op.h
@@ -38,8 +38,6 @@ class FusionDWConvBNReluMatcher : public framework::FusionOpMatcher {
   void FolderNodes(
       framework::Node *node,
       std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
-    vector<std::shared_ptr<framework::OpDesc>> origin_descs =
-        node->OpDescs(node_.Depth());
     node->Folder(node_.Depth(), Type(),
                  {{G_OP_TYPE_BATCHNORM,
                    {{"Scale", "Scale"},
diff --git a/src/operators/fusion_fc_op.cpp b/src/operators/fusion_fc_op.cpp
index 57a8b1b53f2f98b3218ee8fc40c6c9774ec5a5c7..d564d4d88c16ee09382a9b2dae275807ec4bdb4b 100644
--- a/src/operators/fusion_fc_op.cpp
+++ b/src/operators/fusion_fc_op.cpp
@@ -50,7 +50,6 @@ void FusionFcOp<Dtype, T>::InferShape() const {
   this->param_.Out()->Resize(ddim);
 }
-template class FusionFcOp<CPU, float>;
+
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/im2sequence_op.cpp b/src/operators/im2sequence_op.cpp
index 273ce462d0aa423a6bf023186c6a579e975dfb11..3c929af9cf0a8a1550f197ffdb42ee590cd43235 100644
--- a/src/operators/im2sequence_op.cpp
+++ b/src/operators/im2sequence_op.cpp
@@ -47,8 +47,6 @@ void Im2SequenceOp<Dtype, T>::InferShape() const {
   this->param_.Output()->Resize(ddim);
 }
 
-template class Im2SequenceOp<CPU, float>;
-
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/kernel/arm/conv_add_relu_kernel.cpp b/src/operators/kernel/arm/conv_add_relu_kernel.cpp
index 356dd191e761afc5d5b6bfacd250f90ae31017b2..8414b7374dd0ed2b10784563dbac9c1565d66f4c 100644
--- a/src/operators/kernel/arm/conv_add_relu_kernel.cpp
+++ b/src/operators/kernel/arm/conv_add_relu_kernel.cpp
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#ifdef FUSION_CONVADD_RELU_OP
+#ifdef FUSION_CONVADDRELU_OP
 
 #include "operators/kernel/conv_add_relu_kernel.h"
 #include "operators/kernel/central-arm-func/conv_add_relu_arm_func.h"
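The ConvBNReluKernel added in the next diff folds batch norm into the convolution at Init time: new_scale = gamma / sqrt(variance + epsilon) and new_bias = beta - mean * new_scale, so inference needs only one multiply-add per output element. A standalone sketch of that folding, with made-up channel statistics:

```c++
#include <cmath>
#include <cstdio>

int main() {
  const int C = 2;                 // channel count (illustrative)
  float mean[C] = {0.1f, -0.2f};
  float variance[C] = {1.0f, 4.0f};
  float scale[C] = {1.5f, 0.5f};   // batch-norm gamma
  float bias[C] = {0.0f, 0.3f};    // batch-norm beta
  const float epsilon = 1e-5f;

  float new_scale[C], new_bias[C];
  for (int i = 0; i < C; ++i) {
    float inv_std = 1.0f / std::sqrt(variance[i] + epsilon);
    new_scale[i] = inv_std * scale[i];
    new_bias[i] = bias[i] - mean[i] * new_scale[i];
  }
  // y = conv_out * new_scale[c] + new_bias[c] now equals bn(conv_out).
  for (int i = 0; i < C; ++i) std::printf("%f %f\n", new_scale[i], new_bias[i]);
  return 0;
}
```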
diff --git a/src/operators/kernel/arm/conv_bn_relu_kernel.cpp b/src/operators/kernel/arm/conv_bn_relu_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..23f06c1f0b8a0ed3f22ca9d23d24ae44c59f3618
--- /dev/null
+++ b/src/operators/kernel/arm/conv_bn_relu_kernel.cpp
@@ -0,0 +1,68 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVBNRELU_OP
+
+#include "operators/kernel/conv_bn_relu_kernel.h"
+#include "operators/kernel/central-arm-func/conv_bn_relu_arm_func.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool ConvBNReluKernel<CPU, float>::Init(FusionConvBNReluParam *param) {
+  const Tensor *mean = param->InputMean();
+  const Tensor *variance = param->InputVariance();
+  const Tensor *scale = param->InputScale();
+  const Tensor *bias = param->InputBias();
+  const float epsilon = param->Epsilon();
+
+  //  DLOG << "variance: " << *variance;
+
+  auto mean_ptr = mean->data<float>();
+  auto variance_ptr = variance->data<float>();
+  auto scale_ptr = scale->data<float>();
+  auto bias_ptr = bias->data<float>();
+
+  const int C = mean->numel();
+  float inv_std_ptr[C];
+  for (int i = 0; i < C; i++) {
+    inv_std_ptr[i] =
+        1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
+  }
+  Tensor *new_scale = new Tensor();
+  Tensor *new_bias = new Tensor();
+  auto new_scale_ptr = new_scale->mutable_data<float>({C});
+  auto new_bias_ptr = new_bias->mutable_data<float>({C});
+  for (int i = 0; i < C; i++) {
+    new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
+    new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
+  }
+
+  param->SetNewScale(new_scale);
+  param->SetNewBias(new_bias);
+  return true;
+}
+
+template <>
+void ConvBNReluKernel<CPU, float>::Compute(
+    const FusionConvBNReluParam &param) const {
+  ConvBNReluCompute<float>(param);
+}
+template class ConvBNReluKernel<CPU, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/central-arm-func/batchnorm_arm_func.h b/src/operators/kernel/central-arm-func/batchnorm_arm_func.h
index b2af17eb4aaf0a7ef98442f589162a3b6f371a3b..cc591035065e4cbbe71ff8f6bd6cbab9c6fe9e79 100644
--- a/src/operators/kernel/central-arm-func/batchnorm_arm_func.h
+++ b/src/operators/kernel/central-arm-func/batchnorm_arm_func.h
@@ -54,7 +54,40 @@ void BatchnormCompute(const BatchNormParam &param) {
 
   int HXW = H * W;
 
-#ifdef ARMV7
+#if __ARM_NEON
+#if __aarch64__
+  float *inv_std_ptr = new float[C];
+  for (int i = 0; i < C; i++) {
+    inv_std_ptr[i] =
+        1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
+  }
+
+  Tensor new_scale;
+  auto new_scale_ptr = new_scale.mutable_data<float>(framework::make_ddim({C}));
+  Tensor new_bias;
+  auto new_bias_ptr = new_bias.mutable_data<float>(framework::make_ddim({C}));
+
+  /// ((x - est_mean) * (inv_var) * scale + bias equal to
+  /// (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
+  for (int i = 0; i < C; i++) {
+    new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
+    new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
+    {
+      for (int n = 0; n < N; n++) {
+        for (int h = 0; h < H; h++) {
+          int tmp_index = n * stride0 + i * stride1 + h * stride2;
+          for (int w = 0; w < W; w++) {
+            int index = tmp_index + w;
+            out_ptr[index] =
+                input_x_ptr[index] * new_scale_ptr[i] + new_bias_ptr[i];
+          }
+        }
+      }
+    }
+  }
+  delete[] inv_std_ptr;
+#else
+
   if (HXW > 32) {
     int NXC = N * C;
     float *inv_std_ptr = new float[NXC * 4];
@@ -229,6 +262,7 @@ void BatchnormCompute(const BatchNormParam &param) {
 
     delete[] inv_std_ptr;
   }
+#endif
 #else
   float *inv_std_ptr = new float[C];
   for (int i = 0; i < C; i++) {
diff --git a/src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h b/src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
index 6aadbab95c591d4286fdbb3c3f01a291cdd90429..e8929e3e94073d384d24f63b5aa73e51e353fa26 100644
--- a/src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
+++ 
b/src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifdef FUSION_CONVADD_RELU_OP +#ifdef FUSION_CONVADDRELU_OP #pragma once #include diff --git a/src/operators/kernel/central-arm-func/conv_bn_relu_arm_func.h b/src/operators/kernel/central-arm-func/conv_bn_relu_arm_func.h new file mode 100644 index 0000000000000000000000000000000000000000..f18d67749b96cd0ee2d84c2731af8a2c3e136db1 --- /dev/null +++ b/src/operators/kernel/central-arm-func/conv_bn_relu_arm_func.h @@ -0,0 +1,139 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_CONVBNRELU_OP + +#pragma once +#include +#include "operators/math/depthwise_conv_3x3.h" +#include "operators/op_param.h" +namespace paddle_mobile { +namespace operators { +void ConvBNReluBasic(const FusionConvBNReluParam ¶m) { + const Tensor *input = param.Input(); + Tensor filter = *param.Filter(); + Tensor new_bias = *param.NewBias(); + Tensor new_scale = *param.NewScale(); + + Tensor *output = param.Output(); + + int groups = param.Groups(); + std::vector strides = param.Strides(); + std::vector paddings = param.Paddings(); + std::vector dilations = param.Dilations(); + + const int batch_size = static_cast(input->dims()[0]); + + std::vector filter_shape_vec(framework::vectorize(filter.dims())); + + std::vector output_shape_vec(framework::vectorize(output->dims())); + size_t data_dim = filter_shape_vec.size() - 2; + std::vector col_shape_vec(1 + 2 * data_dim); + col_shape_vec[0] = input->dims()[1] / groups; + for (size_t j = 0; j < data_dim; ++j) { + col_shape_vec[j + 1] = filter_shape_vec[j + 2]; + col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; + } + framework::DDim col_shape(framework::make_ddim(col_shape_vec)); + + framework::DDim col_matrix_shape = + framework::flatten_to_2d(col_shape, data_dim + 1); + + bool is_expand = + math::IsExpand(filter_shape_vec, strides, paddings, dilations); + Tensor col; + Tensor col_matrix; + if (is_expand) { + col.mutable_data(col_shape); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } + + framework::DDim input_shape = framework::slice_ddim( + input->dims(), 1, static_cast(input->dims().size())); + + framework::DDim filter_matrix_shape = {filter.dims()[0], + filter.numel() / filter.dims()[0]}; + filter.Resize(filter_matrix_shape); + framework::DDim output_matrix_shape = { + output->dims()[1], + output->numel() / (output->dims()[0] * output->dims()[1])}; + + // convolution operator: im2col(or vol2col) + gemm + int in_step = static_cast(input->dims()[1]) / groups; + int out_step = static_cast(output->dims()[1]) / groups; + + math::Vol2ColFunctor vol2col; + math::Im2ColFunctor im2col; + + for (int i = 0; i < batch_size; i++) { + Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); + Tensor out_batch = output->Slice(i, i + 
1).Resize(output_matrix_shape); + + for (int g = 0; g < groups; g++) { + Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); + + if (!is_expand) { + col.ShareDataWith(in_slice); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } else if (data_dim == 2U) { + // im2col + im2col(in_slice, dilations, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &col); + } else if (data_dim == 3U) { + // vol2col + vol2col(in_slice, dilations, strides, paddings, &col); + } + // gemm + Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); + Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); + + math::matmulWithBn( + filter_slice, false, col_matrix, false, static_cast(1), + &out_slice, static_cast(0), true, &new_scale, &new_bias, g); + } + } +} + +template +void ConvBNReluCompute(const FusionConvBNReluParam ¶m) { + if (param.Groups() == param.Input()->dims()[1] && + param.Input()->dims()[1] == param.Output()->dims()[1] && + param.Filter()->dims()[2] == param.Filter()->dims()[3] && + param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1) { + math::DepthwiseConvAddBNRelu3x3s1p1(param.Input(), param.Filter(), + param.Output(), param.NewScale(), + param.NewBias(), true); + } else if (param.Groups() == param.Input()->dims()[1] && + param.Input()->dims()[1] == param.Output()->dims()[1] && + param.Filter()->dims()[2] == param.Filter()->dims()[3] && + param.Filter()->dims()[2] == 3 && param.Strides()[0] == 2) { + // math::DepthwiseConvAddBNRelu3x3s2p1(param.Input(), param.Filter(), + // param.Output(), param.NewScale(), + // param.NewBias(), 1); + math::DepthwiseConvAddBNRelu3x3s2p1v2(param.Input(), param.Filter(), + param.Output(), param.NewScale(), + param.NewBias(), true); + } else { + ConvBNReluBasic(param); + } +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/central-arm-func/pool_arm_func.h b/src/operators/kernel/central-arm-func/pool_arm_func.h index 892dca2ea40d40484b4c32a57f8633849cc9d038..6179df5b0c11ad2a2e19384989029696e9d6c266 100644 --- a/src/operators/kernel/central-arm-func/pool_arm_func.h +++ b/src/operators/kernel/central-arm-func/pool_arm_func.h @@ -76,15 +76,20 @@ void PoolCompute(const PoolParam ¶m) { } } else if (ksize[0] == 2 && ksize[0] == ksize[1]) { -#ifndef IOS +#if __ARM_NEON +#if __aarch64__ + PoolBasic(pooling_type, ksize, strides, paddings, in_x, out); +#else if (pooling_type == "max") { math::Pool2x2Max(strides, paddings, in_x, out); } else if (pooling_type == "avg") { math::Pool2x2Avg(strides, paddings, in_x, out); } +#endif #else PoolBasic(pooling_type, ksize, strides, paddings, in_x, out); -#endif +#endif // __ARM_NEON + } else { PoolBasic(pooling_type, ksize, strides, paddings, in_x, out); } diff --git a/src/operators/kernel/central-arm-func/sigmoid_arm_func.h b/src/operators/kernel/central-arm-func/sigmoid_arm_func.h index daf6ad0e472515c8034a400dfc73de608f5b12d2..c612c4b092143ef8925f81a6d6fefe9cd9dff25b 100644 --- a/src/operators/kernel/central-arm-func/sigmoid_arm_func.h +++ b/src/operators/kernel/central-arm-func/sigmoid_arm_func.h @@ -68,6 +68,7 @@ void sigmoid(const Tensor *X, Tensor *Y) { input_outer_ptr++; } } +#else #endif } diff --git a/src/operators/kernel/conv_add_relu_kernel.h b/src/operators/kernel/conv_add_relu_kernel.h index 3f36d80c4781aebea756b04e340d056a79cfd7d7..931313273d150fa1ad159e7069fbc3812d6e6657 100644 --- a/src/operators/kernel/conv_add_relu_kernel.h +++ 
b/src/operators/kernel/conv_add_relu_kernel.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#ifdef FUSION_CONVADD_RELU_OP +#ifdef FUSION_CONVADDRELU_OP #include #include "framework/ddim.h" diff --git a/src/operators/kernel/conv_bn_relu_kernel.h b/src/operators/kernel/conv_bn_relu_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..c9d4df5d8f597deebaf2b53491851b7ce03fc7aa --- /dev/null +++ b/src/operators/kernel/conv_bn_relu_kernel.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#ifdef FUSION_CONVBNRELU_OP + +#include +#include "framework/ddim.h" +#include "framework/operator.h" +#include "operators/math/conv_func.h" +#include "operators/math/im2col.h" +#include "operators/math/math_function.h" +#include "operators/math/vol2col.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +using framework::DDim; +using framework::OpKernelBase; + +template +class ConvBNReluKernel + : public OpKernelBase { + public: + void Compute(const FusionConvBNReluParam ¶m) const; + bool Init(FusionConvBNReluParam *param); +}; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/lrn_op.cpp b/src/operators/lrn_op.cpp index 1a5a8eccc1fc314d27517db8bc286035e573c9be..dde9123edf3568020f933bb7375be99e40f2367b 100644 --- a/src/operators/lrn_op.cpp +++ b/src/operators/lrn_op.cpp @@ -24,7 +24,7 @@ void LrnOp::InferShape() const { auto x_dims = this->param_.InputX()->dims(); this->param_.Out()->Resize(x_dims); } -template class LrnOp; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/math/depthwise_conv_3x3.cpp b/src/operators/math/depthwise_conv_3x3.cpp index 5db676564e190bf40e8af437ba68aee80b5a5af3..7e353c29b80279f895ad6d0150b31eb1703d97d4 100644 --- a/src/operators/math/depthwise_conv_3x3.cpp +++ b/src/operators/math/depthwise_conv_3x3.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #include "operators/math/depthwise_conv_3x3.h" -#ifdef __ARM_NEON +#if __ARM_NEON #include #endif #include @@ -23,7 +23,6 @@ namespace math { void DepthwiseConv3x3(const Tensor *input, vector strides, vector paddings, const Tensor *filter, Tensor *bias, Tensor *output, bool if_bias) { -#ifdef __ARM_NEON const int batch_size = input->dims()[0]; const int input_height = input->dims()[2]; @@ -181,7 +180,27 @@ void DepthwiseConv3x3(const Tensor *input, vector strides, } } else { -#if defined(ARMV17) +#if __ARM_NEON +#if __aarch64__ + const float32x4_t data1 = vld1q_f32(pos1); + const float32x4_t data2 = vld1q_f32(pos2); + const float32x4_t data3 = vld1q_f32(pos3); + + const float32x4_t v_filter1 = vld1q_f32(filter1); + const float32x4_t v_filter2 = vld1q_f32(filter2); + const float32x4_t v_filter3 = vld1q_f32(filter3); + float32x4_t mula = vmulq_f32(data1, v_filter1); + mula = vmlaq_f32(mula, data2, v_filter2); + mula = vmlaq_f32(mula, data3, v_filter3); + float32x2_t res = vpadd_f32( + vget_high_f32(vsetq_lane_f32(0, mula, 3)), vget_low_f32(mula)); + res = vpadd_f32(res, res); + if (if_bias) { + output_data[ph * output_width + pw] += vget_lane_f32(res, 0); + } else { + output_data[ph * output_width + pw] = vget_lane_f32(res, 0); + } +#else asm volatile( "vld1.32 {q1}, [%[pos1]] \n\t" @@ -209,26 +228,10 @@ void DepthwiseConv3x3(const Tensor *input, vector strides, [filter2] "r"(filter2), [filter3] "r"(filter3), [output_ptr] "r"(output_ptr), [zero] "r"(zero) : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6"); +#endif // __aarch64__ #else - const float32x4_t data1 = vld1q_f32(pos1); - const float32x4_t data2 = vld1q_f32(pos2); - const float32x4_t data3 = vld1q_f32(pos3); - const float32x4_t v_filter1 = vld1q_f32(filter1); - const float32x4_t v_filter2 = vld1q_f32(filter2); - const float32x4_t v_filter3 = vld1q_f32(filter3); - float32x4_t mula = vmulq_f32(data1, v_filter1); - mula = vmlaq_f32(mula, data2, v_filter2); - mula = vmlaq_f32(mula, data3, v_filter3); - float32x2_t res = vpadd_f32( - vget_high_f32(vsetq_lane_f32(0, mula, 3)), vget_low_f32(mula)); - res = vpadd_f32(res, res); - if (if_bias) { - output_data[ph * output_width + pw] += vget_lane_f32(res, 0); - } else { - output_data[ph * output_width + pw] = vget_lane_f32(res, 0); - } -#endif +#endif // __ARM_NEON } } } @@ -239,12 +242,11 @@ void DepthwiseConv3x3(const Tensor *input, vector strides, input_data += input_batch_stride; output_data += output_batch_stride; } -#endif } void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter, Tensor *output, Tensor *bias, bool if_bias) { -#ifdef __ARM_NEON +#if __ARM_NEON const float *input_data = input->data(); const float *filter_data = filter->data(); float *output_data = output->data(); @@ -520,7 +522,7 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter, void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter, Tensor *output, const Tensor *new_scale, const Tensor *new_bias, bool if_relu) { -#ifdef __ARM_NEON +#if __ARM_NEON const float *input_data = input->data(); const float *filter_data = filter->data(); float *output_data = output->data(); @@ -824,7 +826,7 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter, void DepthwiseConvAddBNRelu3x3s2p1(const Tensor *input, const Tensor *filter, Tensor *output, const Tensor *new_scale, const Tensor *new_bias, bool if_relu) { -#ifdef __ARM_NEON +#if __ARM_NEON const int batch_size = input->dims()[0]; @@ -1022,7 +1024,7 @@ void DepthwiseConvAddBNRelu3x3s2p1(const 
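The aarch64 branch added above computes the same 3x3 dot product as the 32-bit inline assembly it sits beside: three vmla accumulations, the unused fourth lane zeroed with vsetq_lane_f32, then two pairwise adds to collapse the vector to a single float. A scalar reference of what one output element works out to, as a sketch:

```c++
// Scalar reference for the NEON reduction above (a sketch): one output
// element of a 3x3 depthwise convolution is the dot product of three input
// rows with three filter rows. The intrinsics form the same sum four lanes
// at a time, zero the spare lane, then vpadd down to a single float.
float Dot3x3(const float *row1, const float *row2, const float *row3,
             const float *f1, const float *f2, const float *f3) {
  float sum = 0.0f;
  for (int i = 0; i < 3; ++i) {
    sum += row1[i] * f1[i] + row2[i] * f2[i] + row3[i] * f3[i];
  }
  return sum;
}
```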
Tensor *input, const Tensor *filter, void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter, Tensor *output, Tensor bias, bool if_bias) { -#ifdef __ARM_NEON +#if __ARM_NEON const float *input_data = input->data(); const float *filter_data = filter->data(); float *output_data = output->data(); @@ -1225,7 +1227,7 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter, void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter, Tensor *output, const Tensor *new_scale, const Tensor *new_bias, bool if_relu) { -#ifdef __ARM_NEON +#if __ARM_NEON const float *input_data = input->data(); const float *filter_data = filter->data(); float *output_data = output->data(); diff --git a/src/operators/math/gemm.cpp b/src/operators/math/gemm.cpp index bb91adcc4db412db137fdc12831bad75e069e38c..b9b61f4d1c59a0e2c8e7822742c54472ad540981 100644 --- a/src/operators/math/gemm.cpp +++ b/src/operators/math/gemm.cpp @@ -15,7 +15,7 @@ limitations under the License. */ #include "operators/math/gemm.h" #include "common/log.h" #include "memory/t_malloc.h" -#ifndef X86 +#if __ARM_NEON #include #endif #ifdef _OPENMP @@ -33,6 +33,7 @@ float *packedA; float *packedB; float *packedC; float *zero; +/* // 将A矩阵分块复制到连续内存(ColMajor) void PackMatrixA(int m, int k, int m_tail, const float *A, int lda, float *buffer) { @@ -60,9 +61,39 @@ void PackMatrixA(int m, int k, int m_tail, const float *A, int lda, } } +// 将B矩阵分块复制到连续内存(ColMajor) +void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb, + float *buffer) { + int i, j; + const float *Bj, *Bj1, *Bj2, *Bj3; + for (j = 0; j < n - n_tail; j += NR) { + Bj = &B(0, j); + Bj1 = &B(0, j + 1); + Bj2 = &B(0, j + 2); + Bj3 = &B(0, j + 3); + for (i = 0; i < k; ++i) { + *buffer++ = *Bj++; + *buffer++ = *Bj1++; + *buffer++ = *Bj2++; + *buffer++ = *Bj3++; + } + } + if (n_tail != 0) { + for (i = 0; i < k; ++i) { + for (int j = n - n_tail; j < n; ++j) { + *buffer++ = B(i, j); + } + for (int j = n; j < n + (NR - n_tail); ++j) { + *buffer++ = 0; + } + } + } +} +*/ + // 将A矩阵分块复制到连续内存(RowMajor) -void PackMatrixA_(int m, int k, int m_tail, const float *A, int lda, - float *buffer) { +void PackMatrixA_4r(int m, int k, int m_tail, const float *A, int lda, + float *buffer) { const float *a0, *a1, *a2, *a3; for (int i = 0; i < m - m_tail; i += MR) { a0 = A + i * lda; @@ -100,49 +131,94 @@ void PackMatrixA_(int m, int k, int m_tail, const float *A, int lda, } } -// 将B矩阵分块复制到连续内存(ColMajor) -void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb, - float *buffer) { - int i, j; - const float *Bj, *Bj1, *Bj2, *Bj3; - for (j = 0; j < n - n_tail; j += NR) { - Bj = &B(0, j); - Bj1 = &B(0, j + 1); - Bj2 = &B(0, j + 2); - Bj3 = &B(0, j + 3); - for (i = 0; i < k; ++i) { - *buffer++ = *Bj++; - *buffer++ = *Bj1++; - *buffer++ = *Bj2++; - *buffer++ = *Bj3++; +void PackMatrixA_6r(int m, int k, int m_tail, const float *A, int lda, + float *buffer) { + const float *a0, *a1, *a2, *a3, *a4, *a5; + for (int i = 0; i < m - m_tail; i += MR) { + a0 = A + i * lda; + a1 = A + (i + 1) * lda; + a2 = A + (i + 2) * lda; + a3 = A + (i + 3) * lda; + a4 = A + (i + 4) * lda; + a5 = A + (i + 5) * lda; + for (int j = 0; j < k; ++j) { + *buffer++ = *a0++; + *buffer++ = *a1++; + *buffer++ = *a2++; + *buffer++ = *a3++; + *buffer++ = *a4++; + *buffer++ = *a5++; } } - if (n_tail != 0) { - for (i = 0; i < k; ++i) { - for (int j = n - n_tail; j < n; ++j) { - *buffer++ = B(i, j); - } - for (int j = n; j < n + (NR - n_tail); ++j) { - *buffer++ = 0; - } + int i = m - m_tail; + 
a0 = &A(i, 0); + a1 = a0 + lda; + a2 = a0 + 2 * lda; + a3 = a0 + 3 * lda; + a4 = a0 + 4 * lda; + a5 = a0 + 5 * lda; + if (m_tail != 0) { + if (m_tail <= 5) { + a5 = zero; + } + if (m_tail <= 4) { + a4 = zero; + } + if (m_tail <= 3) { + a3 = zero; + } + if (m_tail <= 2) { + a2 = zero; + } + if (m_tail <= 1) { + a1 = zero; + } + for (int j = 0; j < k; ++j) { + *buffer++ = *a0++; + *buffer++ = *a1++; + *buffer++ = *a2++; + *buffer++ = *a3++; + *buffer++ = *a4++; + *buffer++ = *a5++; } } } // 将B矩阵分块复制到连续内存(RowMajor) -void PackMatrixB_(int k, int n, int n_tail, const float *B, int ldb, - float *buffer) { +void PackMatrixB_8c(int k, int n, int n_tail, const float *B, int ldb, + float *buffer) { const float *b0; for (int j = 0; j < n - n_tail; j += NR) { for (int i = 0; i < k; ++i) { b0 = &B(i, j); +#if __ARM_NEON +#if __aarch64__ + asm volatile( + "prfm pldl1keep, [%[b0]] \n\t" + "ld1 {v0.4s, v1.4s}, [%[b0]] \n\t" + "st1 {v0.4s, v1.4s}, [%[buffer]], #32 \n\t" + : [buffer] "+r"(buffer) + : [b0] "r"(b0) + : "memory", "v0", "v1"); +#else asm volatile( - "pld [%[b0]] \n\t" - "vld1.32 {q0, q1}, [%[b0]] \n\t" - "vst1.32 {q0, q1}, [%[buffer]]! \n\t" + "pld [%[b0]] \n\t" + "vld1.32 {q0, q1}, [%[b0]] \n\t" + "vst1.32 {q0, q1}, [%[buffer]]! \n\t" : [buffer] "+r"(buffer) : [b0] "r"(b0) - : "memory", "q0", "q0"); + : "memory", "q0", "q1"); +#endif // __aarch64__ +#else + *buffer++ = *b0++; + *buffer++ = *b0++; + *buffer++ = *b0++; + *buffer++ = *b0++; + *buffer++ = *b0++; + *buffer++ = *b0++; + *buffer++ = *b0++; + *buffer++ = *b0++; +#endif // __ARM_NEON } } if (n_tail != 0) { @@ -165,7 +241,8 @@ void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b, for (int j = 0; j < nc; j += NR) { for (int i = 0; i < mc; i += MR) { // AddDot4x4(KC, a + i * KC, b + j * KC, c + i * NC + j, NC); - AddDot4x8(KC, a + i * KC, b + j * KC, c + i * NC + j, NC); + // AddDot4x8(KC, a + i * KC, b + j * KC, c + i * NC + j, NC); + AddDot6x8(KC, a + i * KC, b + j * KC, c + i * NC + j, NC); } } @@ -195,7 +272,8 @@ void InnerKernelWithBn(int mc, int nc, float alpha, const float *a, for (int j = 0; j < nc; j += NR) { for (int i = 0; i < mc; i += MR) { // AddDot4x4(KC, a + i * KC, b + j * KC, c + i * NC + j, NC); - AddDot4x8(KC, a + i * KC, b + j * KC, c + i * NC + j, NC); + // AddDot4x8(KC, a + i * KC, b + j * KC, c + i * NC + j, NC); + AddDot6x8(KC, a + i * KC, b + j * KC, c + i * NC + j, NC); } } @@ -206,8 +284,10 @@ void InnerKernelWithBn(int mc, int nc, float alpha, const float *a, } } -#if defined(IOS) -void AddDot4x4(int k, const float *a, const float *b, float *C, int ldc) { +#if __ARM_NEON +#if __aarch64__ + +void AddDot4x4(int k, const float *a, const float *b, float *c, int ldc) { // init C float32x4_t cv0 = vdupq_n_f32(0.0); float32x4_t cv1 = vdupq_n_f32(0.0); @@ -234,30 +314,271 @@ void AddDot4x4(int k, const float *a, const float *b, float *C, int ldc) { a += MR; b += NR; } - float32x4x4_t cv = {cv0, cv1, cv2, cv3}; - int i, j; - for (i = 0; i < mc; ++i) { - for (j = 0; j < nc; ++j) { - if (beta == 0.0) { - C(i, j) = 0.0; - } else if (beta != 1.0) { - C(i, j) *= beta; + + vst1q_f32(c, cv0); + vst1q_f32(c + ldc, cv1); + vst1q_f32(c + 2 * ldc, cv2); + vst1q_f32(c + 3 * ldc, cv3); + // float32x4x4_t cv = {cv0, cv1, cv2, cv3}; +} + +void AddDot4x8(int k, const float *a, const float *b, float *c, int ldc) { + // init C + float32x4_t cv0 = vdupq_n_f32(0.0); + float32x4_t cv1 = vdupq_n_f32(0.0); + float32x4_t cv2 = vdupq_n_f32(0.0); + float32x4_t cv3 = vdupq_n_f32(0.0); + float32x4_t cv4 = vdupq_n_f32(0.0); 
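PackMatrixB_8c above fixes the layout contract the micro-kernel relies on: each k-step of an 8-wide column panel of B is stored contiguously, so the inner loop can stream two q registers per step with unit stride. A portable scalar equivalent of the full-panel path, as a sketch:

```c++
#include <cstring>

// Scalar equivalent of the NEON full-panel copy in PackMatrixB_8c (a
// sketch): for the column panel starting at j, emit the 8 consecutive
// elements B(i, j..j+7) for every row i, so the packed buffer is read with
// unit stride by the micro-kernel. NR == 8 here.
void PackPanelB8(int k, const float *B, int ldb, int j, float *buffer) {
  for (int i = 0; i < k; ++i) {
    std::memcpy(buffer, B + i * ldb + j, 8 * sizeof(float));
    buffer += 8;
  }
}
```

The n_tail branch pads the last partial panel with zeros so the micro-kernel never needs a bounds check.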
+ float32x4_t cv5 = vdupq_n_f32(0.0); + float32x4_t cv6 = vdupq_n_f32(0.0); + float32x4_t cv7 = vdupq_n_f32(0.0); + + float32x4_t av; + float32x4_t bv0; + float32x4_t bv1; + + float32x2_t av01; + float32x2_t av23; + + for (int p = 0; p < k; p += 1) { + av = vld1q_f32(a); + bv0 = vld1q_f32(b); + bv1 = vld1q_f32(b + 4); + + av01 = vget_low_f32(av); + cv0 = vmlaq_lane_f32(cv0, bv0, av01, 0); + cv1 = vmlaq_lane_f32(cv1, bv1, av01, 0); + cv2 = vmlaq_lane_f32(cv2, bv0, av01, 1); + cv3 = vmlaq_lane_f32(cv3, bv1, av01, 1); + av23 = vget_high_f32(av); + cv4 = vmlaq_lane_f32(cv4, bv0, av23, 0); + cv5 = vmlaq_lane_f32(cv5, bv1, av23, 0); + cv6 = vmlaq_lane_f32(cv6, bv0, av23, 1); + cv7 = vmlaq_lane_f32(cv7, bv1, av23, 1); + + a += MR; + b += NR; + } + + vst1q_f32(c, cv0); + vst1q_f32(c + 4, cv1); + vst1q_f32(c + ldc, cv2); + vst1q_f32(c + ldc + 4, cv3); + vst1q_f32(c + 2 * ldc, cv4); + vst1q_f32(c + 2 * ldc + 4, cv5); + vst1q_f32(c + 3 * ldc, cv6); + vst1q_f32(c + 3 * ldc + 4, cv7); +} + +// 分块矩阵乘法结果回写 +// C = A * B +void WriteBasic(int mc, int nc, float *c, float *C, int ldc) { + int nc1 = nc / 4; + int _nc1 = nc % 4; + + float *c_ptr, *C_ptr; + float32x4_t cv; + for (int i = 0; i < mc; ++i) { + c_ptr = c + i * NC; + C_ptr = C + i * ldc; + for (int j = 0; j < nc1; ++j) { + cv = vld1q_f32(c_ptr); + vst1q_f32(C_ptr, cv); + c_ptr += 4; + C_ptr += 4; + } + if (_nc1 != 0) { + cv = vld1q_f32(c_ptr); + if (_nc1 >= 1) { + vst1q_lane_f32(C_ptr, cv, 0); + C_ptr++; } - if (j == 0) { - C(i, j) += alpha * vgetq_lane_f32(cv.val[i], 0); - } else if (j == 1) { - C(i, j) += alpha * vgetq_lane_f32(cv.val[i], 1); - } else if (j == 2) { - C(i, j) += alpha * vgetq_lane_f32(cv.val[i], 2); - } else if (j == 3) { - C(i, j) += alpha * vgetq_lane_f32(cv.val[i], 3); + if (_nc1 >= 2) { + vst1q_lane_f32(C_ptr, cv, 1); + C_ptr++; + } + if (_nc1 >= 3) { + vst1q_lane_f32(C_ptr, cv, 2); } } } } -} // namespace math -#elif defined(ARMV7) +// C = alpha * A * B + beta * C +void WriteWithAlphaBeta(int mc, int nc, float *c, float *C, int ldc) {} + +// C = A * B + C +void WriteWithAdd(int mc, int nc, float *c, float *C, int ldc) { + int nc1 = nc / 4; + int _nc1 = nc % 4; + + float *c_ptr, *C_ptr; + float32x4_t cv; + float32x4_t cv1; + for (int i = 0; i < mc; ++i) { + c_ptr = c + i * NC; + C_ptr = C + i * ldc; + for (int j = 0; j < nc1; ++j) { + cv = vld1q_f32(c_ptr); + cv1 = vld1q_f32(C_ptr); + cv = vaddq_f32(cv, cv1); + vst1q_f32(C_ptr, cv); + c_ptr += 4; + C_ptr += 4; + } + if (_nc1 != 0) { + cv = vld1q_f32(c_ptr); + cv1 = vld1q_f32(C_ptr); + cv = vaddq_f32(cv, cv1); + if (_nc1 >= 1) { + vst1q_lane_f32(C_ptr, cv, 0); + C_ptr++; + } + if (_nc1 >= 2) { + vst1q_lane_f32(C_ptr, cv, 1); + C_ptr++; + } + if (_nc1 >= 3) { + vst1q_lane_f32(C_ptr, cv, 2); + } + } + } +} + +// C = A * B + C, relu(C) +void WriteWithAddRelu(int mc, int nc, float *c, float *C, int ldc) { + int nc1 = nc / 4; + int _nc1 = nc % 4; + + float *c_ptr, *C_ptr; + float32x4_t cv; + float32x4_t cv1; + float32x4_t zero = vdupq_n_f32(0.0); + for (int i = 0; i < mc; ++i) { + c_ptr = c + i * NC; + C_ptr = C + i * ldc; + for (int j = 0; j < nc1; ++j) { + cv = vld1q_f32(c_ptr); + cv1 = vld1q_f32(C_ptr); + cv = vaddq_f32(cv, cv1); + cv = vmaxq_f32(cv, zero); + vst1q_f32(C_ptr, cv); + c_ptr += 4; + C_ptr += 4; + } + if (_nc1 != 0) { + cv = vld1q_f32(c_ptr); + cv1 = vld1q_f32(C_ptr); + cv = vaddq_f32(cv, cv1); + cv = vmaxq_f32(cv, zero); + if (_nc1 >= 1) { + vst1q_lane_f32(C_ptr, cv, 0); + C_ptr++; + } + if (_nc1 >= 2) { + vst1q_lane_f32(C_ptr, cv, 1); + C_ptr++; + } + if (_nc1 >= 3) 
{ + vst1q_lane_f32(C_ptr, cv, 2); + } + } + } +} + +// C = A * B, batchnorm(C) +void WriteWithBn(int mc, int nc, float *c, float *C, int ldc, float *new_scale, + float *new_bias) { + int nc1 = nc / 4; + int _nc1 = nc % 4; + + float *c_ptr, *C_ptr; + float32x4_t cv; + float32x4_t cv1; + float32x4_t bias; + float32x2_t scale; + for (int i = 0; i < mc; ++i) { + c_ptr = c + i * NC; + C_ptr = C + i * ldc; + bias = vld1q_dup_f32(new_bias); + scale = vld1_dup_f32(new_scale); + new_bias++; + new_scale++; + float scale0 = vget_lane_f32(scale, 0); + for (int j = 0; j < nc1; ++j) { + cv = vld1q_f32(c_ptr); + cv = vmlaq_n_f32(bias, cv, scale0); + vst1q_f32(C_ptr, cv); + c_ptr += 4; + C_ptr += 4; + } + if (_nc1 != 0) { + cv = vld1q_f32(c_ptr); + cv = vmlaq_n_f32(bias, cv, scale0); + if (_nc1 >= 1) { + vst1q_lane_f32(C_ptr, cv, 0); + C_ptr++; + } + if (_nc1 >= 2) { + vst1q_lane_f32(C_ptr, cv, 1); + C_ptr++; + } + if (_nc1 >= 3) { + vst1q_lane_f32(C_ptr, cv, 2); + C_ptr++; + } + } + } +} + +// C = A * B, batchnorm(C), relu(C) +void WriteWithBnRelu(int mc, int nc, float *c, float *C, int ldc, + float *new_scale, float *new_bias) { + int nc1 = nc / 4; + int _nc1 = nc % 4; + + float *c_ptr, *C_ptr; + float32x4_t cv; + float32x4_t bias; + float32x2_t scale; + float32x4_t zero = vdupq_n_f32(0.0); + for (int i = 0; i < mc; ++i) { + c_ptr = c + i * NC; + C_ptr = C + i * ldc; + bias = vld1q_dup_f32(new_bias); + scale = vld1_dup_f32(new_scale); + new_bias++; + new_scale++; + float scale0 = vget_lane_f32(scale, 0); + for (int j = 0; j < nc1; ++j) { + cv = vld1q_f32(c_ptr); + cv = vmlaq_n_f32(bias, cv, scale0); + cv = vmaxq_f32(cv, zero); + vst1q_f32(C_ptr, cv); + c_ptr += 4; + C_ptr += 4; + } + if (_nc1 != 0) { + cv = vld1q_f32(c_ptr); + cv = vmlaq_n_f32(bias, cv, scale0); + cv = vmaxq_f32(cv, zero); + if (_nc1 >= 1) { + vst1q_lane_f32(C_ptr, cv, 0); + C_ptr++; + } + if (_nc1 >= 2) { + vst1q_lane_f32(C_ptr, cv, 1); + C_ptr++; + } + if (_nc1 >= 3) { + vst1q_lane_f32(C_ptr, cv, 2); + } + } + } +} + +#else + void AddDot4x4(int k, const float *a, const float *b, float *c, int ldc) { const float *a_ptr, *b_ptr; a_ptr = a; @@ -328,205 +649,61 @@ void AddDot4x4(int k, const float *a, const float *b, float *c, int ldc) { "q10", "q11", "q12", "q13"); } -#else -void AddDot4x4(int k, const float *a, const float *b, float *c, int ldc) { - float *c0, *c1, *c2, *c3; - c0 = c; - c1 = c + ldc; - c2 = c + 2 * ldc; - c3 = c + 3 * ldc; - for (int p = 0; p < k; p += 1) { - // first row - c0[0] += a[0] * b[0]; - c0[1] += a[0] * b[1]; - c0[2] += a[0] * b[2]; - c0[3] += a[0] * b[3]; +/* +void VectorKernel(int m, int n, int k, float alpha, const float *A, int lda, + const float *B, int ldb, float beta, float *C, int ldc, + bool relu) { + float *bufferC = static_cast(memory::Alloc(sizeof(float) * n)); - // second row - c1[0] += a[1] * b[0]; - c1[1] += a[1] * b[1]; - c1[2] += a[1] * b[2]; - c1[3] += a[1] * b[3]; + const float *a0, *b0, *b1, *b2, *b3; + float *c0, *C0; - // third row - c2[0] += a[2] * b[0]; - c2[1] += a[2] * b[1]; - c2[2] += a[2] * b[2]; - c2[3] += a[2] * b[3]; + int volatile kc1 = k / 4; + int volatile kc2 = k % 4; + int volatile nc1 = n / 16; + int _nc1 = n % 16; + int volatile nc2 = _nc1 / 4; + int volatile nc3 = _nc1 % 4; + for (int i = 0; i < kc1; i++) { + a0 = A + i * 4; + b0 = B + i * 4 * ldb; + b1 = b0 + ldb; + b2 = b1 + ldb; + b3 = b2 + ldb; + c0 = bufferC; + asm volatile( + "pld [%[a0], #16] \n\t" + "vld1.32 {q0}, [%[a0]] \n\t" - // fourth row - c3[0] += a[3] * b[0]; - c3[1] += a[3] * b[1]; - c3[2] += a[3] * b[2]; 
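WriteWithBn and WriteWithBnRelu above fold batch norm into the write-back: one scale/bias pair applies to a whole output row (a channel), and the relu variant clamps at zero before storing. Their per-element semantics in scalar form, as a sketch (buf_stride stands in for the packed buffer's row stride, NC in the code above):

```c++
#include <algorithm>

// Scalar semantics of WriteWithBnRelu (a sketch): every element of row i is
// scaled and shifted by that row's folded batch-norm parameters, then
// clamped at zero. The NEON version does this four lanes at a time via
// vmlaq_n_f32 and vmaxq_f32, with lane-wise stores for the nc % 4 tail.
void WriteBnReluRef(int mc, int nc, const float *c, int buf_stride, float *C,
                    int ldc, const float *new_scale, const float *new_bias) {
  for (int i = 0; i < mc; ++i) {
    for (int j = 0; j < nc; ++j) {
      C[i * ldc + j] =
          std::max(0.0f, c[i * buf_stride + j] * new_scale[i] + new_bias[i]);
    }
  }
}
```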
- c3[3] += a[3] * b[3]; + "subs %[nc1], %[nc1], #1 \n\t" + "blt end_nc1_%= \n\t" + "loop_nc1_%=: \n\t" - a += 4; - b += 4; - } -} + "cmp %[i], #0 \n\t" + "beq i_eq0_%= \n\t" + "bne i_ne0_%= \n\t" -#endif + "i_eq0_%=: \n\t" + "vmov.f32 q10, #0.0 \n\t" + "vmov.f32 q11, #0.0 \n\t" + "vmov.f32 q12, #0.0 \n\t" + "vmov.f32 q13, #0.0 \n\t" + "b gemm_nc1_%= \n\t" -// 32位 float 矩阵乘法 -void Sgemm(int m, int n, int k, float alpha, const float *A, int lda, - const float *B, int ldb, float beta, float *C, int ldc, bool relu) { - // L1 data cache is 32 kib (Per Contex-A57, Contex-A72, Contex-A73) - // L2 cache is 0.5~4 Mib (Contex-A72 cluster) - int L1 = 30 * 1024; - int L2 = 1 * 1024 * 1024; + "i_ne0_%=: \n\t" + "pld [%[c0], #64] \n\t" + "vld1.32 {q10, q11}, [%[c0]]! \n\t" + "vld1.32 {q12, q13}, [%[c0]] \n\t" + "sub %[c0], %[c0], #32 \n\t" - KC = k; - MC = L2 / (2 * KC * sizeof(float)); - NC = MC; - - // make sure MC is multiple of 4, and NC is multiple of 8 - int mblock_num = (m + MC - 1) / MC; - MC = (m + mblock_num - 1) / mblock_num; - MC = (MC + 4 - 1) / 4 * 4; - // DLOG << "mblock_num = " << mblock_num << ", MC = " << MC << "\n"; - - int nblock_num = (n + NC - 1) / NC; - NC = (n + nblock_num - 1) / nblock_num; - NC = (NC + 8 - 1) / 8 * 8; - // DLOG << "nblock_num = " << nblock_num << ", NC = " << NC << "\n"; - - packedA = static_cast( - paddle_mobile::memory::Alloc(sizeof(float) * MC * KC)); - packedB = static_cast( - paddle_mobile::memory::Alloc(sizeof(float) * KC * NC)); - packedC = static_cast( - paddle_mobile::memory::Alloc(sizeof(float) * MC * NC)); - zero = static_cast(paddle_mobile::memory::Alloc(sizeof(float) * KC)); - - for (int l = 0; l < KC; ++l) { - zero[l] = 0; - } - - int mc, nc; - for (int j = 0; j < n; j += NC) { - nc = s_min(n - j, NC); - PackMatrixB_(KC, nc, nc % NR, &B(0, j), ldb, packedB); - for (int i = 0; i < m; i += MC) { - mc = s_min(m - i, MC); - PackMatrixA_(mc, KC, mc % MR, &A(i, 0), lda, packedA); - InnerKernel(mc, nc, alpha, packedA, packedB, beta, packedC, &C(i, j), ldc, - relu); - } - } - - paddle_mobile::memory::Free(packedA); - paddle_mobile::memory::Free(packedB); - paddle_mobile::memory::Free(packedC); - paddle_mobile::memory::Free(zero); -} - -void SgemmWithBn(int m, int n, int k, float alpha, const float *A, int lda, - const float *B, int ldb, float beta, float *C, int ldc, - bool relu, float *new_scale, float *new_bias) { - // L1 data cache is 32 kib (Per Contex-A57, Contex-A72, Contex-A73) - // L2 cache is 0.5~4 Mib (Contex-A72 cluster) - int L1 = 30 * 1024; - int L2 = 1 * 1024 * 1024; - - KC = k; - MC = L2 / (2 * KC * sizeof(float)); - NC = MC; - - // make sure MC is multiple of 4, and NC is multiple of 8 - int mblock_num = (m + MC - 1) / MC; - MC = (m + mblock_num - 1) / mblock_num; - MC = (MC + 4 - 1) / 4 * 4; - // DLOG << "mblock_num = " << mblock_num << ", MC = " << MC << "\n"; - - int nblock_num = (n + NC - 1) / NC; - NC = (n + nblock_num - 1) / nblock_num; - NC = (NC + 8 - 1) / 8 * 8; - // DLOG << "nblock_num = " << nblock_num << ", NC = " << NC << "\n"; - - packedA = static_cast( - paddle_mobile::memory::Alloc(sizeof(float) * MC * KC)); - packedB = static_cast( - paddle_mobile::memory::Alloc(sizeof(float) * KC * NC)); - packedC = static_cast( - paddle_mobile::memory::Alloc(sizeof(float) * MC * NC)); - zero = static_cast(paddle_mobile::memory::Alloc(sizeof(float) * KC)); - - for (int l = 0; l < KC; ++l) { - zero[l] = 0; - } - - int mc, nc; - for (int j = 0; j < n; j += NC) { - nc = s_min(n - j, NC); - PackMatrixB_(KC, nc, nc % NR, &B(0, j), ldb, 
packedB); - for (int i = 0; i < m; i += MC) { - mc = s_min(m - i, MC); - PackMatrixA_(mc, KC, mc % MR, &A(i, 0), lda, packedA); - InnerKernelWithBn(mc, nc, alpha, packedA, packedB, beta, packedC, - &C(i, j), ldc, relu, new_scale + i, new_bias + i); - } - } - - paddle_mobile::memory::Free(packedA); - paddle_mobile::memory::Free(packedB); - paddle_mobile::memory::Free(packedC); - paddle_mobile::memory::Free(zero); -} - -void VectorKernel(int m, int n, int k, float alpha, const float *A, int lda, - const float *B, int ldb, float beta, float *C, int ldc, - bool relu) { - float *bufferC = static_cast(memory::Alloc(sizeof(float) * n)); - - const float *a0, *b0, *b1, *b2, *b3; - float *c0, *C0; - - int volatile kc1 = k / 4; - int volatile kc2 = k % 4; - int volatile nc1 = n / 16; - int _nc1 = n % 16; - int volatile nc2 = _nc1 / 4; - int volatile nc3 = _nc1 % 4; - for (int i = 0; i < kc1; i++) { - a0 = A + i * 4; - b0 = B + i * 4 * ldb; - b1 = b0 + ldb; - b2 = b1 + ldb; - b3 = b2 + ldb; - c0 = bufferC; - asm volatile( - "pld [%[a0], #16] \n\t" - "vld1.32 {q0}, [%[a0]] \n\t" - - "subs %[nc1], %[nc1], #1 \n\t" - "blt end_nc1_%= \n\t" - "loop_nc1_%=: \n\t" - - "cmp %[i], #0 \n\t" - "beq i_eq0_%= \n\t" - "bne i_ne0_%= \n\t" - - "i_eq0_%=: \n\t" - "vmov.f32 q10, #0.0 \n\t" - "vmov.f32 q11, #0.0 \n\t" - "vmov.f32 q12, #0.0 \n\t" - "vmov.f32 q13, #0.0 \n\t" - "b gemm_nc1_%= \n\t" - - "i_ne0_%=: \n\t" - "pld [%[c0], #64] \n\t" - "vld1.32 {q10, q11}, [%[c0]]! \n\t" - "vld1.32 {q12, q13}, [%[c0]] \n\t" - "sub %[c0], %[c0], #32 \n\t" - - "gemm_nc1_%=: \n\t" - "pld [%[b0], #64] \n\t" - "vld1.32 {q2, q3}, [%[b0]]! \n\t" - "vld1.32 {q4, q5}, [%[b0]]! \n\t" - "vmla.f32 q10, q2, d0[0] \n\t" - "vmla.f32 q11, q3, d0[0] \n\t" - "vmla.f32 q12, q4, d0[0] \n\t" - "vmla.f32 q13, q5, d0[0] \n\t" + "gemm_nc1_%=: \n\t" + "pld [%[b0], #64] \n\t" + "vld1.32 {q2, q3}, [%[b0]]! \n\t" + "vld1.32 {q4, q5}, [%[b0]]! \n\t" + "vmla.f32 q10, q2, d0[0] \n\t" + "vmla.f32 q11, q3, d0[0] \n\t" + "vmla.f32 q12, q4, d0[0] \n\t" + "vmla.f32 q13, q5, d0[0] \n\t" "pld [%[b1], #64] \n\t" "vld1.32 {q2, q3}, [%[b1]]! 
\n\t" @@ -905,6 +1082,7 @@ void VectorKernelWithBn(int m, int n, int k, float alpha, const float *A, VecWriteWithBn(n, bufferC, C, ldc, new_scale, new_bias); } } +*/ void AddDot4x8(int k, const float *a, const float *b, float *c, int ldc) { const float *a_ptr, *b_ptr; @@ -1214,6 +1392,21 @@ void WriteWithAddRelu(int mc, int nc, float *c, float *C, int ldc) { // C = A * B, batchnorm(C) void WriteWithBn(int mc, int nc, float *c, float *C, int ldc, float *scale, float *bias) { + if (nc < 4) { + for (int i = 0; i < mc; ++i) { + for (int j = 0; j < nc; ++j) { + *C = (*c) * (*scale) + (*bias); + C++; + c++; + } + C += (ldc - nc); + c += (NC - nc); + scale++; + bias++; + } + return; + } + int volatile nc1 = nc / 16; int _nc1 = nc % 16; int volatile nc2 = _nc1 / 4; @@ -1300,6 +1493,24 @@ void WriteWithBn(int mc, int nc, float *c, float *C, int ldc, float *scale, // C = A * B, batchnorm(C), relu(C) void WriteWithBnRelu(int mc, int nc, float *c, float *C, int ldc, float *scale, float *bias) { + if (nc < 4) { + for (int i = 0; i < mc; ++i) { + for (int j = 0; j < nc; ++j) { + *C = (*c) * (*scale) + (*bias); + if (*C < 0) { + *C = 0; + } + C++; + c++; + } + C += (ldc - nc); + c += (NC - nc); + scale++; + bias++; + } + return; + } + int nc1 = nc / 16; int _nc1 = nc % 16; int nc2 = _nc1 / 4; @@ -1390,282 +1601,644 @@ void WriteWithBnRelu(int mc, int nc, float *c, float *C, int ldc, float *scale, "q8", "q10", "q11", "q12", "q13", "q14"); } -// C = A * B -void VecWriteBasic(int n, float *c, float *C, int ldc) { - int nc1 = n / 16; - int _nc1 = n % 16; - int nc2 = _nc1 / 4; - int nc3 = 16 - 4 * (_nc1 % 4); + /* + // C = A * B + void VecWriteBasic(int n, float *c, float *C, int ldc) { + int nc1 = n / 16; + int _nc1 = n % 16; + int nc2 = _nc1 / 4; + int nc3 = 16 - 4 * (_nc1 % 4); - asm volatile( - "subs %[nc1], %[nc1], #1 \n\t" - "blt end_nc1_%= \n\t" - "loop_nc1_%=: \n\t" + asm volatile( + "subs %[nc1], %[nc1], #1 \n\t" + "blt end_nc1_%= \n\t" + "loop_nc1_%=: \n\t" - "vld1.32 {q0, q1}, [%[c]]! \n\t" - "vst1.32 {q0, q1}, [%[C]]! \n\t" + "vld1.32 {q0, q1}, [%[c]]! \n\t" + "vst1.32 {q0, q1}, [%[C]]! \n\t" - "vld1.32 {q2, q3}, [%[c]]! \n\t" - "vst1.32 {q2, q3}, [%[C]]! \n\t" + "vld1.32 {q2, q3}, [%[c]]! \n\t" + "vst1.32 {q2, q3}, [%[C]]! \n\t" - "subs %[nc1], %[nc1], #1 \n\t" - "bge loop_nc1_%= \n\t" - "end_nc1_%=: \n\t" + "subs %[nc1], %[nc1], #1 \n\t" + "bge loop_nc1_%= \n\t" + "end_nc1_%=: \n\t" - "subs %[nc2], %[nc2], #1 \n\t" - "blt end_nc2_%= \n\t" - "loop_nc2_%=: \n\t" + "subs %[nc2], %[nc2], #1 \n\t" + "blt end_nc2_%= \n\t" + "loop_nc2_%=: \n\t" - "vld1.32 {q4}, [%[c]]! \n\t" - "vst1.32 {q4}, [%[C]]! \n\t" + "vld1.32 {q4}, [%[c]]! \n\t" + "vst1.32 {q4}, [%[C]]! \n\t" - "subs %[nc2], %[nc2], #1 \n\t" - "bge loop_nc2_%= \n\t" - "end_nc2_%=: \n\t" + "subs %[nc2], %[nc2], #1 \n\t" + "bge loop_nc2_%= \n\t" + "end_nc2_%=: \n\t" - "cmp %[nc3], #16 \n\t" - "beq end_nc3_%= \n\t" - "sub %[c], %[c], %[nc3] \n\t" - "sub %[C], %[C], %[nc3] \n\t" - "vld1.32 {q5}, [%[c]]! \n\t" - "vst1.32 {q5}, [%[C]]! \n\t" - "end_nc3_%=: \n\t" + "cmp %[nc3], #16 \n\t" + "beq end_nc3_%= \n\t" + "sub %[c], %[c], %[nc3] \n\t" + "sub %[C], %[C], %[nc3] \n\t" + "vld1.32 {q5}, [%[c]]! \n\t" + "vst1.32 {q5}, [%[C]]! 
\n\t" + "end_nc3_%=: \n\t" - : - : [C] "r"(C), [c] "r"(c), [nc1] "r"(nc1), [nc2] "r"(nc2), [nc3] "r"(nc3) - : "memory", "q0", "q1", "q2", "q3", "q4", "q5"); -} + : + : [C] "r"(C), [c] "r"(c), [nc1] "r"(nc1), [nc2] "r"(nc2), [nc3] "r"(nc3) + : "memory", "q0", "q1", "q2", "q3", "q4", "q5"); + } -// C = alpha * A * B + beta * C -void VecWriteWithAlphaBeta(int n, float *c, float *C, int ldc) {} + // C = alpha * A * B + beta * C + void VecWriteWithAlphaBeta(int n, float *c, float *C, int ldc) {} -// C = A * B + C -void VecWriteWithAdd(int n, float *c, float *C, int ldc) { - int nc1 = n / 16; - int _nc1 = n % 16; + // C = A * B + C + void VecWriteWithAdd(int n, float *c, float *C, int ldc) { + int nc1 = n / 16; + int _nc1 = n % 16; - asm volatile( - "subs %[nc1], %[nc1], #1 \n\t" - "blt end_nc1_%= \n\t" - "loop_nc1_%=: \n\t" + asm volatile( + "subs %[nc1], %[nc1], #1 \n\t" + "blt end_nc1_%= \n\t" + "loop_nc1_%=: \n\t" - "vld1.32 {q0, q1}, [%[c]]! \n\t" - "vld1.32 {q2, q3}, [%[C]] \n\t" - "vadd.f32 q10, q0, q2 \n\t" - "vadd.f32 q11, q1, q3 \n\t" - "vst1.32 {q10, q11}, [%[C]]! \n\t" + "vld1.32 {q0, q1}, [%[c]]! \n\t" + "vld1.32 {q2, q3}, [%[C]] \n\t" + "vadd.f32 q10, q0, q2 \n\t" + "vadd.f32 q11, q1, q3 \n\t" + "vst1.32 {q10, q11}, [%[C]]! \n\t" - "vld1.32 {q4, q5}, [%[c]]! \n\t" - "vld1.32 {q6, q7}, [%[C]] \n\t" - "vadd.f32 q12, q4, q6 \n\t" - "vadd.f32 q13, q5, q7 \n\t" - "vst1.32 {q12, q13}, [%[C]]! \n\t" + "vld1.32 {q4, q5}, [%[c]]! \n\t" + "vld1.32 {q6, q7}, [%[C]] \n\t" + "vadd.f32 q12, q4, q6 \n\t" + "vadd.f32 q13, q5, q7 \n\t" + "vst1.32 {q12, q13}, [%[C]]! \n\t" - "subs %[nc1], %[nc1], #1 \n\t" - "bge loop_nc1_%= \n\t" - "end_nc1_%=: \n\t" + "subs %[nc1], %[nc1], #1 \n\t" + "bge loop_nc1_%= \n\t" + "end_nc1_%=: \n\t" - : [C] "+r"(C), [c] "+r"(c) - : [nc1] "r"(nc1) - : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q10", "q11", - "q12", "q13"); + : [C] "+r"(C), [c] "+r"(c) + : [nc1] "r"(nc1) + : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q10", + "q11", "q12", "q13"); - if (_nc1 != 0) { - for (int j = 0; j < _nc1; j++) { - *C++ += *c++; + if (_nc1 != 0) { + for (int j = 0; j < _nc1; j++) { + *C++ += *c++; + } } } -} -// C = A * B + C, relu(C) -void VecWriteWithAddRelu(int n, float *c, float *C, int ldc) { - int nc1 = n / 16; - int _nc1 = n % 16; + // C = A * B + C, relu(C) + void VecWriteWithAddRelu(int n, float *c, float *C, int ldc) { + int nc1 = n / 16; + int _nc1 = n % 16; - asm volatile( - "vmov.f32 q14, #0.0 \n\t" - "subs %[nc1], %[nc1], #1 \n\t" - "blt end_nc1_%= \n\t" - "loop_nc1_%=: \n\t" + asm volatile( + "vmov.f32 q14, #0.0 \n\t" + "subs %[nc1], %[nc1], #1 \n\t" + "blt end_nc1_%= \n\t" + "loop_nc1_%=: \n\t" - "vld1.32 {q0, q1}, [%[c]]! \n\t" - "vld1.32 {q2, q3}, [%[C]] \n\t" - "vadd.f32 q10, q0, q2 \n\t" - "vadd.f32 q11, q1, q3 \n\t" - "vmax.f32 q10, q10, q14 \n\t" - "vmax.f32 q11, q11, q14 \n\t" - "vst1.32 {q10, q11}, [%[C]]! \n\t" + "vld1.32 {q0, q1}, [%[c]]! \n\t" + "vld1.32 {q2, q3}, [%[C]] \n\t" + "vadd.f32 q10, q0, q2 \n\t" + "vadd.f32 q11, q1, q3 \n\t" + "vmax.f32 q10, q10, q14 \n\t" + "vmax.f32 q11, q11, q14 \n\t" + "vst1.32 {q10, q11}, [%[C]]! \n\t" - "vld1.32 {q4, q5}, [%[c]]! \n\t" - "vld1.32 {q6, q7}, [%[C]] \n\t" - "vadd.f32 q12, q4, q6 \n\t" - "vadd.f32 q13, q5, q7 \n\t" - "vmax.f32 q12, q12, q14 \n\t" - "vmax.f32 q13, q13, q14 \n\t" - "vst1.32 {q12, q13}, [%[C]]! \n\t" + "vld1.32 {q4, q5}, [%[c]]! 
\n\t" + "vld1.32 {q6, q7}, [%[C]] \n\t" + "vadd.f32 q12, q4, q6 \n\t" + "vadd.f32 q13, q5, q7 \n\t" + "vmax.f32 q12, q12, q14 \n\t" + "vmax.f32 q13, q13, q14 \n\t" + "vst1.32 {q12, q13}, [%[C]]! \n\t" - "subs %[nc1], %[nc1], #1 \n\t" - "bge loop_nc1_%= \n\t" - "end_nc1_%=: \n\t" + "subs %[nc1], %[nc1], #1 \n\t" + "bge loop_nc1_%= \n\t" + "end_nc1_%=: \n\t" - : [C] "+r"(C), [c] "+r"(c) - : [nc1] "r"(nc1) - : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q10", "q11", - "q12", "q13"); + : [C] "+r"(C), [c] "+r"(c) + : [nc1] "r"(nc1) + : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q10", + "q11", "q12", "q13"); - if (_nc1 != 0) { - for (int j = 0; j < _nc1; j++) { - *C += *c; - if (*C < 0) { - *C = 0; + if (_nc1 != 0) { + for (int j = 0; j < _nc1; j++) { + *C += *c; + if (*C < 0) { + *C = 0; + } + C++; + c++; } - C++; - c++; } } + + // C = A * B, batchnorm(C) + void VecWriteWithBn(int n, float *c, float *C, int ldc, float *scale, + float *bias) { + int nc1 = n / 16; + int _nc1 = n % 16; + int nc2 = _nc1 / 4; + int nc3 = 16 - 4 * (_nc1 % 4); + + asm volatile( + "subs %[nc1], %[nc1], #1 \n\t" + "blt end_nc1_%= \n\t" + "loop_nc1_%=: \n\t" + + "vld1.32 {q0, q1}, [%[c]]! \n\t" + "vld1.32 {q2, q3}, [%[scale]]! \n\t" + "vld1.32 {q10, q11}, [%[bias]]! \n\t" + "vmla.f32 q10, q0, q2 \n\t" + "vmla.f32 q11, q1, q3 \n\t" + "vst1.32 {q10, q11}, [%[C]]! \n\t" + + "vld1.32 {q4, q5}, [%[c]]! \n\t" + "vld1.32 {q6, q7}, [%[scale]]! \n\t" + "vld1.32 {q12, q13}, [%[bias]]! \n\t" + "vmla.f32 q12, q4, q6 \n\t" + "vmla.f32 q13, q5, q7 \n\t" + "vst1.32 {q12, q13}, [%[C]]! \n\t" + + "subs %[nc1], %[nc1], #1 \n\t" + "bge loop_nc1_%= \n\t" + "end_nc1_%=: \n\t" + + "subs %[nc2], %[nc2], #1 \n\t" + "blt end_nc2_%= \n\t" + "loop_nc2_%=: \n\t" + + "vld1.32 {q0}, [%[c]]! \n\t" + "vld1.32 {q1}, [%[scale]]! \n\t" + "vld1.32 {q10}, [%[bias]]! \n\t" + "vmla.f32 q10, q0, q1 \n\t" + "vst1.32 {q10}, [%[C]]! \n\t" + + "subs %[nc2], %[nc2], #1 \n\t" + "bge loop_nc2_%= \n\t" + "end_nc2_%=: \n\t" + + "cmp %[nc3], #16 \n\t" + "beq end_nc3_%= \n\t" + + "sub %[c], %[c], %[nc3] \n\t" + "sub %[scale], %[scale], %[nc3] \n\t" + "sub %[bias], %[bias], %[nc3] \n\t" + "sub %[C], %[C], %[nc3] \n\t" + + "vld1.32 {q0}, [%[c]]! \n\t" + "vld1.32 {q1}, [%[scale]]! \n\t" + "vld1.32 {q10}, [%[bias]]! \n\t" + "vmla.f32 q10, q0, q1 \n\t" + "vst1.32 {q10}, [%[C]]! \n\t" + "end_nc3_%=: \n\t" + + : + : [C] "r"(C), [c] "r"(c), [nc1] "r"(nc1), [nc2] "r"(nc2), [nc3] + "r"(nc3), [scale] "r"(scale), [bias] "r"(bias) : "memory", "q0", "q1", "q2", + "q3", "q4", "q5", "q6", "q7", "q10", "q11", "q12", "q13"); + } + + // C = A * B, batchnorm(C), relu(C) + void VecWriteWithBnRelu(int n, float *c, float *C, int ldc, float *scale, + float *bias) { + int nc1 = n / 16; + int _nc1 = n % 16; + int nc2 = _nc1 / 4; + int nc3 = 16 - 4 * (_nc1 % 4); + + asm volatile( + "vmov.f32 q14, #0.0 \n\t" + "subs %[nc1], %[nc1], #1 \n\t" + "blt end_nc1_%= \n\t" + "loop_nc1_%=: \n\t" + + "vld1.32 {q0, q1}, [%[c]]! \n\t" + "vld1.32 {q2, q3}, [%[scale]]! \n\t" + "vld1.32 {q10, q11}, [%[bias]]! \n\t" + "vmla.f32 q10, q0, q2 \n\t" + "vmla.f32 q11, q1, q3 \n\t" + "vmax.f32 q10, q10, q14 \n\t" + "vmax.f32 q11, q11, q14 \n\t" + "vst1.32 {q10, q11}, [%[C]]! \n\t" + + "vld1.32 {q4, q5}, [%[c]]! \n\t" + "vld1.32 {q6, q7}, [%[scale]]! \n\t" + "vld1.32 {q12, q13}, [%[bias]]! \n\t" + "vmla.f32 q12, q4, q6 \n\t" + "vmla.f32 q13, q5, q7 \n\t" + "vmax.f32 q12, q12, q14 \n\t" + "vmax.f32 q13, q13, q14 \n\t" + "vst1.32 {q12, q13}, [%[C]]! 
\n\t" + + "subs %[nc1], %[nc1], #1 \n\t" + "bge loop_nc1_%= \n\t" + "end_nc1_%=: \n\t" + + "subs %[nc2], %[nc2], #1 \n\t" + "blt end_nc2_%= \n\t" + "loop_nc2_%=: \n\t" + + "vld1.32 {q0}, [%[c]]! \n\t" + "vld1.32 {q1}, [%[scale]]! \n\t" + "vld1.32 {q10}, [%[bias]]! \n\t" + "vmla.f32 q10, q0, q1 \n\t" + "vmax.f32 q10, q10, q14 \n\t" + "vst1.32 {q10}, [%[C]]! \n\t" + + "subs %[nc2], %[nc2], #1 \n\t" + "bge loop_nc2_%= \n\t" + "end_nc2_%=: \n\t" + + "cmp %[nc3], #16 \n\t" + "beq end_nc3_%= \n\t" + + "sub %[c], %[c], %[nc3] \n\t" + "sub %[scale], %[scale], %[nc3] \n\t" + "sub %[bias], %[bias], %[nc3] \n\t" + "sub %[C], %[C], %[nc3] \n\t" + + "vld1.32 {q0}, [%[c]]! \n\t" + "vld1.32 {q1}, [%[scale]]! \n\t" + "vld1.32 {q10}, [%[bias]]! \n\t" + "vmla.f32 q10, q0, q1 \n\t" + "vmax.f32 q10, q10, q14 \n\t" + "vst1.32 {q10}, [%[C]]! \n\t" + "end_nc3_%=: \n\t" + + : + : [C] "r"(C), [c] "r"(c), [nc1] "r"(nc1), [nc2] "r"(nc2), [nc3] + "r"(nc3), [scale] "r"(scale), [bias] "r"(bias) : "memory", "q0", "q1", "q2", + "q3", "q4", "q5", "q6", "q7", "q10", "q11", "q12", "q13", "q14"); + } + */ + +#endif // __aarch64__ +#else + +void AddDot4x4(int k, const float *a, const float *b, float *c, int ldc) { + float *c0, *c1, *c2, *c3; + c0 = c; + c1 = c + ldc; + c2 = c + 2 * ldc; + c3 = c + 3 * ldc; + for (int p = 0; p < k; p += 1) { + // first row + c0[0] += a[0] * b[0]; + c0[1] += a[0] * b[1]; + c0[2] += a[0] * b[2]; + c0[3] += a[0] * b[3]; + + // second row + c1[0] += a[1] * b[0]; + c1[1] += a[1] * b[1]; + c1[2] += a[1] * b[2]; + c1[3] += a[1] * b[3]; + + // third row + c2[0] += a[2] * b[0]; + c2[1] += a[2] * b[1]; + c2[2] += a[2] * b[2]; + c2[3] += a[2] * b[3]; + + // fourth row + c3[0] += a[3] * b[0]; + c3[1] += a[3] * b[1]; + c3[2] += a[3] * b[2]; + c3[3] += a[3] * b[3]; + + a += 4; + b += 4; + } } -// C = A * B, batchnorm(C) -void VecWriteWithBn(int n, float *c, float *C, int ldc, float *scale, - float *bias) { - int nc1 = n / 16; - int _nc1 = n % 16; - int nc2 = _nc1 / 4; - int nc3 = 16 - 4 * (_nc1 % 4); +#endif // __ARM_NEON - asm volatile( - "subs %[nc1], %[nc1], #1 \n\t" - "blt end_nc1_%= \n\t" - "loop_nc1_%=: \n\t" +// 32位 float 矩阵乘法 +void Sgemm(int m, int n, int k, float alpha, const float *A, int lda, + const float *B, int ldb, float beta, float *C, int ldc, bool relu) { + // L1 data cache is 32 kib (Per Contex-A57, Contex-A72, Contex-A73) + // L2 cache is 0.5~4 Mib (Contex-A72 cluster) + int L1 = 32 * 1024; + int L2 = 0.5 * 1024 * 1024; - "vld1.32 {q0, q1}, [%[c]]! \n\t" - "vld1.32 {q2, q3}, [%[scale]]! \n\t" - "vld1.32 {q10, q11}, [%[bias]]! \n\t" - "vmla.f32 q10, q0, q2 \n\t" - "vmla.f32 q11, q1, q3 \n\t" - "vst1.32 {q10, q11}, [%[C]]! \n\t" + KC = k; + MC = L1 / (KC * sizeof(float)); + NC = L2 / (KC * sizeof(float)); - "vld1.32 {q4, q5}, [%[c]]! \n\t" - "vld1.32 {q6, q7}, [%[scale]]! \n\t" - "vld1.32 {q12, q13}, [%[bias]]! \n\t" - "vmla.f32 q12, q4, q6 \n\t" - "vmla.f32 q13, q5, q7 \n\t" - "vst1.32 {q12, q13}, [%[C]]! 
\n\t" + // make sure MC is multiple of MR, and NC is multiple of NR + int mblock_num = (m + MC - 1) / MC; + MC = (m + mblock_num - 1) / mblock_num; + MC = (MC + MR - 1) / MR * MR; + // DLOG << "mblock_num = " << mblock_num << ", MC = " << MC << "\n"; - "subs %[nc1], %[nc1], #1 \n\t" - "bge loop_nc1_%= \n\t" - "end_nc1_%=: \n\t" + int nblock_num = (n + NC - 1) / NC; + NC = (n + nblock_num - 1) / nblock_num; + NC = (NC + NR - 1) / NR * NR; + // DLOG << "nblock_num = " << nblock_num << ", NC = " << NC << "\n"; - "subs %[nc2], %[nc2], #1 \n\t" - "blt end_nc2_%= \n\t" - "loop_nc2_%=: \n\t" + packedA = static_cast( + paddle_mobile::memory::Alloc(sizeof(float) * MC * KC)); + packedB = static_cast( + paddle_mobile::memory::Alloc(sizeof(float) * KC * NC)); + packedC = static_cast( + paddle_mobile::memory::Alloc(sizeof(float) * MC * NC)); + zero = static_cast(paddle_mobile::memory::Alloc(sizeof(float) * KC)); - "vld1.32 {q0}, [%[c]]! \n\t" - "vld1.32 {q1}, [%[scale]]! \n\t" - "vld1.32 {q10}, [%[bias]]! \n\t" - "vmla.f32 q10, q0, q1 \n\t" - "vst1.32 {q10}, [%[C]]! \n\t" + for (int l = 0; l < KC; ++l) { + zero[l] = 0; + } - "subs %[nc2], %[nc2], #1 \n\t" - "bge loop_nc2_%= \n\t" - "end_nc2_%=: \n\t" + int mc, nc; + for (int j = 0; j < n; j += NC) { + nc = s_min(n - j, NC); + PackMatrixB_8c(KC, nc, nc % NR, &B(0, j), ldb, packedB); + for (int i = 0; i < m; i += MC) { + mc = s_min(m - i, MC); + PackMatrixA_6r(mc, KC, mc % MR, &A(i, 0), lda, packedA); + InnerKernel(mc, nc, alpha, packedA, packedB, beta, packedC, &C(i, j), ldc, + relu); + } + } - "cmp %[nc3], #16 \n\t" - "beq end_nc3_%= \n\t" + paddle_mobile::memory::Free(packedA); + paddle_mobile::memory::Free(packedB); + paddle_mobile::memory::Free(packedC); + paddle_mobile::memory::Free(zero); +} - "sub %[c], %[c], %[nc3] \n\t" - "sub %[scale], %[scale], %[nc3] \n\t" - "sub %[bias], %[bias], %[nc3] \n\t" - "sub %[C], %[C], %[nc3] \n\t" +void SgemmWithBn(int m, int n, int k, float alpha, const float *A, int lda, + const float *B, int ldb, float beta, float *C, int ldc, + bool relu, float *new_scale, float *new_bias) { + // L1 data cache is 32 kib (Per Contex-A57, Contex-A72, Contex-A73) + // L2 cache is 0.5~4 Mib (Contex-A72 cluster) + int L1 = 32 * 1024; + int L2 = 0.5 * 1024 * 1024; - "vld1.32 {q0}, [%[c]]! \n\t" - "vld1.32 {q1}, [%[scale]]! \n\t" - "vld1.32 {q10}, [%[bias]]! \n\t" - "vmla.f32 q10, q0, q1 \n\t" - "vst1.32 {q10}, [%[C]]! 
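The rewritten Sgemm above derives its block sizes from the cache hierarchy rather than a fixed ratio: KC spans the whole k dimension, MC is sized so an MC x KC panel of packed A fits in L1, and NC so a KC x NC panel of packed B fits in L2, each rounded to whole register tiles. A sketch of that arithmetic under the same cache-size assumptions the comments name (the real code additionally rebalances the block sizes across the actual m and n):

```c++
#include <algorithm>

constexpr int MR = 6;  // rows per register tile, matching the new gemm.h
constexpr int NR = 8;  // columns per register tile

int RoundUp(int x, int m) { return (x + m - 1) / m * m; }

// Sketch of the cache-blocking arithmetic in Sgemm/SgemmWithBn above,
// assuming k > 0 and the 32 KiB L1 / 0.5 MiB L2 figures from the comments.
void ChooseBlockSizes(int k, int *KC, int *MC, int *NC) {
  const int L1 = 32 * 1024;   // per-core L1 data cache, bytes
  const int L2 = 512 * 1024;  // conservative L2 size, bytes
  *KC = k;
  *MC = std::max(
      MR, RoundUp(L1 / (*KC * static_cast<int>(sizeof(float))), MR));
  *NC = std::max(
      NR, RoundUp(L2 / (*KC * static_cast<int>(sizeof(float))), NR));
}
```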
\n\t" - "end_nc3_%=: \n\t" + KC = k; + MC = L1 / (KC * sizeof(float)); + NC = L2 / (KC * sizeof(float)); - : - : [C] "r"(C), [c] "r"(c), [nc1] "r"(nc1), [nc2] "r"(nc2), [nc3] "r"(nc3), - [scale] "r"(scale), [bias] "r"(bias) - : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q10", "q11", - "q12", "q13"); + // make sure MC is multiple of MR, and NC is multiple of NR + int mblock_num = (m + MC - 1) / MC; + MC = (m + mblock_num - 1) / mblock_num; + MC = (MC + MR - 1) / MR * MR; + // DLOG << "mblock_num = " << mblock_num << ", MC = " << MC << "\n"; + + int nblock_num = (n + NC - 1) / NC; + NC = (n + nblock_num - 1) / nblock_num; + NC = (NC + NR - 1) / NR * NR; + // DLOG << "nblock_num = " << nblock_num << ", NC = " << NC << "\n"; + + packedA = static_cast( + paddle_mobile::memory::Alloc(sizeof(float) * MC * KC)); + packedB = static_cast( + paddle_mobile::memory::Alloc(sizeof(float) * KC * NC)); + packedC = static_cast( + paddle_mobile::memory::Alloc(sizeof(float) * MC * NC)); + zero = static_cast(paddle_mobile::memory::Alloc(sizeof(float) * KC)); + + for (int l = 0; l < KC; ++l) { + zero[l] = 0; + } + + int mc, nc; + for (int j = 0; j < n; j += NC) { + nc = s_min(n - j, NC); + PackMatrixB_8c(KC, nc, nc % NR, &B(0, j), ldb, packedB); + for (int i = 0; i < m; i += MC) { + mc = s_min(m - i, MC); + PackMatrixA_6r(mc, KC, mc % MR, &A(i, 0), lda, packedA); + InnerKernelWithBn(mc, nc, alpha, packedA, packedB, beta, packedC, + &C(i, j), ldc, relu, new_scale + i, new_bias + i); + } + } + + paddle_mobile::memory::Free(packedA); + paddle_mobile::memory::Free(packedB); + paddle_mobile::memory::Free(packedC); + paddle_mobile::memory::Free(zero); } -// C = A * B, batchnorm(C), relu(C) -void VecWriteWithBnRelu(int n, float *c, float *C, int ldc, float *scale, - float *bias) { - int nc1 = n / 16; - int _nc1 = n % 16; - int nc2 = _nc1 / 4; - int nc3 = 16 - 4 * (_nc1 % 4); +void AddDot6x8(int k, const float *a, const float *b, float *c, int ldc) { +#if __ARM_NEON +#if __aarch64__ - asm volatile( - "vmov.f32 q14, #0.0 \n\t" - "subs %[nc1], %[nc1], #1 \n\t" - "blt end_nc1_%= \n\t" - "loop_nc1_%=: \n\t" + // init C + float32x4_t cv0 = vdupq_n_f32(0.0); + float32x4_t cv1 = vdupq_n_f32(0.0); + float32x4_t cv2 = vdupq_n_f32(0.0); + float32x4_t cv3 = vdupq_n_f32(0.0); + float32x4_t cv4 = vdupq_n_f32(0.0); + float32x4_t cv5 = vdupq_n_f32(0.0); + float32x4_t cv6 = vdupq_n_f32(0.0); + float32x4_t cv7 = vdupq_n_f32(0.0); + float32x4_t cv8 = vdupq_n_f32(0.0); + float32x4_t cv9 = vdupq_n_f32(0.0); + float32x4_t cv10 = vdupq_n_f32(0.0); + float32x4_t cv11 = vdupq_n_f32(0.0); - "vld1.32 {q0, q1}, [%[c]]! \n\t" - "vld1.32 {q2, q3}, [%[scale]]! \n\t" - "vld1.32 {q10, q11}, [%[bias]]! \n\t" - "vmla.f32 q10, q0, q2 \n\t" - "vmla.f32 q11, q1, q3 \n\t" - "vmax.f32 q10, q10, q14 \n\t" - "vmax.f32 q11, q11, q14 \n\t" - "vst1.32 {q10, q11}, [%[C]]! \n\t" + float32x4_t av; + float32x4_t bv0; + float32x4_t bv1; - "vld1.32 {q4, q5}, [%[c]]! \n\t" - "vld1.32 {q6, q7}, [%[scale]]! \n\t" - "vld1.32 {q12, q13}, [%[bias]]! \n\t" - "vmla.f32 q12, q4, q6 \n\t" - "vmla.f32 q13, q5, q7 \n\t" - "vmax.f32 q12, q12, q14 \n\t" - "vmax.f32 q13, q13, q14 \n\t" - "vst1.32 {q12, q13}, [%[C]]! 
\n\t" + float32x2_t av01; + float32x2_t av23; + float32x2_t av45; - "subs %[nc1], %[nc1], #1 \n\t" - "bge loop_nc1_%= \n\t" - "end_nc1_%=: \n\t" + for (int p = 0; p < k; p += 1) { + av = vld1q_f32(a); + av01 = vget_low_f32(av); + av23 = vget_high_f32(av); + av45 = vld1_f32(a + 4); + bv0 = vld1q_f32(b); + bv1 = vld1q_f32(b + 4); - "subs %[nc2], %[nc2], #1 \n\t" - "blt end_nc2_%= \n\t" - "loop_nc2_%=: \n\t" + cv0 = vmlaq_lane_f32(cv0, bv0, av01, 0); + cv1 = vmlaq_lane_f32(cv1, bv1, av01, 0); + cv2 = vmlaq_lane_f32(cv2, bv0, av01, 1); + cv3 = vmlaq_lane_f32(cv3, bv1, av01, 1); - "vld1.32 {q0}, [%[c]]! \n\t" - "vld1.32 {q1}, [%[scale]]! \n\t" - "vld1.32 {q10}, [%[bias]]! \n\t" - "vmla.f32 q10, q0, q1 \n\t" - "vmax.f32 q10, q10, q14 \n\t" - "vst1.32 {q10}, [%[C]]! \n\t" + cv4 = vmlaq_lane_f32(cv4, bv0, av23, 0); + cv5 = vmlaq_lane_f32(cv5, bv1, av23, 0); + cv6 = vmlaq_lane_f32(cv6, bv0, av23, 1); + cv7 = vmlaq_lane_f32(cv7, bv1, av23, 1); - "subs %[nc2], %[nc2], #1 \n\t" - "bge loop_nc2_%= \n\t" - "end_nc2_%=: \n\t" + cv8 = vmlaq_lane_f32(cv8, bv0, av45, 0); + cv9 = vmlaq_lane_f32(cv9, bv1, av45, 0); + cv10 = vmlaq_lane_f32(cv10, bv0, av45, 1); + cv11 = vmlaq_lane_f32(cv11, bv1, av45, 1); - "cmp %[nc3], #16 \n\t" - "beq end_nc3_%= \n\t" + a += MR; + b += NR; + } - "sub %[c], %[c], %[nc3] \n\t" - "sub %[scale], %[scale], %[nc3] \n\t" - "sub %[bias], %[bias], %[nc3] \n\t" - "sub %[C], %[C], %[nc3] \n\t" + vst1q_f32(c, cv0); + vst1q_f32(c + 4, cv1); + vst1q_f32(c + ldc, cv2); + vst1q_f32(c + ldc + 4, cv3); + vst1q_f32(c + 2 * ldc, cv4); + vst1q_f32(c + 2 * ldc + 4, cv5); + vst1q_f32(c + 3 * ldc, cv6); + vst1q_f32(c + 3 * ldc + 4, cv7); + vst1q_f32(c + 4 * ldc, cv8); + vst1q_f32(c + 4 * ldc + 4, cv9); + vst1q_f32(c + 5 * ldc, cv10); + vst1q_f32(c + 5 * ldc + 4, cv11); - "vld1.32 {q0}, [%[c]]! \n\t" - "vld1.32 {q1}, [%[scale]]! \n\t" - "vld1.32 {q10}, [%[bias]]! \n\t" - "vmla.f32 q10, q0, q1 \n\t" - "vmax.f32 q10, q10, q14 \n\t" - "vst1.32 {q10}, [%[C]]! \n\t" - "end_nc3_%=: \n\t" +#else + + const float *a_ptr, *b_ptr; + a_ptr = a; + b_ptr = b; + int kc1 = k / 4; + int kc2 = k % 4; + int step = 4 * ldc; + asm volatile( + "pld [%[a_ptr]] \n\t" + "pld [%[b_ptr]] \n\t" + "pld [%[a_ptr], #64] \n\t" + "pld [%[b_ptr], #64] \n\t" + + "vmov.f32 q4, #0.0 \n\t" + "vmov.f32 q5, #0.0 \n\t" + "vmov.f32 q6, #0.0 \n\t" + "vmov.f32 q7, #0.0 \n\t" + "vmov.f32 q8, #0.0 \n\t" + "vmov.f32 q9, #0.0 \n\t" + "vmov.f32 q10, #0.0 \n\t" + "vmov.f32 q11, #0.0 \n\t" + "vmov.f32 q12, #0.0 \n\t" + "vmov.f32 q13, #0.0 \n\t" + "vmov.f32 q14, #0.0 \n\t" + "vmov.f32 q15, #0.0 \n\t" + + "subs %[kc1], %[kc1], #1 \n\t" + "blt end_kc1_%= \n\t" + "loop_kc1_%=: \n\t" + + // "pld [%[a_ptr], #128] \n\t" + // "pld [%[b_ptr], #128] \n\t" + // "pld [%[a_ptr], #192] \n\t" + // "pld [%[b_ptr], #192] \n\t" + + "vld1.32 {d0-d2}, [%[a_ptr]]! \n\t" + "vld1.32 {q2, q3}, [%[b_ptr]]! \n\t" + + "vmla.f32 q4, q2, d0[0] \n\t" + "vmla.f32 q5, q3, d0[0] \n\t" + "vmla.f32 q6, q2, d0[1] \n\t" + "vmla.f32 q7, q3, d0[1] \n\t" + "vmla.f32 q8, q2, d1[0] \n\t" + "vmla.f32 q9, q3, d1[0] \n\t" + "vmla.f32 q10, q2, d1[1] \n\t" + "vmla.f32 q11, q3, d1[1] \n\t" + "vmla.f32 q12, q2, d2[0] \n\t" + "vmla.f32 q13, q3, d2[0] \n\t" + "vmla.f32 q14, q2, d2[1] \n\t" + "vmla.f32 q15, q3, d2[1] \n\t" + + "vld1.32 {d0-d2}, [%[a_ptr]]! \n\t" + "vld1.32 {q2, q3}, [%[b_ptr]]! 
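AddDot6x8 above is the new 6x8 micro-kernel that replaces the 4x8 one: each step of the k loop consumes 6 packed values of A and 8 packed values of B and accumulates the full 6x8 outer product, so all 48 partial sums live in registers (cv0..cv11 on aarch64, q4..q15 in the 32-bit assembly, which also unrolls k by four). A scalar reference, as a sketch:

```c++
// Scalar reference for the 6x8 micro-kernel above (a sketch): accumulate
// k rank-1 updates of the 6x8 tile from packed panels, then store the tile
// into C with row stride ldc. Like the NEON version, it overwrites C rather
// than accumulating into it; any add/bn/relu combining happens at
// write-back.
void AddDot6x8Ref(int k, const float *a, const float *b, float *c, int ldc) {
  float acc[6][8] = {};
  for (int p = 0; p < k; ++p) {
    for (int i = 0; i < 6; ++i) {
      for (int j = 0; j < 8; ++j) {
        acc[i][j] += a[i] * b[j];
      }
    }
    a += 6;  // MR: packed A advances one 6-row column per step
    b += 8;  // NR: packed B advances one 8-column row per step
  }
  for (int i = 0; i < 6; ++i) {
    for (int j = 0; j < 8; ++j) {
      c[i * ldc + j] = acc[i][j];
    }
  }
}
```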
\n\t" + + "vmla.f32 q4, q2, d0[0] \n\t" + "vmla.f32 q5, q3, d0[0] \n\t" + "vmla.f32 q6, q2, d0[1] \n\t" + "vmla.f32 q7, q3, d0[1] \n\t" + "vmla.f32 q8, q2, d1[0] \n\t" + "vmla.f32 q9, q3, d1[0] \n\t" + "vmla.f32 q10, q2, d1[1] \n\t" + "vmla.f32 q11, q3, d1[1] \n\t" + "vmla.f32 q12, q2, d2[0] \n\t" + "vmla.f32 q13, q3, d2[0] \n\t" + "vmla.f32 q14, q2, d2[1] \n\t" + "vmla.f32 q15, q3, d2[1] \n\t" + + "vld1.32 {d0-d2}, [%[a_ptr]]! \n\t" + "vld1.32 {q2, q3}, [%[b_ptr]]! \n\t" + + "vmla.f32 q4, q2, d0[0] \n\t" + "vmla.f32 q5, q3, d0[0] \n\t" + "vmla.f32 q6, q2, d0[1] \n\t" + "vmla.f32 q7, q3, d0[1] \n\t" + "vmla.f32 q8, q2, d1[0] \n\t" + "vmla.f32 q9, q3, d1[0] \n\t" + "vmla.f32 q10, q2, d1[1] \n\t" + "vmla.f32 q11, q3, d1[1] \n\t" + "vmla.f32 q12, q2, d2[0] \n\t" + "vmla.f32 q13, q3, d2[0] \n\t" + "vmla.f32 q14, q2, d2[1] \n\t" + "vmla.f32 q15, q3, d2[1] \n\t" + + "vld1.32 {d0-d2}, [%[a_ptr]]! \n\t" + "vld1.32 {q2, q3}, [%[b_ptr]]! \n\t" + + "vmla.f32 q4, q2, d0[0] \n\t" + "vmla.f32 q5, q3, d0[0] \n\t" + "vmla.f32 q6, q2, d0[1] \n\t" + "vmla.f32 q7, q3, d0[1] \n\t" + "vmla.f32 q8, q2, d1[0] \n\t" + "vmla.f32 q9, q3, d1[0] \n\t" + "vmla.f32 q10, q2, d1[1] \n\t" + "vmla.f32 q11, q3, d1[1] \n\t" + "vmla.f32 q12, q2, d2[0] \n\t" + "vmla.f32 q13, q3, d2[0] \n\t" + "vmla.f32 q14, q2, d2[1] \n\t" + "vmla.f32 q15, q3, d2[1] \n\t" + + "subs %[kc1], %[kc1], #1 \n\t" + "bge loop_kc1_%= \n\t" + "end_kc1_%=: \n\t" + + "subs %[kc2], %[kc2], #1 \n\t" + "blt end_kc2_%= \n\t" + "loop_kc2_%=: \n\t" + + "vld1.32 {d0-d2}, [%[a_ptr]]! \n\t" + "vld1.32 {q2, q3}, [%[b_ptr]]! \n\t" + + "vmla.f32 q4, q2, d0[0] \n\t" + "vmla.f32 q5, q3, d0[0] \n\t" + "vmla.f32 q6, q2, d0[1] \n\t" + "vmla.f32 q7, q3, d0[1] \n\t" + "vmla.f32 q8, q2, d1[0] \n\t" + "vmla.f32 q9, q3, d1[0] \n\t" + "vmla.f32 q10, q2, d1[1] \n\t" + "vmla.f32 q11, q3, d1[1] \n\t" + "vmla.f32 q12, q2, d2[0] \n\t" + "vmla.f32 q13, q3, d2[0] \n\t" + "vmla.f32 q14, q2, d2[1] \n\t" + "vmla.f32 q15, q3, d2[1] \n\t" + + "subs %[kc2], %[kc2], #1 \n\t" + "bge loop_kc2_%= \n\t" + "end_kc2_%=: \n\t" + + "mov r5, %[c] \n\t" + "mov r6, %[step] \n\t" + "vst1.32 {q4, q5}, [r5], r6 \n\t" + "vst1.32 {q6, q7}, [r5], r6 \n\t" + "vst1.32 {q8, q9}, [r5], r6 \n\t" + "vst1.32 {q10, q11}, [r5], r6 \n\t" + "vst1.32 {q12, q13}, [r5], r6 \n\t" + "vst1.32 {q14, q15}, [r5] \n\t" : - : [C] "r"(C), [c] "r"(c), [nc1] "r"(nc1), [nc2] "r"(nc2), [nc3] "r"(nc3), - [scale] "r"(scale), [bias] "r"(bias) - : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q10", "q11", - "q12", "q13", "q14"); + : [a_ptr] "r"(a_ptr), [b_ptr] "r"(b_ptr), [c] "r"(c), [kc1] "r"(kc1), + [kc2] "r"(kc2), [step] "r"(step) + : "memory", "r5", "r6", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); + +#endif // __aarch64__ +#else + +#endif // __ARM_NEON } +} // namespace math } // namespace operators } // namespace paddle_mobile -} // namespace paddle_mobile diff --git a/src/operators/math/gemm.h b/src/operators/math/gemm.h index b4bce43c7a29fba09ade7512cbc660f0ac2888ab..2044c264ed1c0f8624690874ed248661a753804c 100644 --- a/src/operators/math/gemm.h +++ b/src/operators/math/gemm.h @@ -19,7 +19,7 @@ limitations under the License. */ #define B(i, j) B[(i)*ldb + (j)] #define C(i, j) C[(i)*ldc + (j)] -#define MR 4 +#define MR 6 #define NR 8 #define s_min(i, j) ((i) < (j) ? 
(i) : (j)) @@ -28,6 +28,7 @@ namespace paddle_mobile { namespace operators { namespace math { +/* // 将 A 矩阵分块复制到连续内存(ColMajor) void PackMatrixA(int m, int k, int m_tail, const float *A, int lda, float *buffer); @@ -35,14 +36,17 @@ void PackMatrixA(int m, int k, int m_tail, const float *A, int lda, // 将 B 矩阵分块复制到连续内存(ColMajor) void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb, float *buffer); +*/ // 将 A 矩阵分块复制到连续内存(RowMajor) -void PackMatrixA_(int m, int k, int m_tail, const float *A, int lda, - float *buffer); +void PackMatrixA_4r(int m, int k, int m_tail, const float *A, int lda, + float *buffer); +void PackMatrixA_6r(int m, int k, int m_tail, const float *A, int lda, + float *buffer); // 将 B 矩阵分块复制到连续内存(RowMajor) -void PackMatrixB_(int k, int n, int n_tail, const float *B, int ldb, - float *buffer); +void PackMatrixB_8c(int k, int n, int n_tail, const float *B, int ldb, + float *buffer); // 分块矩阵乘法 void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b, @@ -51,7 +55,7 @@ void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b, void InnerKernelWithBn(int mc, int nc, float alpha, const float *a, const float *b, float beta, float *c, float *C, int ldc, bool relu, float *new_scale, float *new_bias); - +/* // 向量矩阵乘法 (M = 1) void VectorKernel(int m, int n, int k, float alpha, const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc, @@ -60,10 +64,12 @@ void VectorKernel(int m, int n, int k, float alpha, const float *A, int lda, void VectorKernelWithBn(int m, int n, int k, float alpha, const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc, bool relu, float *new_scale, float *new_bias); +*/ // 计算一个更小的 C 矩阵分块 void AddDot4x4(int k, const float *a, const float *b, float *c, int ldc); void AddDot4x8(int k, const float *a, const float *b, float *c, int ldc); +void AddDot6x8(int k, const float *a, const float *b, float *c, int ldc); // 分块矩阵乘法结果回写 // C = A * B @@ -81,6 +87,7 @@ void WriteWithBn(int mc, int nc, float *c, float *C, int ldc, float *new_scale, void WriteWithBnRelu(int mc, int nc, float *c, float *C, int ldc, float *new_scale, float *new_bias); +/* // 向量矩阵乘法结果回写 // C = A * B void VecWriteBasic(int n, float *c, float *C, int ldc); @@ -96,6 +103,7 @@ void VecWriteWithBn(int n, float *c, float *C, int ldc, float *new_scale, // C = A * B, batchnorm(C), relu(C) void VecWriteWithBnRelu(int n, float *c, float *C, int ldc, float *new_scale, float *new_bias); +*/ // 32位 float 矩阵乘法 void Sgemm(int m, int n, int k, float alpha, const float *A, int lda, diff --git a/src/operators/math/im2col.cpp b/src/operators/math/im2col.cpp index 625d120705aab8fcc3ea8d232b4077e213941ec4..7b0b974b542a83d381727128887bef8a48ce937f 100644 --- a/src/operators/math/im2col.cpp +++ b/src/operators/math/im2col.cpp @@ -15,7 +15,7 @@ limitations under the License. 
*/ #include "operators/math/im2col.h" #include #ifdef __ARM_NEON -#include "arm_neon.h" +#include #endif #include "common/types.h" namespace paddle_mobile { @@ -69,7 +69,7 @@ class Im2ColFunctor { int channels_col = im_channels * filter_height * filter_width; const T *im_data = im.data(); T *col_data = col->data(); -#ifdef __ARM_NEON +#if __ARM_NEON const int osize = col_height; const int isize = im_height; bool pad1 = padding[0] > 0; diff --git a/src/operators/math/pool_2x2.cpp b/src/operators/math/pool_2x2.cpp index c86003f6f96b632efd50bbb156293510e3d8521c..0a2d96d4d065d7938e6872b4f073e080d7be8c3a 100644 --- a/src/operators/math/pool_2x2.cpp +++ b/src/operators/math/pool_2x2.cpp @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #ifdef POOL_OP -#include "pool_2x2.h" +#include "operators/math/pool_2x2.h" +#include +#include namespace paddle_mobile { namespace operators { @@ -21,10 +23,10 @@ namespace math { void Pool2x2Max(vector strides, vector paddings, const Tensor *input, Tensor *output) { -#ifdef __ARM_NEON - -#ifdef ARMV7 +#if __ARM_NEON +#if __aarch64__ +#else const int batch_size = input->dims()[0]; const int input_height = input->dims()[2]; @@ -93,15 +95,16 @@ void Pool2x2Max(vector strides, vector paddings, const Tensor *input, output_data += output_batch_stride; } #endif - +#else #endif } void Pool2x2Avg(vector strides, vector paddings, const Tensor *input, Tensor *output) { -#ifdef __ARM_NEON +#if __ARM_NEON -#ifdef ARMV7 +#if __aarch64__ +#else const int batch_size = input->dims()[0]; const int input_height = input->dims()[2]; @@ -171,12 +174,9 @@ void Pool2x2Avg(vector strides, vector paddings, const Tensor *input, input_data += input_batch_stride; output_data += output_batch_stride; } -#else - -// TODO(): to imp other asm #endif - +#else #endif } diff --git a/src/operators/math/pool_3x3.cpp b/src/operators/math/pool_3x3.cpp index 28a8877355b2c2cc1221512884b5be1497bc4243..28547b71fca6caea2ff4341b3f832c0035436a72 100644 --- a/src/operators/math/pool_3x3.cpp +++ b/src/operators/math/pool_3x3.cpp @@ -17,7 +17,7 @@ limitations under the License. 
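The pooling hunks above converge on one guard layout: NEON fast paths compile only for 32-bit ARM for now, the aarch64 branches are left empty (the callers fall back to the generic PoolBasic loop), and non-NEON builds take a plain C++ path. The skeleton, as a sketch:

```c++
// Guard skeleton shared by the Pool2x2/Pool3x3 hunks above (a sketch).
void PoolDispatchSkeleton() {
#if __ARM_NEON
#if __aarch64__
  // Intentionally empty: no aarch64 intrinsics yet, so the caller's
  // generic path handles this build.
#else
  // 32-bit ARM NEON implementation lives here.
#endif
#else
  // Plain C++ fallback for non-ARM builds.
#endif
}
```

Switching from `#ifdef ARMV7` to `#if __aarch64__` keys the split off a compiler-defined macro instead of a build flag that was easy to forget.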
*/ #include #endif #include "framework/tensor.h" -#include "pool_3x3.h" +#include "operators/math/pool_3x3.h" #if __ARM_NEON #include #endif // __ARM_NEON @@ -518,6 +518,8 @@ void Pool3x3Maxs1p1(const Tensor *input, Tensor *output) { input_data += input_batch_stride; out_data += output_batch_stride; } +#else + #endif } @@ -582,7 +584,18 @@ void Pool3x3Max(vector strides, vector paddings, const Tensor *input, } output_seg[ph * output_width + pw] = max_value; } else { -#if defined(ARMV7) +#if __aarch64__ + const float32x4_t data1 = vld1q_f32(pos1); + const float32x4_t data2 = vld1q_f32(pos1 + input_width); + const float32x4_t data3 = vld1q_f32(pos1 + 2 * input_width); + const float32x4_t max_data = + vmaxq_f32(vmaxq_f32(data1, data2), data3); + float32x2_t res = + vpmax_f32(vget_high_f32(vsetq_lane_f32(-INT_MAX, max_data, 3)), + vget_low_f32(max_data)); + res = vpmax_f32(res, res); + output_seg[ph * output_width + pw] = vget_lane_f32(res, 0); +#else asm volatile( "vld1.32 {q1}, [%[pos1]] \n\t" "vld1.32 {q2}, [%[pos2]] \n\t" @@ -598,17 +611,6 @@ void Pool3x3Max(vector strides, vector paddings, const Tensor *input, [pos2] "r"(pos2), [pos3] "r"(pos3), [output_ptr] "r"(output_ptr), [negative_max] "r"(negative_max) : "memory", "q1", "q2", "q3", "q4"); -#else - const float32x4_t data1 = vld1q_f32(pos1); - const float32x4_t data2 = vld1q_f32(pos1 + input_width); - const float32x4_t data3 = vld1q_f32(pos1 + 2 * input_width); - const float32x4_t max_data = - vmaxq_f32(vmaxq_f32(data1, data2), data3); - float32x2_t res = - vpmax_f32(vget_high_f32(vsetq_lane_f32(-INT_MAX, max_data, 3)), - vget_low_f32(max_data)); - res = vpmax_f32(res, res); - output_seg[ph * output_width + pw] = vget_lane_f32(res, 0); #endif } } @@ -676,8 +678,8 @@ void Pool3x3Avg(vector strides, vector paddings, const Tensor *input, } output_seg[ph * output_width + pw] = sum / 9.0; } else { -#if defined(ARMV7) - +#if __aarch64__ +#else asm volatile( "vld1.32 {q1}, [%[pos1]] \n\t" "vld1.32 {q2}, [%[pos2]] \n\t" @@ -696,7 +698,7 @@ void Pool3x3Avg(vector strides, vector paddings, const Tensor *input, [output_ptr] "r"(output_ptr), [zero] "r"(zero), [nine_ptr] "r"(nine_ptr) : "memory", "r6", "q1", "q2", "q3", "q4"); -#else +#endif const float32x4_t data1 = vld1q_f32(pos1); const float32x4_t data2 = vld1q_f32(pos2); const float32x4_t data3 = vld1q_f32(pos3); @@ -707,7 +709,6 @@ void Pool3x3Avg(vector strides, vector paddings, const Tensor *input, vget_low_f32(sum_data)); res = vpadd_f32(res, res); output_seg[ph * output_width + pw] = vget_lane_f32(res, 0) / 9.0; -#endif } } } @@ -715,6 +716,7 @@ void Pool3x3Avg(vector strides, vector paddings, const Tensor *input, input_data += input_batch_stride; output_data += output_batch_stride; } +#else #endif } } // namespace math diff --git a/src/operators/math/softmax.cpp b/src/operators/math/softmax.cpp index 968915f21e08fce9f25ceb63831ee40ecba9cee6..dba88c93969014f2ad0d2636b4141c734dbc2ed5 100644 --- a/src/operators/math/softmax.cpp +++ b/src/operators/math/softmax.cpp @@ -135,6 +135,7 @@ class SoftmaxFuntor { } } } +#else #endif // ARM_NEON public: diff --git a/src/operators/mul_op.cpp b/src/operators/mul_op.cpp index 60e0c087383388c83ca1711c057af822a6e2a730..044da7012eccde57a87d417f4f3c00b82e01da42 100644 --- a/src/operators/mul_op.cpp +++ b/src/operators/mul_op.cpp @@ -50,7 +50,7 @@ void MulOp::InferShape() const { framework::DDim ddim = framework::make_ddim(output_dims); this->param_.Out()->Resize(ddim); } -template class MulOp; + } // namespace operators } // namespace paddle_mobile diff 
--git a/src/operators/multiclass_nms_op.cpp b/src/operators/multiclass_nms_op.cpp index eea625469ec030e0c7d62baea8312e11f1308ce2..4324cab35298a45ece7e375299909994648a27a4 100644 --- a/src/operators/multiclass_nms_op.cpp +++ b/src/operators/multiclass_nms_op.cpp @@ -34,7 +34,7 @@ void MultiClassNMSOp::InferShape() const { // pre size, will change in Compute. this->param_.Out()->Resize(framework::make_ddim({input_bboxes_dims[1], 6})); } -template class MultiClassNMSOp; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 24e3699f2fc8947ee23341ddcefcb219c6f8df03..e2795b3aefe3c67df9b51c882298a717a388ae15 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -232,7 +232,6 @@ class ConvParam : OpParam { Print &operator<<(Print &printer, const ConvParam &conv_param); #endif -#ifdef ELEMENTWISEADD_OP class ElementwiseAddParam : OpParam { public: ElementwiseAddParam(const VariableNameMap &inputs, @@ -259,6 +258,8 @@ class ElementwiseAddParam : OpParam { int axis_; }; +#ifdef FUSION_ELEMENTWISEADDRELU_OP +using ElementwiseAddReluParam = ElementwiseAddParam; #endif #ifdef MUL_OP @@ -371,7 +372,7 @@ class BatchNormParam : OpParam { input_variance_ = InputVarianceFrom(inputs, scope); epsilon_ = GetAttr("epsilon", attrs); momentum_ = GetAttr("momentum", attrs); - is_test_ = GetAttr("is_test", attrs); + // is_test_ = GetAttr("is_test", attrs); } const Tensor *InputX() const { return input_x_; } @@ -421,7 +422,7 @@ class PoolParam : public OpParam { strides_ = GetAttr>("strides", attrs); paddings_ = GetAttr>("paddings", attrs); ceil_mode_ = GetAttr("ceil_mode", attrs); - gloabal_pooling_ = GetAttr("global_pooling", attrs); + global_pooling_ = GetAttr("global_pooling", attrs); } const Tensor *Input() const { return input_; } @@ -438,7 +439,7 @@ class PoolParam : public OpParam { bool isCeilMode() const { return ceil_mode_; } - bool isGlobalPooling() const { return gloabal_pooling_; } + bool isGlobalPooling() const { return global_pooling_; } private: Tensor *input_; @@ -448,9 +449,82 @@ class PoolParam : public OpParam { vector strides_; vector paddings_; bool ceil_mode_; - bool gloabal_pooling_ = false; + bool global_pooling_ = false; }; +#endif + +#ifdef FUSION_POOLBN_OP +class FusionPoolBNParam : OpParam { + public: + FusionPoolBNParam(const VariableNameMap &inputs, + const VariableNameMap &outputs, const AttributeMap &attrs, + const Scope &scope) { + input_ = InputXFrom(inputs, scope); + pooling_type_ = GetAttr("pooling_type", attrs); + ksize_ = GetAttr>("ksize", attrs); + strides_ = GetAttr>("strides", attrs); + paddings_ = GetAttr>("paddings", attrs); + ceil_mode_ = GetAttr("ceil_mode", attrs); + global_pooling_ = GetAttr("global_pooling", attrs); + output_y_ = OutputYFrom(outputs, scope); + input_bias_ = InputBiasFrom(inputs, scope); + input_mean_ = InputMeanFrom(inputs, scope); + input_scale_ = InputScaleFrom(inputs, scope); + input_variance_ = InputVarianceFrom(inputs, scope); + epsilon_ = GetAttr("epsilon", attrs); + momentum_ = GetAttr("momentum", attrs); + // is_test_ = GetAttr("is_test", attrs); + } + const Tensor *Input() const { return input_; } + + const string &PoolingType() const { return pooling_type_; } + + const vector &Ksize() const { return ksize_; } + + const vector &Strides() const { return strides_; } + + const vector &Paddings() const { return paddings_; } + + bool isCeilMode() const { return ceil_mode_; } + bool isGlobalPooling() const { return global_pooling_; } + + Tensor *OutputY() 
const { return output_y_; } + + const Tensor *InputBias() const { return input_bias_; } + + const Tensor *InputMean() const { return input_mean_; } + + const Tensor *InputScale() const { return input_scale_; } + + const Tensor *InputVariance() const { return input_variance_; } + + const float &Epsilon() const { return epsilon_; } + + const float &Momentum() const { return momentum_; } + + const bool &IsTest() const { return is_test_; } + + const string &DataFormat() const { return data_format_; } + + private: + Tensor *input_; + string pooling_type_; + vector ksize_; + vector strides_; + vector paddings_; + bool ceil_mode_; + bool global_pooling_ = false; + Tensor *output_y_; + Tensor *input_bias_; + Tensor *input_mean_; + Tensor *input_scale_; + Tensor *input_variance_; + float epsilon_; + float momentum_; + bool is_test_; + string data_format_; +}; #endif #ifdef PRIORBOX_OP @@ -875,7 +949,6 @@ class PReluParam : public OpParam { }; #endif -#ifdef FUSION_FC_OP class FusionFcParam : public OpParam { public: FusionFcParam(const VariableNameMap &inputs, const VariableNameMap &outputs, @@ -911,9 +984,11 @@ class FusionFcParam : public OpParam { int y_num_col_dims_; int axis_; }; + +#ifdef FUSION_FCRELU_OP +using FusionFcReluParam = FusionFcParam; #endif -#ifdef FUSION_CONVADD_OP class FusionConvAddParam : public OpParam { public: FusionConvAddParam(const VariableNameMap &inputs, @@ -960,9 +1035,8 @@ class FusionConvAddParam : public OpParam { }; Print &operator<<(Print &printer, const FusionConvAddParam &conv_param); -#endif -#ifdef FUSION_CONVADD_RELU_OP +#ifdef FUSION_CONVADDRELU_OP class FusionConvAddReluParam : public FusionConvAddParam { public: FusionConvAddReluParam(const VariableNameMap &inputs, @@ -993,7 +1067,7 @@ class FusionConvAddBNReluParam : public OpParam { input_variance_ = InputVarianceFrom(inputs, scope); epsilon_ = GetAttr("epsilon", attrs); momentum_ = GetAttr("momentum", attrs); - is_test_ = GetAttr("is_test", attrs); + // is_test_ = GetAttr("is_test", attrs); } Tensor *Bias() const { return bias_; } @@ -1055,8 +1129,91 @@ class FusionConvAddBNReluParam : public OpParam { Tensor *new_bias_; Tensor *new_scale_; }; +#endif -Print &operator<<(Print &printer, const FusionConvAddParam &conv_param); +#ifdef FUSION_CONVADDBN_OP +class FusionConvAddBNParam : public OpParam { + public: + FusionConvAddBNParam(const VariableNameMap &inputs, + const VariableNameMap &outputs, + const AttributeMap &attrs, const Scope &scope) { + bias_ = InputYFrom(inputs, scope); + axis_ = GetAttr("axis", attrs); + filter_ = FilterFrom(inputs, scope); + input_ = InputFrom(inputs, scope); + output_y_ = OutputYFrom(outputs, scope); + strides_ = GetAttr>("strides", attrs); + paddings_ = GetAttr>("paddings", attrs); + dilations_ = GetAttr>("dilations", attrs); + groups = GetAttr("groups", attrs); + input_bias_ = InputBiasFrom(inputs, scope); + input_mean_ = InputMeanFrom(inputs, scope); + input_scale_ = InputScaleFrom(inputs, scope); + input_variance_ = InputVarianceFrom(inputs, scope); + epsilon_ = GetAttr("epsilon", attrs); + momentum_ = GetAttr("momentum", attrs); + // is_test_ = GetAttr("is_test", attrs); + } + Tensor *Bias() const { return bias_; } + + const int &Axis() const { return axis_; } + + const Tensor *Input() const { return input_; } + + const Tensor *Filter() const { return filter_; } + + Tensor *OutputY() const { return output_y_; } + + const vector &Strides() const { return strides_; } + + const vector &Paddings() const { return paddings_; } + + const vector &Dilations() const { return 
dilations_; } + + const int &Groups() const { return groups; } + + const Tensor *InputBias() const { return input_bias_; } + + const Tensor *InputMean() const { return input_mean_; } + + const Tensor *InputScale() const { return input_scale_; } + + const Tensor *InputVariance() const { return input_variance_; } + + const float &Epsilon() const { return epsilon_; } + + const float &Momentum() const { return momentum_; } + + const bool &IsTest() const { return is_test_; } + + void SetNewScale(Tensor *new_scale) { new_scale_ = new_scale; } + + void SetNewBias(Tensor *new_bias) { new_bias_ = new_bias; } + + const Tensor *NewScale() const { return new_scale_; } + + const Tensor *NewBias() const { return new_bias_; } + + protected: + Tensor *bias_; + int axis_; + Tensor *input_; + Tensor *output_y_; + Tensor *filter_; + vector strides_; + vector paddings_; + vector dilations_; + int groups; + Tensor *input_bias_; + Tensor *input_mean_; + Tensor *input_scale_; + Tensor *input_variance_; + float epsilon_; + float momentum_; + bool is_test_; + Tensor *new_bias_; + Tensor *new_scale_; +}; #endif #ifdef FUSION_DWCONVBNRELU_OP @@ -1078,7 +1235,7 @@ class FusionDWConvBNReluParam : public OpParam { input_variance_ = InputVarianceFrom(inputs, scope); epsilon_ = GetAttr("epsilon", attrs); momentum_ = GetAttr("momentum", attrs); - is_test_ = GetAttr("is_test", attrs); + // is_test_ = GetAttr("is_test", attrs); } const Tensor *Input() const { return input_; } @@ -1139,6 +1296,85 @@ class FusionDWConvBNReluParam : public OpParam { Print &operator<<(Print &printer, const FusionConvAddParam &conv_param); #endif +#ifdef FUSION_CONVBNRELU_OP +class FusionConvBNReluParam : public OpParam { + public: + FusionConvBNReluParam(const VariableNameMap &inputs, + const VariableNameMap &outputs, + const AttributeMap &attrs, const Scope &scope) { + filter_ = FilterFrom(inputs, scope); + input_ = InputFrom(inputs, scope); + output_ = OutFrom(outputs, scope); + + strides_ = GetAttr>("strides", attrs); + paddings_ = GetAttr>("paddings", attrs); + dilations_ = GetAttr>("dilations", attrs); + groups = GetAttr("groups", attrs); + input_bias_ = InputBiasFrom(inputs, scope); + input_mean_ = InputMeanFrom(inputs, scope); + input_scale_ = InputScaleFrom(inputs, scope); + input_variance_ = InputVarianceFrom(inputs, scope); + epsilon_ = GetAttr("epsilon", attrs); + momentum_ = GetAttr("momentum", attrs); + // is_test_ = GetAttr("is_test", attrs); + } + + const Tensor *Input() const { return input_; } + + const Tensor *Filter() const { return filter_; } + + Tensor *Output() const { return output_; } + + const vector &Strides() const { return strides_; } + + const vector &Paddings() const { return paddings_; } + + const vector &Dilations() const { return dilations_; } + + const int &Groups() const { return groups; } + + const Tensor *InputBias() const { return input_bias_; } + + const Tensor *InputMean() const { return input_mean_; } + + const Tensor *InputScale() const { return input_scale_; } + + const Tensor *InputVariance() const { return input_variance_; } + + const float &Epsilon() const { return epsilon_; } + + const float &Momentum() const { return momentum_; } + + const bool &IsTest() const { return is_test_; } + + void SetNewScale(Tensor *new_scale) { new_scale_ = new_scale; } + + void SetNewBias(Tensor *new_bias) { new_bias_ = new_bias; } + + const Tensor *NewScale() const { return new_scale_; } + + const Tensor *NewBias() const { return new_bias_; } + + protected: + Tensor *input_; + Tensor *output_; + Tensor *filter_; + 
vector strides_; + vector paddings_; + vector dilations_; + int groups; + Tensor *input_bias_; + Tensor *input_mean_; + Tensor *input_scale_; + Tensor *input_variance_; + float epsilon_; + float momentum_; + bool is_test_; + Tensor *new_bias_; + Tensor *new_scale_; +}; +#endif + #ifdef IM2SEQUENCE_OP class Im2SequenceParam : public OpParam { public: @@ -1190,5 +1426,9 @@ class DropoutParam : public OpParam { }; #endif +#ifdef REGION_OP +class RegionParam : public OpParam {}; +#endif + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/pool_op.cpp b/src/operators/pool_op.cpp index 41016d74deb5bcd7d3679b1c762467e2dc65de34..0477c88cf84054090b4c46524284fb0cdf525c0e 100644 --- a/src/operators/pool_op.cpp +++ b/src/operators/pool_op.cpp @@ -54,7 +54,7 @@ void PoolOp::InferShape() const { } this->param_.Output()->Resize(framework::make_ddim(output_shape)); } -template class PoolOp; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/prelu_op.cpp b/src/operators/prelu_op.cpp index e78f6b0374336a3d891a1f3e73f63c706b321ccc..245154ca5ea6971dee33e14550bf1e090fa0ec71 100644 --- a/src/operators/prelu_op.cpp +++ b/src/operators/prelu_op.cpp @@ -23,7 +23,7 @@ void PReluOp::InferShape() const { auto input_dims = this->param_.InputX()->dims(); this->param_.Out()->Resize(input_dims); } -template class PReluOp; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/prior_box_op.cpp b/src/operators/prior_box_op.cpp index 81ba045a209a48105ab895f7687e56ed3db44305..a05a0ddcec5ba9d442b58846468a121e9b655a6a 100644 --- a/src/operators/prior_box_op.cpp +++ b/src/operators/prior_box_op.cpp @@ -44,7 +44,7 @@ void PriorBoxOp::InferShape() const { this->param_.OutputBoxes()->Resize(framework::make_ddim(dim_vec)); this->param_.OutputVariances()->Resize(framework::make_ddim(dim_vec)); } -template class PriorBoxOp; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/relu_op.cpp b/src/operators/relu_op.cpp index b80a56f38aec4bf1bf625d54f4115626447a654a..2a771e81e7a5a0e869984990b52b98d15036543a 100644 --- a/src/operators/relu_op.cpp +++ b/src/operators/relu_op.cpp @@ -23,7 +23,7 @@ void ReluOp::InferShape() const { auto input_dims = this->param_.InputX()->dims(); this->param_.Out()->Resize(input_dims); } -template class ReluOp; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/reshape_op.cpp b/src/operators/reshape_op.cpp index 193678613cc8dd2b8f9b8ae1654b0adacea09505..dcc15009af2b23129552d58b3fa22c3c67684dce 100644 --- a/src/operators/reshape_op.cpp +++ b/src/operators/reshape_op.cpp @@ -27,7 +27,7 @@ void ReshapeOp::InferShape() const { auto out_dims = ValidateShape(shape, input_x_dims); this->param_.Out()->Resize(out_dims); } -template class ReshapeOp; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/resize_op.cpp b/src/operators/resize_op.cpp index f378ff53f513ccf7cfb986f606378895b5af4b9f..02c50b662665fc9bd2f662922cb88dbce9fc5d53 100644 --- a/src/operators/resize_op.cpp +++ b/src/operators/resize_op.cpp @@ -24,7 +24,7 @@ void ResizeOp::InferShape() const { auto out_dims = CalOutputShape(this->param_); this->param_.Out()->Resize(out_dims); } -template class ResizeOp; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/scale_op.cpp b/src/operators/scale_op.cpp index c1931ed4fdc4c058c979fdceba11ea25f7d752f4..968fcd4098e92a47899c9a733c0261d91c314c29 100644 --- a/src/operators/scale_op.cpp +++ 
b/src/operators/scale_op.cpp @@ -24,7 +24,7 @@ void ScaleOp::InferShape() const { auto input_dims = this->param_.InputX()->dims(); this->param_.Out()->Resize(input_dims); } -template class ScaleOp; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/sigmoid_op.cpp b/src/operators/sigmoid_op.cpp index c83738b2c88c3c51ebc0d649fe134da9e44f30ea..8ea4c98942e0630f5b69133991583ee1192c8153 100644 --- a/src/operators/sigmoid_op.cpp +++ b/src/operators/sigmoid_op.cpp @@ -22,7 +22,7 @@ template void SigmoidOp::InferShape() const { this->param_.Out()->Resize(this->param_.InputX()->dims()); } -template class SigmoidOp; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/slice_op.cpp b/src/operators/slice_op.cpp index 6d70895fcc5edf75f73368813212f7d9177c760b..b77a675e10ed030443e1d4074239a715ddedf772 100644 --- a/src/operators/slice_op.cpp +++ b/src/operators/slice_op.cpp @@ -23,7 +23,7 @@ template void SliceOp::InferShape() const { /// todo: add InputShape() detection. } -template class SliceOp; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/softmax_op.cpp b/src/operators/softmax_op.cpp index db8fe1d94363c1db578a369d9eca00dde17d30af..c9edfccf4ff08e5a12d735526c3d63c689711357 100644 --- a/src/operators/softmax_op.cpp +++ b/src/operators/softmax_op.cpp @@ -22,7 +22,7 @@ template void SoftmaxOp::InferShape() const { this->param_.Out()->Resize(this->param_.InputX()->dims()); } -template class SoftmaxOp; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/transpose_op.cpp b/src/operators/transpose_op.cpp index 7e578b290174734ba8c210a354c9e56fde364858..5f193f96396c8d4d7cb58143573015384e7a7c28 100644 --- a/src/operators/transpose_op.cpp +++ b/src/operators/transpose_op.cpp @@ -47,7 +47,7 @@ void TransposeOp::InferShape() const { } this->param_.Out()->Resize(out_dims); } -template class TransposeOp; + } // namespace operators } // namespace paddle_mobile diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 09d1ff031f2d29eb64c83d43724b1039fce9385f..418ebff79161675e8b23a4cca8f4319121aa6002 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,23 +1,23 @@ set(dir ${CMAKE_CURRENT_SOURCE_DIR}) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${dir}/build") -if (NET STREQUAL "googlenet") +if ("googlenet" IN_LIST NET) # gen test ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-googlenet paddle-mobile) -elseif (NET STREQUAL "mobilenet") +elseif ("mobilenet" IN_LIST NET) # gen test ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-mobilenet paddle-mobile) -elseif (NET STREQUAL "yolo") +elseif ("yolo" IN_LIST NET) # gen test ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-yolo paddle-mobile) -elseif (NET STREQUAL "squeezenet") +elseif ("squeezenet" IN_LIST NET) # gen test ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-squeezenet paddle-mobile) -elseif(NET STREQUAL "resnet") +elseif("resnet" IN_LIST NET) # gen test ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-resnet paddle-mobile) @@ -145,6 +145,10 @@ else () ADD_EXECUTABLE(test-conv-add-relu-op operators/test_conv_add_relu_op.cpp test_helper.h 
test_include.h executor_for_test.h)
 target_link_libraries(test-conv-add-relu-op paddle-mobile)
+ # gen test
+ ADD_EXECUTABLE(test-conv-add-bn-relu-op operators/test_fusion_conv_add_bn_relu_op.cpp test_helper.h test_include.h executor_for_test.h)
+ target_link_libraries(test-conv-add-bn-relu-op paddle-mobile)
+
 #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
endif()
diff --git a/test/executor_for_test.h b/test/executor_for_test.h
index c9ab4783d6826992ee81ffd63b0391169645576c..93847af20a6d48a6df33dc50f6c6a1db76facf51 100644
--- a/test/executor_for_test.h
+++ b/test/executor_for_test.h
@@ -43,7 +43,7 @@ template <typename DeviceType, typename OpType>
 class Executor4Test : public Executor<DeviceType> {
  public:
  Executor4Test(Program<DeviceType> p, string op_type,
-               bool use_optimize = false)
+               bool use_optimize = false, int predict_op_count = 1)
      : Executor<DeviceType>() {
    this->use_optimize_ = use_optimize;
    this->program_ = p;
@@ -57,12 +57,14 @@ class Executor4Test : public Executor<DeviceType> {
      LOG(paddle_mobile::LogLevel::kLOG_ERROR)
          << "to_predict_program_ == nullptr";
    }
+
    const std::vector<std::shared_ptr<BlockDesc>> blocks =
        this->to_predict_program_->Blocks();
    for (std::shared_ptr<BlockDesc> block_desc : blocks) {
      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
-     for (std::shared_ptr<OpDesc> op : ops) {
-       if (op->Type() == op_type) {
+     for (int i = 0; i < ops.size(); ++i) {
+       auto op = ops[i];
+       if (op->Type() == op_type && i < predict_op_count) {
          DLOG << "matched: " << op->Type();
          /// test first meeting op in program
@@ -72,11 +74,17 @@ class Executor4Test : public Executor<DeviceType> {
              op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
              this->program_.scope);
          this->ops_of_block_[*block_desc.get()].push_back(op_ptr);
-         break;
        }
      }
    }
    this->InitMemory();
+
+   std::shared_ptr<BlockDesc> to_predict_block =
+       this->to_predict_program_->Block(0);
+   auto &ops = this->ops_of_block_[*to_predict_block.get()];
+   for (const auto &op : ops) {
+     op->Init();
+   }
  }
  template
@@ -130,9 +138,6 @@ class Executor4Test : public Executor<DeviceType> {
    auto *output_tensor = con_output->GetMutable<LoDTensor>();
    output_tensor->mutable_data<float>(dDim);
-   std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
-   out_tensor.reset(output_tensor);
-
    std::shared_ptr<BlockDesc> to_predict_block =
        this->to_predict_program_->Block(0);
    for (int j = 0; j < this->ops_of_block_[*to_predict_block.get()].size();
@@ -141,6 +146,7 @@
      op->Run();
    }
-   return out_tensor;
+   return std::make_shared<paddle_mobile::framework::Tensor>(
+       paddle_mobile::framework::Tensor(*output_tensor));
  }
};
diff --git a/test/framework/test_load.cpp b/test/framework/test_load.cpp
index f4215de46c2bafd732b0092b58c25bf6fcefdf7a..bea7d4ba7d2df1344f0819222fbdb389106fa77e 100644
--- a/test/framework/test_load.cpp
+++ b/test/framework/test_load.cpp
@@ -19,7 +19,9 @@ int main() {
  paddle_mobile::Loader<paddle_mobile::CPU> loader;
  //  ../../../test/models/googlenet
  //  ../../../test/models/mobilenet
- auto program = loader.Load(g_googlenet, true);
+ // auto program = loader.Load(g_googlenet, true);
+
+ auto program = loader.Load(g_mobilenet_ssd, true);
  // auto program = loader.Load(g_googlenet_combine + "/model",
  //                            g_googlenet_combine +
  //                            "/params", true);
diff --git a/test/net/test_googlenet.cpp b/test/net/test_googlenet.cpp
index 2ab24736397c1e71350335561abbcabcba6e27a4..d230b9469229946fc74f4dc9e1ee6100196ed9aa 100644
--- a/test/net/test_googlenet.cpp
+++ b/test/net/test_googlenet.cpp
@@ -23,7 +23,7 @@ int main() {
  auto time1 = time();
  if (paddle_mobile.Load(g_googlenet, optimize)) {
    auto time2 = time();
-   DLOG << "load cost :" << time_diff(time1, time1) << "ms";
+   DLOG << "load cost: " << time_diff(time1, time2) << "ms";
    std::vector<float> input;
    std::vector<int64_t> dims{1, 3, 224, 224};
    GetInput<float>(g_test_image_1x3x224x224, &input, dims);
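A note on the `Executor4Test` change above: the constructor now collects the first `predict_op_count` ops whose type matches `op_type` (instead of stopping at the first match) and calls `Init()` on each fused op before it is `Run()`. A minimal usage sketch, assuming a program loaded as in the tests that follow; the op name and template arguments mirror the new fusion test, and `predict_op_count` defaults to 1:

```c++
// Run the first two matching fused ops of the program under test.
// Illustrative only; mirrors the constructor signature introduced above.
Executor4Test<paddle_mobile::CPU,
              paddle_mobile::operators::FusionConvAddBNReluOp<
                  paddle_mobile::CPU, float>>
    executor(program, "fusion_conv_add_bn_relu", true, /*predict_op_count=*/2);
```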
diff --git a/test/net/test_mobilenet+ssd.cpp b/test/net/test_mobilenet+ssd.cpp
index 1a7c4cd49cb1707b9c7783cf74e87e74da39732e..9b4e5f2d3a431001e138977b78994f5dfedbe0a3 100644
--- a/test/net/test_mobilenet+ssd.cpp
+++ b/test/net/test_mobilenet+ssd.cpp
@@ -12,28 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"
 int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+ paddle_mobile.SetThreadNum(4);
  auto time1 = time();
- if (paddle_mobile.Load(g_mobilenet_ssd, true)) {
+ auto isok = paddle_mobile.Load(
+     std::string(g_mobilenet_ssd_gesture) + "/model",
+     std::string(g_mobilenet_ssd_gesture) + "/params", true);
+ // auto isok = paddle_mobile.Load(g_mobilenet_ssd, false);
+ if (isok) {
    auto time2 = time();
-   DLOG << "load cost :" << time_diff(time1, time1) << "ms";
+   std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+
    std::vector<float> input;
    std::vector<int64_t> dims{1, 3, 300, 300};
-   Tensor input_tensor;
-   SetupTensor<float>(&input_tensor, {1, 3, 300, 300}, static_cast<float>(0),
-                      static_cast<float>(1));
+   GetInput<float>(g_hand, &input, dims);
-   std::vector<float> input(input_tensor.data<float>(),
-                            input_tensor.data<float>() + input_tensor.numel());
    auto time3 = time();
-   paddle_mobile.Predict(input, dims);
+   auto output = paddle_mobile.Predict(input, dims);
    auto time4 = time();
-   DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+   std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
+             << std::endl;
  }
  return 0;
}
diff --git a/test/net/test_mobilenet.cpp b/test/net/test_mobilenet.cpp
index 2e285695fb79f3ed5471a653c71a10b36ef4e7f2..9fc7226fc12fa7a0c631c9920487c0bd56c90816 100644
--- a/test/net/test_mobilenet.cpp
+++ b/test/net/test_mobilenet.cpp
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */
-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"
@@ -22,20 +22,23 @@ int main() {
  auto time1 = time();
  if (paddle_mobile.Load(g_mobilenet, true)) {
    auto time2 = time();
-   DLOG << "load cost :" << time_diff(time1, time1) << "ms";
+   std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+
    std::vector<float> input;
    std::vector<int64_t> dims{1, 3, 224, 224};
-   Tensor input_tensor;
-   SetupTensor<float>(&input_tensor, {1, 3, 224, 224}, static_cast<float>(0),
-                      static_cast<float>(1));
-
-   std::vector<float> input(input_tensor.data<float>(),
-                            input_tensor.data<float>() + input_tensor.numel());
-   auto time3 = time();
-   auto vec_result = paddle_mobile.Predict(input, dims);
-   auto time4 = time();
-
-   DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+   GetInput<float>(g_test_image_1x3x224x224, &input, dims);
+
+   for (int i = 0; i < 10; ++i) {
+     auto time3 = time();
+     auto vec_result = paddle_mobile.Predict(input, dims);
+     auto time4 = time();
+     std::vector<float>::iterator biggest =
+         std::max_element(std::begin(vec_result), std::end(vec_result));
+     std::cout << " Max element is " << *biggest << " at position "
+               << std::distance(std::begin(vec_result), biggest) << std::endl;
+     std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
+               << std::endl;
+   }
  }
  return 0;
}
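A note on the timing loop above: each of the ten predictions reports the arg-max of the output vector inline via `std::max_element`. The same lookup, factored into a helper for reuse (a sketch for illustration only, not part of the patch):

```c++
#include <algorithm>
#include <utility>
#include <vector>

// Return (index, score) of the best classification score, exactly the
// computation the mobilenet test performs inline.
inline std::pair<int, float> Top1(const std::vector<float> &scores) {
  auto it = std::max_element(scores.begin(), scores.end());
  return {static_cast<int>(it - scores.begin()), *it};
}
```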
diff --git a/test/operators/test_fusion_conv_add_bn_relu_op.cpp b/test/operators/test_fusion_conv_add_bn_relu_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..81400d987195364c06b4b93d0859469b43f90e7b
--- /dev/null
+++ b/test/operators/test_fusion_conv_add_bn_relu_op.cpp
@@ -0,0 +1,62 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+#include "operators/fusion_conv_add_bn_relu_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  //  ../models/image_classification_resnet.inference.model
+  auto program = loader.Load(g_mobilenet, true);
+
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::FusionConvAddBNReluOp<
+                    paddle_mobile::CPU, float>>
+      executor(program, "fusion_conv_add_bn_relu", true);
+
+  std::cout << "executor 4 test: " << std::endl;
+
+  paddle_mobile::framework::Tensor input;
+  GetInput<float>(g_test_image_1x3x224x224_banana, &input, {1, 3, 224, 224});
+  // use SetupTensor if there is no local input image:
+  // SetupTensor<float>(&input, {1, 3, 224, 224}, static_cast<float>(0),
+  //                    static_cast<float>(1));
+
+  DLOG << " input: " << input;
+
+  auto out_ddim = paddle_mobile::framework::make_ddim({1, 32, 112, 112});
+  std::cout << "before predict: " << std::endl;
+  auto output =
+      executor.Predict(input, "data", "conv2_1_dw_bn.tmp_2", out_ddim);
+  std::cout << "after predict " << std::endl;
+  auto output_ptr = output->data<float>();
+
+  int stride = output->numel() / 100;
+  for (int i = 0; i < 100; i++) {
+    DLOG << " index:" << i * stride << " value: " << output_ptr[i * stride];
+  }
+
+  // for (int i = 0; i < 100; i++) {
+  //   DLOG << " index:" << i << " value: " << output_ptr[i];
+  // }
+
+  // for (int j = 0; j < output->numel(); ++j) {
+  //   std::cout << " (index: " << j << " value: " << output_ptr[j] << ") ";
+  // }
+  std::cout << std::endl;
+  return 0;
+}
diff --git a/test/test_helper.h b/test/test_helper.h
index 81ad23ff3b4e53db0225630eebaa34878ad4c139..9a5c62c79c44fdf52657ea5facb5f0768810c440 100644
--- a/test/test_helper.h
+++ b/test/test_helper.h
@@ -16,22 +16,29 @@ limitations under the License. */
 #include <fstream>
 #include <random>
+#include <string>
+#include <vector>
 #include "common/common.h"
 #include "common/log.h"
 #include "framework/ddim.h"
 #include "framework/tensor.h"
-static const std::string g_mobilenet_ssd = "../models/mobilenet+ssd";
-static const std::string g_squeezenet = "../models/squeezenet";
-static const std::string g_googlenet = "../models/googlenet";
-static const std::string g_mobilenet = "../models/mobilenet";
-static const std::string g_resnet_50 = "../models/resnet_50";
-static const std::string g_resnet = "../models/resnet";
-static const std::string g_googlenet_combine = "../models/googlenet_combine";
-static const std::string g_yolo = "../models/yolo";
-static const std::string g_test_image_1x3x224x224 =
+static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
+static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
+static const char *g_squeezenet = "../models/squeezenet";
+static const char *g_googlenet = "../models/googlenet";
+static const char *g_mobilenet = "../models/mobilenet";
+static const char *g_resnet_50 = "../models/resnet_50";
+static const char *g_resnet = "../models/resnet";
+static const char *g_googlenet_combine = "../models/googlenet_combine";
+static const char *g_yolo = "../models/yolo";
+static const char *g_test_image_1x3x224x224 =
     "../images/test_image_1x3x224x224_float";
+static const char *g_test_image_1x3x224x224_banana =
+    "../images/input_3x224x224_banana";
+static const char *g_hand = "../images/hand_image";
+
 using paddle_mobile::framework::DDim;
 using paddle_mobile::framework::Tensor;
@@ -62,9 +69,9 @@ void GetInput(const std::string &input_name, std::vector<T> *input,
    size *= dim;
  }
- T *input_ptr = (T *)malloc(sizeof(T) * size);
+ T *input_ptr = reinterpret_cast<T *>(malloc(sizeof(T) * size));
  std::ifstream in(input_name, std::ios::in | std::ios::binary);
- in.read((char *)(input_ptr), size * sizeof(T));
+ in.read(reinterpret_cast<char *>(input_ptr), size * sizeof(T));
  in.close();
  for (int i = 0; i < size; ++i) {
    input->push_back(input_ptr[i]);
@@ -79,6 +86,6 @@ void GetInput(const std::string &input_name,
  T *input_ptr = input->mutable_data<T>(dims);
  std::ifstream in(input_name, std::ios::in | std::ios::binary);
- in.read((char *)(input_ptr), input->numel() * sizeof(T));
+ in.read(reinterpret_cast<char *>(input_ptr), input->numel() * sizeof(T));
  in.close();
}
diff --git a/tools/build.sh b/tools/build.sh
index 
ce330e6d631ea1009f28ccf987a50e5f79a032b6..db809f71076e6b6d4aacc53bd8e144db3935cb91 100755 --- a/tools/build.sh +++ b/tools/build.sh @@ -1,4 +1,6 @@ #!/usr/bin/env bash +NETS="" +declare -a supportedNets=("googlenet" "mobilenet" "yolo" "squeezenet" "resnet") build_for_mac() { if [ ! `which brew` ]; then @@ -38,7 +40,8 @@ build_for_android() { fi if [ -z "$PLATFORM" ]; then - PLATFORM="arm-v7a" # Users could choose "arm-v8a" or other platforms from the command line. + PLATFORM="arm-v7a" # Users could choose "arm-v8a" platform. +# PLATFORM="arm-v8a" fi if [ "${PLATFORM}" = "arm-v7a" ]; then @@ -59,7 +62,8 @@ build_for_android() { ANDROID_PLATFORM_VERSION="android-22" TOOLCHAIN_FILE="./tools/android-cmake/android.toolchain.cmake" ANDROID_ARM_MODE="arm" - if [ $# -eq 1 ]; then + + if [ "${#NETS}" > 1 ]; then cmake .. \ -B"../build/release/${PLATFORM}" \ -DANDROID_ABI="${ABI}" \ @@ -69,7 +73,7 @@ build_for_android() { -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ -DANDROID_STL=c++_static \ -DANDROID=true \ - -DNET=$1 \ + -DNET="${NETS}" \ -D"${ARM_PLATFORM}"=true else @@ -92,23 +96,25 @@ build_for_ios() { # rm -rf "../build" PLATFORM="ios" MODE="Release" - BUILD_DIR=../build/release/"${PLATFORM}" + BUILD_DIR=../build/release/"${PLATFORM}"/ TOOLCHAIN_FILE="./tools/ios-cmake/ios.toolchain.cmake" mkdir -p "${BUILD_DIR}" - if [ $# -eq 1 ]; then + if [ "${#NETS}" > 1 ]; then cmake .. \ -B"${BUILD_DIR}" \ -DCMAKE_BUILD_TYPE="${MODE}" \ - -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \ -DIOS_PLATFORM=OS \ - -DNET=$1 \ + -DIOS_ARCH="${IOS_ARCH}" \ + -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \ + -DNET="${NETS}" \ -DIS_IOS="true" else cmake .. \ -B"${BUILD_DIR}" \ -DCMAKE_BUILD_TYPE="${MODE}" \ - -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \ -DIOS_PLATFORM=OS \ + -DIOS_ARCH="${IOS_ARCH}" \ + -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \ -DIS_IOS="true" fi cd "${BUILD_DIR}" @@ -120,7 +126,7 @@ build_for_ios() { } build_error() { - echo "unknown argument" + echo "unknown target : $1" } if [ $# -lt 1 ]; then @@ -128,31 +134,37 @@ if [ $# -lt 1 ]; then echo "available targets: ios|android" echo "sample usage: ./build.sh android" else - if [ $# -eq 2 ]; then - if [ $2 != "googlenet" -a $2 != "mobilenet" -a $2 != "yolo" -a $2 != "squeezenet" -a $2 != "resnet" ]; then - if [ $1 = "android" ]; then - build_for_android - elif [ $1 = "ios" ]; then - build_for_ios - else - build_error - fi - else - if [ $1 = "android" ]; then - build_for_android $2 - elif [ $1 = "ios" ]; then - build_for_ios $2 - else - build_error - fi + params=($@) + for(( i=1; i<$#; i++ )); do + if [ ${i} != 1 ]; then + NETS=$NETS$";" + fi + NETS=$NETS$"${params[i]}" + done + params=${@:2} + + supported=false + for name in ${params[@]}; do + for net in ${supportedNets[@]}; do + match=false + if [ "$name"x = "$net"x ];then + supported=true + match=true + break 1 + fi + done + if [ "$match" = false ];then + echo "${name} not supported!" 
+ echo "supported nets are: ${supportedNets[@]}" + exit -1 fi + done + + if [ $1 = "android" ]; then + build_for_android + elif [ $1 = "ios" ]; then + build_for_ios else - if [ $1 = "android" ]; then - build_for_android - elif [ $1 = "ios" ]; then - build_for_ios - else - build_error - fi - fi + build_error "$1" + fi fi \ No newline at end of file diff --git a/tools/ios-cmake/ios.toolchain.cmake b/tools/ios-cmake/ios.toolchain.cmake index a8735adc8d853a5825a23f1ddf129d0a95199275..4db079d01de8db35fca8fbe63b59e58fd5a3463e 100644 --- a/tools/ios-cmake/ios.toolchain.cmake +++ b/tools/ios-cmake/ios.toolchain.cmake @@ -34,6 +34,7 @@ set (CMAKE_SYSTEM_VERSION 1) set (UNIX True) set (APPLE True) set (IOS True) +set (IOS_ARCH armv7 armv7s arm64) # Required as of cmake 2.8.10 set (CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING "Force unset of the deployment target for iOS" FORCE) @@ -159,7 +160,6 @@ set (CMAKE_OSX_SYSROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Sysroot used for iOS su # set the architecture for iOS if (${IOS_PLATFORM} STREQUAL "OS") - set (IOS_ARCH armv7 armv7s arm64) elseif (${IOS_PLATFORM} STREQUAL "SIMULATOR") set (IOS_ARCH i386) elseif (${IOS_PLATFORM} STREQUAL "SIMULATOR64") diff --git a/tools/net-detail.awk b/tools/net-detail.awk new file mode 100644 index 0000000000000000000000000000000000000000..84d0166ac777b5b7fbd9801665031bb2d51fedbb --- /dev/null +++ b/tools/net-detail.awk @@ -0,0 +1,91 @@ +BEGIN { +print "digraph G {" +} +/op:/ { + id++ + opname[id] = $NF +} +/input/ { + type = "input" + para = $NF + if (input[id]) { + input[id] = input[id] "|" + } + input[id] = input[id] "<" para ">" para +} +/output/ { + type = "output" + para = $NF + if (output[id]) { + output[id] = output[id] "|" + } + output[id] = output[id] "<" para ">" para +} +/attr/ { + type = "attr" + aname = $NF + if (attr_key[id]) { + attr_key[id] = attr_key[id] "|" + attr_value[id] = attr_value[id] "|" + } + attr_key[id] = attr_key[id] $NF +} +/argument/ { + if (type == "attr") { + split($0, arr, " - ") + attr_value[id] = attr_value[id] arr[2] + } else if ((type == "input") || (type == "output")) { + if (!var2id[$NF]) { + var_id++ + var[var_id] = $NF + var2id[$NF] = var_id + } + varid = var2id[$NF] + lid++ + if (type == "input") { + line[lid] = "var_" varid " -> " "op_" id ":<" para ">" + if (xout[$NF]) { + xi++ + xline[xi] = "xop_" xout[$NF] " -> " "xop_" id + } + } else if (type == "output") { + line[lid] = "op_" id ":<" para ">" " -> " "var_" varid + xout[$NF] = id + } + } +} +/var name/ { + varname = $NF + vid = var2id[varname] +} +/var tensor desc dim / { + if (tensor[vid]) tensor[vid] = tensor[vid] " x " + tensor[vid] = tensor[vid] $NF +} +END { + +print "subgraph cluster_G0 {" +for (i = 1; i <= id; i++) { + print "xop_" i "[label=\"" i ". " opname[i] "\"]" +} +for (i = 1; i <= xi; i++) { + print xline[i] +} +print "}" + +for (i = 1; i <= id; i++) { +print "op_" i "[group=op;shape=record;label=\"{{" input[i] "}|" i ". 
" opname[i] "|{" output[i] "}}\"]" +} +for (i = 1; i <= var_id; i++) { +print "var_" i "[label=\"" var[i] " [" tensor[i] "]\"]" +} +for (i = 1; i <= lid; i++) { +print line[i] +} +for (i = 1; i <= id; i++) { +print "attr_" i "[shape=record;label=\"{" attr_key[i] "}|{" attr_value[i] "}\"]" +print "attr_" i " -> " "op_" i ":" +} +print "}" +} + diff --git a/tools/op.cmake b/tools/op.cmake index 456d36262e9abf997a7861838c870e698d64f3c1..ec9768443c5e9825931111803acf1f51c1aa1acd 100644 --- a/tools/op.cmake +++ b/tools/op.cmake @@ -1,4 +1,6 @@ -if (NET STREQUAL "googlenet") +set(FOUND_MATCH OFF) +if ("googlenet" IN_LIST NET) + message("googlenet enabled") set(CONCAT_OP ON) set(CONV_OP ON) set(LRN_OP ON) @@ -8,8 +10,13 @@ if (NET STREQUAL "googlenet") set(POOL_OP ON) set(RELU_OP ON) set(FUSION_CONVADD_OP ON) - set(FUSION_CONVADD_RELU_OP ON) -elseif (NET STREQUAL "mobilenet") + set(FUSION_CONVADDRELU_OP ON) + + set(FOUND_MATCH ON) +endif() + +if ("mobilenet" IN_LIST NET) + message("mobilenet enabled") set(CONV_OP ON) set(ELEMENTWISEADD_OP ON) set(RELU_OP ON) @@ -21,12 +28,23 @@ elseif (NET STREQUAL "mobilenet") set(RESHAPE_OP ON) set(FUSION_CONVADDBNRELU_OP ON) set(FUSION_CONVADD_OP ON) -elseif (NET STREQUAL "yolo") + + set(FOUND_MATCH ON) +endif() + + +if ("yolo" IN_LIST NET) + message("yolo enabled") set(BATCHNORM_OP ON) set(CONV_OP ON) set(RELU_OP ON) set(ELEMENTWISEADD_OP ON) -elseif (NET STREQUAL "squeezenet") + + set(FOUND_MATCH ON) +endif() + +if ("squeezenet" IN_LIST NET) + message("squeezenet enabled") set(CONCAT_OP ON) set(CONV_OP ON) set(RELU_OP ON) @@ -34,15 +52,45 @@ elseif (NET STREQUAL "squeezenet") set(POOL_OP ON) set(RESHAPE_OP ON) set(SOFTMAX_OP ON) -elseif (NET STREQUAL "resnet") + + set(FOUND_MATCH ON) +endif() + + +if ("resnet" IN_LIST NET) + message("resnet enabled") + set(CONCAT_OP ON) set(CONV_OP ON) - set(BATCHNORM_OP ON) + set(RELU_OP ON) set(ELEMENTWISEADD_OP ON) + set(POOL_OP ON) + set(RESHAPE_OP ON) set(SOFTMAX_OP ON) - set(MUL_OP ON) + + set(FOUND_MATCH ON) +endif() + +if ("FPGAnets" IN_LIST NET) + message("FPGAnets enabled") + set(FUSION_CONVADDRELU_OP ON) + set(FUSION_CONVADDBNRELU_OP ON) + set(FUSION_CONVADDBN_OP ON) + set(FUSION_POOLBN_OP ON) + set(FUSION_ELEMENTWISEADDRELU_OP ON) + set(FUSION_FC_OP ON) + set(FUSION_FCRELU_OP ON) + set(REGION_OP ON) set(POOL_OP ON) - set(RELU_OP ON) -else () + set(CONCAT_OP ON) + set(SOFTMAX_OP ON) + set(DROPOUT_OP ON) + + set(FOUND_MATCH ON) +endif() + + +if(NOT FOUND_MATCH) + message("--default--") set(BATCHNORM_OP ON) set(BOXCODER_OP ON) set(CONCAT_OP ON) @@ -50,7 +98,7 @@ else () set(DEPTHWISECONV_OP ON) set(ELEMENTWISEADD_OP ON) set(FUSION_CONVADD_OP ON) - set(CONVADDRELU_OP ON) + set(FUSION_CONVADDRELU_OP ON) set(FUSION_FC_OP ON) set(LRN_OP ON) set(MUL_OP ON) @@ -62,15 +110,17 @@ else () set(SIGMOID_OP ON) set(SOFTMAX_OP ON) set(TRANSPOSE_OP ON) - set(FUSION_CONVADD_RELU_OP ON) set(FUSION_CONVADDBNRELU_OP ON) set(FUSION_DWCONVBNRELU_OP ON) + set(FUSION_CONVBNRELU_OP ON) set(PRELU_OP ON) set(RESIZE_OP ON) set(SCALE_OP ON) set(SLICE_OP ON) set(DROPOUT_OP ON) set(IM2SEQUENCE_OP ON) +endif() + # option(BATCHNORM_OP "" ON) # option(BOXCODER_OP "" ON) # option(CONCAT_OP "" ON) @@ -78,7 +128,7 @@ else () # option(DEPTHWISECONV_OP "" ON) # option(ELEMENTWISEADD_OP "" ON) # option(FUSION_CONVADD_OP "" ON) - # option(CONVADDRELU_OP "" ON) + # option(FUSION_CONVADDRELU_OP "" ON) # option(FUSION_FC_OP "" ON) # option(LRN_OP "" ON) # option(MUL_OP "" ON) @@ -90,8 +140,7 @@ else () # option(SIGMOID_OP "" ON) # option(SOFTMAX_OP "" ON) # 
option(TRANSPOSE_OP "" ON) - # option(FUSION_CONVADD_RELU_OP "" ON) -endif () +# endif () if (BATCHNORM_OP) add_definitions(-DBATCHNORM_OP) @@ -114,8 +163,8 @@ endif() if (FUSION_CONVADD_OP) add_definitions(-DFUSION_CONVADD_OP) endif() -if (CONVADDRELU_OP) - add_definitions(-DCONVADDRELU_OP) +if (FUSION_CONVADDRELU_OP) + add_definitions(-DFUSION_CONVADDRELU_OP) endif() if (FUSION_FC_OP) add_definitions(-DFUSION_FC_OP) @@ -150,15 +199,17 @@ endif() if (TRANSPOSE_OP) add_definitions(-DTRANSPOSE_OP) endif() -if (FUSION_CONVADD_RELU_OP) - add_definitions(-DFUSION_CONVADD_RELU_OP) -endif() if (FUSION_CONVADDBNRELU_OP) add_definitions(-DFUSION_CONVADDBNRELU_OP) endif() if (FUSION_DWCONVBNRELU_OP) add_definitions(-DFUSION_DWCONVBNRELU_OP) endif() + +if (FUSION_CONVBNRELU_OP) + add_definitions(-DFUSION_CONVBNRELU_OP) +endif() + if (PRELU_OP) add_definitions(-DPRELU_OP) endif() @@ -177,3 +228,20 @@ endif() if (IM2SEQUENCE_OP) add_definitions(-DIM2SEQUENCE_OP) endif() + +if (FUSION_CONVADDBN_OP) + add_definitions(-DFUSION_CONVADDBN_OP) +endif() +if (FUSION_FCRELU_OP) + add_definitions(-DFUSION_FCRELU_OP) +endif() +if (FUSION_POOLBN_OP) + add_definitions(-DFUSION_POOLBN_OP) +endif() +if (FUSION_ELEMENTWISEADDRELU_OP) + add_definitions(-DFUSION_ELEMENTWISEADDRELU_OP) +endif() +if (REGION_OP) + add_definitions(-DREGION_OP) +endif() + diff --git a/tools/quantification/CMakeLists.txt b/tools/quantification/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5f1ca7fdc2b65638c7158b0933b924c71eadc4a0 --- /dev/null +++ b/tools/quantification/CMakeLists.txt @@ -0,0 +1,12 @@ +cmake_minimum_required(VERSION 3.6) +project(quali) +add_definitions(-DENABLE_EXCEPTION) + +set(CMAKE_CXX_STANDARD 11) +file(GLOB_RECURSE QULIFICATON_CC src/*.cc src/*.cpp src/*.c src/*.mm) +file(GLOB_RECURSE QULIFICATON_H src/*.h) +include_directories(. src/) + +#add_library(paddle-mobile SHARED ${QULIFICATON_CC} ${QULIFICATON_H} convert.cpp) + +add_executable(quantify convert.cpp ${QULIFICATON_CC} ${QULIFICATON_H}) \ No newline at end of file diff --git a/tools/quantification/README.md b/tools/quantification/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ac729af01e7e73328b884097009dad1d468e7997 --- /dev/null +++ b/tools/quantification/README.md @@ -0,0 +1,39 @@ +# 模型量化脚本 + +#### 量化脚本使用指南 +1. 在PaddleMobile项目目录下(如 ~/PaddleProject/paddle-mobile) + +2. cd到 tools/quantification/ 目录 + +3. cmake编译 + + ``` sh + cmake . + make + ``` + +4. 运行量化脚本 + ```sh + ./quantify (0:seperated. 1:combined ) (输入路径) (输出路径) + # quantify googlenet seperated from /Users/xiebaiyuan/PaddleProject/quali/models/googlenet to ./googlenet_min + ./quantify 0 /Users/xiebaiyuan/PaddleProject/quali/models/googlenet ./googlenet_min + + ``` + +*注:* +*量化工具中* +*1.seperated模型model文件默认命名为 "__model__";* +*2.combined模型的model文件默认命名为 "model",参数文件默认命名为"params";* + + +##### 整体如下: +以googlenet非combined为例: + +```sh +cd tools/quantification/ +cmake . 
diff --git a/tools/quantification/convert.cpp b/tools/quantification/convert.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..88eef48b39ab8d2aeb1d4e3858ba97ef6360c9a9
--- /dev/null
+++ b/tools/quantification/convert.cpp
@@ -0,0 +1,275 @@
+
+
+#include "src/enforce.h"
+#include "src/var_desc.h"
+#include "src/program_desc.h"
+#include <algorithm>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include "src/framework.pb-c.h"
+#include "src/protobuf-c.h"
+#include <limits>
+#include <memory>
+#include <string>
+#include <vector>
+
+
+const size_t kSize64 = sizeof(uint64_t);
+const size_t kSize32 = sizeof(uint32_t);
+
+char *Get_binary_data(const std::string &filename) {
+
+    FILE *file = fopen(filename.c_str(), "rb");
+
+    PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
+                          filename.c_str());
+    fseek(file, 0, SEEK_END);
+    int64_t size = ftell(file);
+
+    PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
+    rewind(file);
+    auto *data = new char[size];
+    size_t bytes_read = fread(data, 1, static_cast<size_t>(size), file);
+    PADDLE_MOBILE_ENFORCE(bytes_read == size,
+                          "read binary file bytes do not match with fseek");
+    fclose(file);
+    return data;
+}
+
+
+static size_t ReadBuffer(const char *file_name, uint8_t **out) {
+    FILE *fp;
+    fp = fopen(file_name, "rb");
+    PADDLE_MOBILE_ENFORCE(fp != nullptr, " %s open failed !", file_name);
+    fseek(fp, 0, SEEK_END);
+    auto size = static_cast<size_t>(ftell(fp));
+    rewind(fp);
+    *out = reinterpret_cast<uint8_t *>(malloc(size));
+    size_t cur_len = 0;
+    size_t nread;
+    while ((nread = fread(*out + cur_len, 1, size - cur_len, fp)) != 0) {
+        cur_len += nread;
+    }
+    fclose(fp);
+    return cur_len;
+}
+
+std::shared_ptr<ProgramDesc> loadParams(const std::string &model_path) {
+    PaddleMobile__Framework__Proto__ProgramDesc *c_program;
+    uint8_t *buf = nullptr;
+    size_t read_size = ReadBuffer(model_path.c_str(), &buf);
+    PADDLE_MOBILE_ENFORCE(buf != nullptr, "read from __model__ is null");
+    c_program = paddle_mobile__framework__proto__program_desc__unpack(
+            nullptr, read_size, buf);
+    PADDLE_MOBILE_ENFORCE(c_program != nullptr, "program is null");
+    auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);
+    return originProgramDesc;
+
+}
+
+// Returns the advanced read cursor so that callers converting combined
+// models keep their offset into the parameter blob.
+char *LoadWithDump(const paddle_mobile::framework::VarDesc &var_desc, char *dataP, FILE *out_file) {
+    // 1. version
+    uint32_t version = *reinterpret_cast<uint32_t *>(dataP);
+
+    // write version
+    fwrite(&version, kSize32, 1, out_file);
+
+    dataP += kSize32;
+
+    // 2 Lod information
+    auto *lod_level_ptr = new uint64_t();
+    memcpy(lod_level_ptr, dataP, kSize64);
+
+    uint64_t lod_level = 0;
+    // write lod Information
+    fwrite(&lod_level, kSize64, 1, out_file);
+    delete lod_level_ptr;
+
+    dataP += kSize64;
+
+    for (uint64_t i = 0; i < lod_level; ++i) {
+        uint64_t size = *reinterpret_cast<uint64_t *>(dataP);
+        // write lod size
+        fwrite(&size, kSize64, 1, out_file);
+        (dataP) += kSize64;
+
+        std::vector<size_t> tmp(size / sizeof(size_t));
+        for (unsigned long &k : tmp) {
+            k = *reinterpret_cast<size_t *>(dataP);
+            (dataP) += sizeof(size_t);
+        }
+        // write lod size vector
+        fwrite(tmp.data(), sizeof(size_t), tmp.size(), out_file);
+    }
+
+    // 3. tensor version
+    uint32_t tensor_version = *reinterpret_cast<uint32_t *>(dataP);
+    // write tensor version
+    fwrite(&tensor_version, kSize32, 1, out_file);
+    (dataP) += kSize32;
+
+    // 4. tensor desc
+    int32_t size = *reinterpret_cast<int32_t *>(dataP);
+    // write tensor desc
+    fwrite(&size, sizeof(int32_t), 1, out_file);
+    (dataP) += sizeof(int32_t);
+
+    std::unique_ptr<char[]> buf(new char[size]);
+    for (int m = 0; m < size; ++m) {
+        buf.get()[m] = (dataP)[m];
+    }
+
+    fwrite(buf.get(), sizeof(char), static_cast<size_t>(size), out_file);
+    (dataP) += (sizeof(char) * size);
+
+    const paddle_mobile::framework::TensorDesc &desc = var_desc.Tensor_desc();
+    int memory_size = 1;
+    for (auto l : desc.Dims()) {
+        memory_size *= l;
+    }
+
+    void *memory = nullptr;
+    int type_size = 0;
+    switch (desc.DataType()) {
+        case paddle_mobile::framework::VARTYPE_TYPE_FP16:
+            type_size = 2;
+            break;
+        case paddle_mobile::framework::VARTYPE_TYPE_FP32:
+            type_size = 4;
+            break;
+        case paddle_mobile::framework::VARTYPE_TYPE_FP64:
+            type_size = 8;
+            break;
+        case paddle_mobile::framework::VARTYPE_TYPE_INT32:
+            type_size = 4;
+            break;
+        case paddle_mobile::framework::VARTYPE_TYPE_INT64:
+            type_size = 8;
+            break;
+        case paddle_mobile::framework::VARTYPE_TYPE_BOOL:
+            type_size = 1;
+            break;
+        default:
+            break;
+    }
+    size_t tensorSize = sizeof(char) * memory_size * type_size;
+
+    memory = new char[tensorSize];
+
+    for (int n = 0; n < tensorSize; ++n) {
+        static_cast<char *>(memory)[n] = (dataP)[n];
+    }
+    dataP += tensorSize;
+
+    // for float 32
+    float min_value = std::numeric_limits<float>::max();
+    // lowest(), not min(): min() is the smallest positive float, not a lower bound
+    float max_value = std::numeric_limits<float>::lowest();
+
+    for (int k = 0; k < memory_size; ++k) {
+        min_value = std::min(min_value, static_cast<float *>(memory)[k]);
+        max_value = std::max(max_value, static_cast<float *>(memory)[k]);
+    }
+
+    fwrite(&min_value, sizeof(float), 1, out_file);
+    fwrite(&max_value, sizeof(float), 1, out_file);
+
+    for (int g = 0; g < memory_size; ++g) {
+        float value = static_cast<float *>(memory)[g];
+        auto factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255);
+        fwrite(&factor, sizeof(uint8_t), 1, out_file);
+    }
+    return dataP;
+}
+
+void
+quantificate_combined(const std::string &model_path, const std::string &param_path, const std::string &param_min_path) {
+
+    auto program = loadParams(model_path);
+    char *origin_data = Get_binary_data(param_path);
+    char *data = origin_data;
+    FILE *out_file = fopen(param_min_path.c_str(), "wb");
+    for (const auto &block : program->Blocks()) {
+        for (const auto &var_desc : block->Vars()) {
+            if (var_desc->Persistable()) {
+                if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
+                    continue;
+                }
+                // keep the advanced cursor: all vars share one parameter blob
+                data = LoadWithDump(*var_desc, data, out_file);
+            }
+        }
+    }
+    fclose(out_file);
+    delete[] origin_data;
+
+}
+
+void quantificate_seperated(const std::string model_dir, const std::string param_min_path) {
+
+    auto program = loadParams(model_dir + "/__model__");
+
+    std::string shell_command = "mkdir " + param_min_path;
+    system(shell_command.c_str());
+
+    for (const auto &block : program->Blocks()) {
+        for (const auto &var_desc : block->Vars()) {
+            if (var_desc->Persistable()) {
+                if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
+                    continue;
+                }
+                std::string file_name = param_min_path + "/" + var_desc->Name();
+                FILE *out_file = fopen(file_name.c_str(), "wb");
+                char *origin_data = Get_binary_data(model_dir + "/" + var_desc->Name());
+                char *data = origin_data;
+                LoadWithDump(*var_desc, data, out_file);
+                delete[] origin_data;
+                fclose(out_file);
+            }
+        }
+    }
+
+}
+
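+// Note (illustrative, not used by this tool): a loader can invert the factor
+// encoding above by reading the per-tensor (min_value, max_value) header and
+// computing
+//     value ≈ min_value + factor / 255.0f * (max_value - min_value);
+// the quantization error is therefore at most (max_value - min_value) / 255 / 2
+// per weight.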
+
+int main(int argc, char **argv) {
+
+    const std::string kNoteEg = "( eg: ./quantify 1 your_combined_model_path output_path or ./quantify 0 your_seperated_model_path output_path)";
+
+    PADDLE_MOBILE_ENFORCE(argc > 1, "we need params.%s ", kNoteEg.c_str());
+
+    std::string action_type = argv[1];
+    PADDLE_MOBILE_ENFORCE(argc > 1 && (action_type == "1" || action_type == "0"),
+                          "only 0 or 1 supported, current is %s %s ",
+                          action_type.c_str(),
+                          kNoteEg.c_str());
+
+    PADDLE_MOBILE_ENFORCE(argc > 2, "we need your model path. %s ", kNoteEg.c_str());
+    std::string base_path = argv[2];
+
+    PADDLE_MOBILE_ENFORCE(argc > 3, "we need your output path. %s ", kNoteEg.c_str());
+    std::string output_path = argv[3];
+
+    if (action_type == "0") {
+        // for seperated
+        const std::string &seperated_min_dir = output_path;
+        quantificate_seperated(base_path, seperated_min_dir);
+        return 0;
+    }
+
+    if (action_type == "1") {
+        // for combined
+        const std::string &combined_min_dir = output_path;
+        std::string model_path = base_path + "/model";
+        std::string param_path = base_path + "/params";
+        quantificate_combined(model_path, param_path, combined_min_dir);
+
+        return 0;
+    }
+
+    return -1;
+}
+
+
+
+
+
+
diff --git a/tools/quantification/src/block_desc_local.cpp b/tools/quantification/src/block_desc_local.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8ad1982c05ed0b1b7c7bec5ef26aa8151f941cf3
--- /dev/null
+++ b/tools/quantification/src/block_desc_local.cpp
@@ -0,0 +1,48 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+//
+//  Created by 谢柏渊 on 2018/7/25.
+//
+#include "src/block_desc_local.h"
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "src/framework.pb-c.h"
+
+std::vector<std::shared_ptr<paddle_mobile::framework::VarDesc>>
+BlockDesc::Vars() const {
+    return vars_;
+}
+
+BlockDesc::BlockDesc(PaddleMobile__Framework__Proto__BlockDesc *desc)
+        : index_(desc->idx), parent_index_(desc->parent_idx) {
+    for (int i = 0; i < desc->n_vars; ++i) {
+        PaddleMobile__Framework__Proto__VarDesc *var_desc = desc->vars[i];
+        vars_.emplace_back(std::shared_ptr<paddle_mobile::framework::VarDesc>(
+                new paddle_mobile::framework::VarDesc(var_desc)));
+    }
+
+    std::sort(vars_.begin(), vars_.end(),
+              [](std::shared_ptr<paddle_mobile::framework::VarDesc> left,
+                 std::shared_ptr<paddle_mobile::framework::VarDesc> right) {
+                  return left->Name() < right->Name();
+              });
+
+    // for (int j = 0; j < desc->n_ops; ++j) {
+    //     PaddleMobile__Framework__Proto__OpDesc *op_desc = desc->ops[j];
+    //     ops_.emplace_back(new OpDesc(op_desc));
+    // }
+}
diff --git a/tools/quantification/src/block_desc_local.h b/tools/quantification/src/block_desc_local.h
new file mode 100644
index 0000000000000000000000000000000000000000..41c2dc0abbdf8bb006f4152674e92dd1f7d01500
--- /dev/null
+++ b/tools/quantification/src/block_desc_local.h
@@ -0,0 +1,55 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and +limitations under the License. */ + +// +// Created by 谢柏渊 on 2018/7/25. +// + +#ifndef TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_ +#define TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_ + +#include +#include "src/var_desc.h" + +class BlockDesc { + public: + friend class Node; + friend class ProgramOptimize; + BlockDesc() {} + explicit BlockDesc(PaddleMobile__Framework__Proto__BlockDesc *desc); + + const int &ID() const { return index_; } + + const bool &MultiThread() const { return multi_thread_; } + + const int &Parent() const { return parent_index_; } + + bool operator==(const BlockDesc &in_block) const { + return this->ID() == in_block.ID() && this->Parent() == in_block.Parent(); + } + + bool operator<(const BlockDesc &in_block) const { + return this->ID() < in_block.ID() && this->Parent() < in_block.Parent(); + } + + std::vector> Vars() const; + + private: + int index_; + bool multi_thread_; + int parent_index_; + std::vector> vars_; +}; + +#endif // TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_ diff --git a/tools/quantification/src/enforce.h b/tools/quantification/src/enforce.h new file mode 100644 index 0000000000000000000000000000000000000000..51d2110e32433686d1b3353bc63b92a564a13e9d --- /dev/null +++ b/tools/quantification/src/enforce.h @@ -0,0 +1,67 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#ifdef ENABLE_EXCEPTION +#include +#include +#include + +#endif + +namespace paddle_mobile { + +#ifdef ENABLE_EXCEPTION +struct PaddleMobileException : public std::exception { + const std::string exception_prefix = "paddle mobile C++ Exception: \n"; + std::string message; + + PaddleMobileException(const char *header, const char *detail, + const char *file, const int line) { + char buffer[1500]; + snprintf(buffer, sizeof(buffer), + "%s| %s \n| [in file] : %s\n| [on line] : %d\n| [detail] : %s\n", + exception_prefix.c_str(), header, file, line, detail); + message = std::string(buffer); + } + const char *what() const noexcept { return message.c_str(); } +}; + +#define PADDLE_MOBILE_THROW_EXCEPTION(...) \ + { \ + char buffer[1000]; \ + snprintf(buffer, sizeof(buffer), __VA_ARGS__); \ + std::string detail(buffer); \ + throw paddle_mobile::PaddleMobileException("Custom Exception", buffer, \ + __FILE__, __LINE__); \ + } + +#define PADDLE_MOBILE_ENFORCE(stat, ...) \ + { \ + if (stat) { \ + } else { \ + char buffer[1000]; \ + snprintf(buffer, sizeof(buffer), __VA_ARGS__); \ + std::string detail(buffer); \ + throw paddle_mobile::PaddleMobileException("paddle-mobile enforce", \ + buffer, __FILE__, __LINE__); \ + } \ + } +#else +#define PADDLE_MOBILE_THROW_EXCEPTION(...) +#define PADDLE_MOBILE_ENFORCE(stat, ...) 
+#endif + +} // namespace paddle_mobile diff --git a/tools/quantification/src/framework.pb-c.c b/tools/quantification/src/framework.pb-c.c new file mode 100644 index 0000000000000000000000000000000000000000..aed0a6c9c0614da74a82cea8c7aa705978dddafc --- /dev/null +++ b/tools/quantification/src/framework.pb-c.c @@ -0,0 +1,1403 @@ +/* Generated by the protocol buffer compiler. DO NOT EDIT! */ +/* Generated from: framework.proto */ + +/* Do not generate deprecated warnings for self */ +#ifndef PROTOBUF_C__NO_DEPRECATED +#define PROTOBUF_C__NO_DEPRECATED +#endif + +#include "framework.pb-c.h" +void paddle_mobile__framework__proto__op_desc__attr__init( + PaddleMobile__Framework__Proto__OpDesc__Attr *message) { + static const PaddleMobile__Framework__Proto__OpDesc__Attr init_value = + PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__ATTR__INIT; + *message = init_value; +} +void paddle_mobile__framework__proto__op_desc__var__init( + PaddleMobile__Framework__Proto__OpDesc__Var *message) { + static const PaddleMobile__Framework__Proto__OpDesc__Var init_value = + PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__VAR__INIT; + *message = init_value; +} +void paddle_mobile__framework__proto__op_desc__init( + PaddleMobile__Framework__Proto__OpDesc *message) { + static const PaddleMobile__Framework__Proto__OpDesc init_value = + PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__INIT; + *message = init_value; +} +size_t paddle_mobile__framework__proto__op_desc__get_packed_size( + const PaddleMobile__Framework__Proto__OpDesc *message) { + assert(message->base.descriptor == + &paddle_mobile__framework__proto__op_desc__descriptor); + return protobuf_c_message_get_packed_size( + (const ProtobufCMessage *)(message)); +} + +PaddleMobile__Framework__Proto__OpDesc * +paddle_mobile__framework__proto__op_desc__unpack(ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) { + return (PaddleMobile__Framework__Proto__OpDesc *)protobuf_c_message_unpack( + &paddle_mobile__framework__proto__op_desc__descriptor, allocator, len, + data); +} +void paddle_mobile__framework__proto__op_desc__free_unpacked( + PaddleMobile__Framework__Proto__OpDesc *message, + ProtobufCAllocator *allocator) { + if (!message) return; + assert(message->base.descriptor == + &paddle_mobile__framework__proto__op_desc__descriptor); + protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator); +} +void paddle_mobile__framework__proto__op_proto__var__init( + PaddleMobile__Framework__Proto__OpProto__Var *message) { + static const PaddleMobile__Framework__Proto__OpProto__Var init_value = + PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__VAR__INIT; + *message = init_value; +} +void paddle_mobile__framework__proto__op_proto__attr__init( + PaddleMobile__Framework__Proto__OpProto__Attr *message) { + static const PaddleMobile__Framework__Proto__OpProto__Attr init_value = + PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__ATTR__INIT; + *message = init_value; +} +void paddle_mobile__framework__proto__op_proto__init( + PaddleMobile__Framework__Proto__OpProto *message) { + static const PaddleMobile__Framework__Proto__OpProto init_value = + PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__INIT; + *message = init_value; +} +size_t paddle_mobile__framework__proto__op_proto__get_packed_size( + const PaddleMobile__Framework__Proto__OpProto *message) { + assert(message->base.descriptor == + &paddle_mobile__framework__proto__op_proto__descriptor); + return protobuf_c_message_get_packed_size( + (const ProtobufCMessage *)(message)); +} + +PaddleMobile__Framework__Proto__OpProto * 
+paddle_mobile__framework__proto__op_proto__unpack(ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) { + return (PaddleMobile__Framework__Proto__OpProto *)protobuf_c_message_unpack( + &paddle_mobile__framework__proto__op_proto__descriptor, allocator, len, + data); +} +void paddle_mobile__framework__proto__op_proto__free_unpacked( + PaddleMobile__Framework__Proto__OpProto *message, + ProtobufCAllocator *allocator) { + if (!message) return; + assert(message->base.descriptor == + &paddle_mobile__framework__proto__op_proto__descriptor); + protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator); +} +void paddle_mobile__framework__proto__var_type__tensor_desc__init( + PaddleMobile__Framework__Proto__VarType__TensorDesc *message) { + static const PaddleMobile__Framework__Proto__VarType__TensorDesc init_value = + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TENSOR_DESC__INIT; + *message = init_value; +} +void paddle_mobile__framework__proto__var_type__lo_dtensor_desc__init( + PaddleMobile__Framework__Proto__VarType__LoDTensorDesc *message) { + static const PaddleMobile__Framework__Proto__VarType__LoDTensorDesc + init_value = + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__LO_DTENSOR_DESC__INIT; + *message = init_value; +} +void paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__init( + PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc *message) { + static const PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc + init_value = + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__LO_DTENSOR_ARRAY_DESC__INIT; + *message = init_value; +} +void paddle_mobile__framework__proto__var_type__reader_desc__init( + PaddleMobile__Framework__Proto__VarType__ReaderDesc *message) { + static const PaddleMobile__Framework__Proto__VarType__ReaderDesc init_value = + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__READER_DESC__INIT; + *message = init_value; +} +void paddle_mobile__framework__proto__var_type__channel_desc__init( + PaddleMobile__Framework__Proto__VarType__ChannelDesc *message) { + static const PaddleMobile__Framework__Proto__VarType__ChannelDesc init_value = + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__CHANNEL_DESC__INIT; + *message = init_value; +} +void paddle_mobile__framework__proto__var_type__tuple__init( + PaddleMobile__Framework__Proto__VarType__Tuple *message) { + static const PaddleMobile__Framework__Proto__VarType__Tuple init_value = + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TUPLE__INIT; + *message = init_value; +} +void paddle_mobile__framework__proto__var_type__init( + PaddleMobile__Framework__Proto__VarType *message) { + static const PaddleMobile__Framework__Proto__VarType init_value = + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__INIT; + *message = init_value; +} +size_t paddle_mobile__framework__proto__var_type__get_packed_size( + const PaddleMobile__Framework__Proto__VarType *message) { + assert(message->base.descriptor == + &paddle_mobile__framework__proto__var_type__descriptor); + return protobuf_c_message_get_packed_size( + (const ProtobufCMessage *)(message)); +} +PaddleMobile__Framework__Proto__VarType * +paddle_mobile__framework__proto__var_type__unpack(ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) { + return (PaddleMobile__Framework__Proto__VarType *)protobuf_c_message_unpack( + &paddle_mobile__framework__proto__var_type__descriptor, allocator, len, + data); +} +void paddle_mobile__framework__proto__var_type__free_unpacked( + PaddleMobile__Framework__Proto__VarType *message, + ProtobufCAllocator *allocator) 
{ + if (!message) return; + assert(message->base.descriptor == + &paddle_mobile__framework__proto__var_type__descriptor); + protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator); +} +void paddle_mobile__framework__proto__var_desc__init( + PaddleMobile__Framework__Proto__VarDesc *message) { + static const PaddleMobile__Framework__Proto__VarDesc init_value = + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_DESC__INIT; + *message = init_value; +} +size_t paddle_mobile__framework__proto__var_desc__get_packed_size( + const PaddleMobile__Framework__Proto__VarDesc *message) { + assert(message->base.descriptor == + &paddle_mobile__framework__proto__var_desc__descriptor); + return protobuf_c_message_get_packed_size( + (const ProtobufCMessage *)(message)); +} + +PaddleMobile__Framework__Proto__VarDesc * +paddle_mobile__framework__proto__var_desc__unpack(ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data) { + return (PaddleMobile__Framework__Proto__VarDesc *)protobuf_c_message_unpack( + &paddle_mobile__framework__proto__var_desc__descriptor, allocator, len, + data); +} +void paddle_mobile__framework__proto__var_desc__free_unpacked( + PaddleMobile__Framework__Proto__VarDesc *message, + ProtobufCAllocator *allocator) { + if (!message) return; + assert(message->base.descriptor == + &paddle_mobile__framework__proto__var_desc__descriptor); + protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator); +} +void paddle_mobile__framework__proto__block_desc__init( + PaddleMobile__Framework__Proto__BlockDesc *message) { + static const PaddleMobile__Framework__Proto__BlockDesc init_value = + PADDLE_MOBILE__FRAMEWORK__PROTO__BLOCK_DESC__INIT; + *message = init_value; +} +size_t paddle_mobile__framework__proto__block_desc__get_packed_size( + const PaddleMobile__Framework__Proto__BlockDesc *message) { + assert(message->base.descriptor == + &paddle_mobile__framework__proto__block_desc__descriptor); + return protobuf_c_message_get_packed_size( + (const ProtobufCMessage *)(message)); +} + +PaddleMobile__Framework__Proto__BlockDesc * +paddle_mobile__framework__proto__block_desc__unpack( + ProtobufCAllocator *allocator, size_t len, const uint8_t *data) { + return (PaddleMobile__Framework__Proto__BlockDesc *)protobuf_c_message_unpack( + &paddle_mobile__framework__proto__block_desc__descriptor, allocator, len, + data); +} +void paddle_mobile__framework__proto__block_desc__free_unpacked( + PaddleMobile__Framework__Proto__BlockDesc *message, + ProtobufCAllocator *allocator) { + if (!message) return; + assert(message->base.descriptor == + &paddle_mobile__framework__proto__block_desc__descriptor); + protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator); +} +void paddle_mobile__framework__proto__program_desc__init( + PaddleMobile__Framework__Proto__ProgramDesc *message) { + static const PaddleMobile__Framework__Proto__ProgramDesc init_value = + PADDLE_MOBILE__FRAMEWORK__PROTO__PROGRAM_DESC__INIT; + *message = init_value; +} +size_t paddle_mobile__framework__proto__program_desc__get_packed_size( + const PaddleMobile__Framework__Proto__ProgramDesc *message) { + assert(message->base.descriptor == + &paddle_mobile__framework__proto__program_desc__descriptor); + return protobuf_c_message_get_packed_size( + (const ProtobufCMessage *)(message)); +} + +PaddleMobile__Framework__Proto__ProgramDesc * +paddle_mobile__framework__proto__program_desc__unpack( + ProtobufCAllocator *allocator, size_t len, const uint8_t *data) { + return (PaddleMobile__Framework__Proto__ProgramDesc *) + 
protobuf_c_message_unpack( + &paddle_mobile__framework__proto__program_desc__descriptor, allocator, + len, data); +} +void paddle_mobile__framework__proto__program_desc__free_unpacked( + PaddleMobile__Framework__Proto__ProgramDesc *message, + ProtobufCAllocator *allocator) { + if (!message) return; + assert(message->base.descriptor == + &paddle_mobile__framework__proto__program_desc__descriptor); + protobuf_c_message_free_unpacked((ProtobufCMessage *)message, allocator); +} +static const ProtobufCFieldDescriptor + paddle_mobile__framework__proto__op_desc__attr__field_descriptors[12] = { + { + "name", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, name), NULL, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "type", 2, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, type), + &paddle_mobile__framework__proto__attr_type__descriptor, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "i", 3, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_INT32, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, has_i), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, i), NULL, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "f", 4, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_FLOAT, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, has_f), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, f), NULL, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "s", 5, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, s), NULL, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "ints", 6, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_INT32, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, n_ints), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, ints), NULL, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "floats", 7, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_FLOAT, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, n_floats), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, floats), + NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "strings", 8, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_STRING, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, n_strings), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, strings), + NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "b", 10, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_BOOL, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, has_b), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, b), NULL, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "bools", 11, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_BOOL, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, n_bools), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, bools), NULL, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "block_idx", 12, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_INT32, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, + has_block_idx), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, block_idx), + NULL, NULL, 0, /* flags */ 
+ 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "l", 13, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_INT64, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, has_l), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Attr, l), NULL, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned + paddle_mobile__framework__proto__op_desc__attr__field_indices_by_name[] = { + 8, /* field[8] = b */ + 10, /* field[10] = block_idx */ + 9, /* field[9] = bools */ + 3, /* field[3] = f */ + 6, /* field[6] = floats */ + 2, /* field[2] = i */ + 5, /* field[5] = ints */ + 11, /* field[11] = l */ + 0, /* field[0] = name */ + 4, /* field[4] = s */ + 7, /* field[7] = strings */ + 1, /* field[1] = type */ +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__op_desc__attr__number_ranges[2 + 1] = { + {1, 0}, {10, 8}, {0, 12}}; +const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__op_desc__attr__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.OpDesc.Attr", + "Attr", + "PaddleMobile__Framework__Proto__OpDesc__Attr", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__OpDesc__Attr), + 12, + paddle_mobile__framework__proto__op_desc__attr__field_descriptors, + paddle_mobile__framework__proto__op_desc__attr__field_indices_by_name, + 2, + paddle_mobile__framework__proto__op_desc__attr__number_ranges, + (ProtobufCMessageInit) + paddle_mobile__framework__proto__op_desc__attr__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor + paddle_mobile__framework__proto__op_desc__var__field_descriptors[2] = { + { + "parameter", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpDesc__Var, parameter), + NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "arguments", 2, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_STRING, + offsetof(PaddleMobile__Framework__Proto__OpDesc__Var, n_arguments), + offsetof(PaddleMobile__Framework__Proto__OpDesc__Var, arguments), + NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned + paddle_mobile__framework__proto__op_desc__var__field_indices_by_name[] = { + 1, /* field[1] = arguments */ + 0, /* field[0] = parameter */ +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__op_desc__var__number_ranges[1 + 1] = { + {1, 0}, {0, 2}}; +const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__op_desc__var__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.OpDesc.Var", + "Var", + "PaddleMobile__Framework__Proto__OpDesc__Var", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__OpDesc__Var), + 2, + paddle_mobile__framework__proto__op_desc__var__field_descriptors, + paddle_mobile__framework__proto__op_desc__var__field_indices_by_name, + 1, + paddle_mobile__framework__proto__op_desc__var__number_ranges, + (ProtobufCMessageInit) + paddle_mobile__framework__proto__op_desc__var__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const protobuf_c_boolean + paddle_mobile__framework__proto__op_desc__is_target__default_value = 0; +static const ProtobufCFieldDescriptor + paddle_mobile__framework__proto__op_desc__field_descriptors[5] = { + { + "inputs", 1, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__OpDesc, n_inputs), + 
offsetof(PaddleMobile__Framework__Proto__OpDesc, inputs), + &paddle_mobile__framework__proto__op_desc__var__descriptor, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "outputs", 2, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__OpDesc, n_outputs), + offsetof(PaddleMobile__Framework__Proto__OpDesc, outputs), + &paddle_mobile__framework__proto__op_desc__var__descriptor, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "type", 3, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpDesc, type), NULL, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "attrs", 4, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__OpDesc, n_attrs), + offsetof(PaddleMobile__Framework__Proto__OpDesc, attrs), + &paddle_mobile__framework__proto__op_desc__attr__descriptor, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "is_target", 5, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_BOOL, + offsetof(PaddleMobile__Framework__Proto__OpDesc, has_is_target), + offsetof(PaddleMobile__Framework__Proto__OpDesc, is_target), NULL, + &paddle_mobile__framework__proto__op_desc__is_target__default_value, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned + paddle_mobile__framework__proto__op_desc__field_indices_by_name[] = { + 3, /* field[3] = attrs */ + 0, /* field[0] = inputs */ + 4, /* field[4] = is_target */ + 1, /* field[1] = outputs */ + 2, /* field[2] = type */ +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__op_desc__number_ranges[1 + 1] = {{1, 0}, + {0, 5}}; +const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__op_desc__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.OpDesc", + "OpDesc", + "PaddleMobile__Framework__Proto__OpDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__OpDesc), + 5, + paddle_mobile__framework__proto__op_desc__field_descriptors, + paddle_mobile__framework__proto__op_desc__field_indices_by_name, + 1, + paddle_mobile__framework__proto__op_desc__number_ranges, + (ProtobufCMessageInit)paddle_mobile__framework__proto__op_desc__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const protobuf_c_boolean + paddle_mobile__framework__proto__op_proto__var__duplicable__default_value = + 0; +static const protobuf_c_boolean + paddle_mobile__framework__proto__op_proto__var__intermediate__default_value = + 0; +static const protobuf_c_boolean + paddle_mobile__framework__proto__op_proto__var__dispensable__default_value = + 0; +static const ProtobufCFieldDescriptor + paddle_mobile__framework__proto__op_proto__var__field_descriptors[5] = { + { + "name", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpProto__Var, name), NULL, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "comment", 2, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpProto__Var, comment), + NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "duplicable", 3, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_BOOL, + offsetof(PaddleMobile__Framework__Proto__OpProto__Var, + has_duplicable), + 
offsetof(PaddleMobile__Framework__Proto__OpProto__Var, duplicable), + NULL, + &paddle_mobile__framework__proto__op_proto__var__duplicable__default_value, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "intermediate", 4, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_BOOL, + offsetof(PaddleMobile__Framework__Proto__OpProto__Var, + has_intermediate), + offsetof(PaddleMobile__Framework__Proto__OpProto__Var, + intermediate), + NULL, + &paddle_mobile__framework__proto__op_proto__var__intermediate__default_value, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "dispensable", 5, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_BOOL, + offsetof(PaddleMobile__Framework__Proto__OpProto__Var, + has_dispensable), + offsetof(PaddleMobile__Framework__Proto__OpProto__Var, dispensable), + NULL, + &paddle_mobile__framework__proto__op_proto__var__dispensable__default_value, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned + paddle_mobile__framework__proto__op_proto__var__field_indices_by_name[] = { + 1, /* field[1] = comment */ + 4, /* field[4] = dispensable */ + 2, /* field[2] = duplicable */ + 3, /* field[3] = intermediate */ + 0, /* field[0] = name */ +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__op_proto__var__number_ranges[1 + 1] = { + {1, 0}, {0, 5}}; +const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__op_proto__var__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.OpProto.Var", + "Var", + "PaddleMobile__Framework__Proto__OpProto__Var", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__OpProto__Var), + 5, + paddle_mobile__framework__proto__op_proto__var__field_descriptors, + paddle_mobile__framework__proto__op_proto__var__field_indices_by_name, + 1, + paddle_mobile__framework__proto__op_proto__var__number_ranges, + (ProtobufCMessageInit) + paddle_mobile__framework__proto__op_proto__var__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const protobuf_c_boolean + paddle_mobile__framework__proto__op_proto__attr__generated__default_value = + 0; +static const ProtobufCFieldDescriptor + paddle_mobile__framework__proto__op_proto__attr__field_descriptors[4] = { + { + "name", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpProto__Attr, name), NULL, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "type", 2, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpProto__Attr, type), + &paddle_mobile__framework__proto__attr_type__descriptor, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "comment", 3, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpProto__Attr, comment), + NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "generated", 4, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_BOOL, + offsetof(PaddleMobile__Framework__Proto__OpProto__Attr, + has_generated), + offsetof(PaddleMobile__Framework__Proto__OpProto__Attr, generated), + NULL, + &paddle_mobile__framework__proto__op_proto__attr__generated__default_value, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned + paddle_mobile__framework__proto__op_proto__attr__field_indices_by_name[] = { + 
2, /* field[2] = comment */ + 3, /* field[3] = generated */ + 0, /* field[0] = name */ + 1, /* field[1] = type */ +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__op_proto__attr__number_ranges[1 + 1] = { + {1, 0}, {0, 4}}; +const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__op_proto__attr__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.OpProto.Attr", + "Attr", + "PaddleMobile__Framework__Proto__OpProto__Attr", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__OpProto__Attr), + 4, + paddle_mobile__framework__proto__op_proto__attr__field_descriptors, + paddle_mobile__framework__proto__op_proto__attr__field_indices_by_name, + 1, + paddle_mobile__framework__proto__op_proto__attr__number_ranges, + (ProtobufCMessageInit) + paddle_mobile__framework__proto__op_proto__attr__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor + paddle_mobile__framework__proto__op_proto__field_descriptors[5] = { + { + "type", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpProto, type), NULL, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "inputs", 2, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__OpProto, n_inputs), + offsetof(PaddleMobile__Framework__Proto__OpProto, inputs), + &paddle_mobile__framework__proto__op_proto__var__descriptor, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "outputs", 3, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__OpProto, n_outputs), + offsetof(PaddleMobile__Framework__Proto__OpProto, outputs), + &paddle_mobile__framework__proto__op_proto__var__descriptor, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "attrs", 4, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__OpProto, n_attrs), + offsetof(PaddleMobile__Framework__Proto__OpProto, attrs), + &paddle_mobile__framework__proto__op_proto__attr__descriptor, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "comment", 5, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__OpProto, comment), NULL, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned + paddle_mobile__framework__proto__op_proto__field_indices_by_name[] = { + 3, /* field[3] = attrs */ + 4, /* field[4] = comment */ + 1, /* field[1] = inputs */ + 2, /* field[2] = outputs */ + 0, /* field[0] = type */ +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__op_proto__number_ranges[1 + 1] = {{1, 0}, + {0, 5}}; +const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__op_proto__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.OpProto", + "OpProto", + "PaddleMobile__Framework__Proto__OpProto", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__OpProto), + 5, + paddle_mobile__framework__proto__op_proto__field_descriptors, + paddle_mobile__framework__proto__op_proto__field_indices_by_name, + 1, + paddle_mobile__framework__proto__op_proto__number_ranges, + (ProtobufCMessageInit)paddle_mobile__framework__proto__op_proto__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const 
ProtobufCFieldDescriptor + paddle_mobile__framework__proto__var_type__tensor_desc__field_descriptors + [2] = { + { + "data_type", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType__TensorDesc, + data_type), + &paddle_mobile__framework__proto__var_type__type__descriptor, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "dims", 2, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_INT64, + offsetof(PaddleMobile__Framework__Proto__VarType__TensorDesc, + n_dims), + offsetof(PaddleMobile__Framework__Proto__VarType__TensorDesc, + dims), + NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned + paddle_mobile__framework__proto__var_type__tensor_desc__field_indices_by_name + [] = { + 0, /* field[0] = data_type */ + 1, /* field[1] = dims */ +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__var_type__tensor_desc__number_ranges[1 + + 1] = { + {1, 0}, {0, 2}}; +const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__var_type__tensor_desc__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarType.TensorDesc", + "TensorDesc", + "PaddleMobile__Framework__Proto__VarType__TensorDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__VarType__TensorDesc), + 2, + paddle_mobile__framework__proto__var_type__tensor_desc__field_descriptors, + paddle_mobile__framework__proto__var_type__tensor_desc__field_indices_by_name, + 1, + paddle_mobile__framework__proto__var_type__tensor_desc__number_ranges, + (ProtobufCMessageInit) + paddle_mobile__framework__proto__var_type__tensor_desc__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const int32_t + paddle_mobile__framework__proto__var_type__lo_dtensor_desc__lod_level__default_value = + 0; +static const ProtobufCFieldDescriptor + paddle_mobile__framework__proto__var_type__lo_dtensor_desc__field_descriptors + [2] = { + { + "tensor", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType__LoDTensorDesc, + tensor), + &paddle_mobile__framework__proto__var_type__tensor_desc__descriptor, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "lod_level", 2, PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_INT32, + offsetof(PaddleMobile__Framework__Proto__VarType__LoDTensorDesc, + has_lod_level), + offsetof(PaddleMobile__Framework__Proto__VarType__LoDTensorDesc, + lod_level), + NULL, + &paddle_mobile__framework__proto__var_type__lo_dtensor_desc__lod_level__default_value, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned + paddle_mobile__framework__proto__var_type__lo_dtensor_desc__field_indices_by_name + [] = { + 1, /* field[1] = lod_level */ + 0, /* field[0] = tensor */ +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__var_type__lo_dtensor_desc__number_ranges + [1 + 1] = {{1, 0}, {0, 2}}; +const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__var_type__lo_dtensor_desc__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarType.LoDTensorDesc", + "LoDTensorDesc", + "PaddleMobile__Framework__Proto__VarType__LoDTensorDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__VarType__LoDTensorDesc), + 2, + 
paddle_mobile__framework__proto__var_type__lo_dtensor_desc__field_descriptors, + paddle_mobile__framework__proto__var_type__lo_dtensor_desc__field_indices_by_name, + 1, + paddle_mobile__framework__proto__var_type__lo_dtensor_desc__number_ranges, + (ProtobufCMessageInit) + paddle_mobile__framework__proto__var_type__lo_dtensor_desc__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const int32_t + paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__lod_level__default_value = + 0; +static const ProtobufCFieldDescriptor + paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__field_descriptors + [2] = { + { + "tensor", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof( + PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc, + tensor), + &paddle_mobile__framework__proto__var_type__tensor_desc__descriptor, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "lod_level", 2, PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_INT32, + offsetof( + PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc, + has_lod_level), + offsetof( + PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc, + lod_level), + NULL, + &paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__lod_level__default_value, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned + paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__field_indices_by_name + [] = { + 1, /* field[1] = lod_level */ + 0, /* field[0] = tensor */ +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__number_ranges + [1 + 1] = {{1, 0}, {0, 2}}; +const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarType.LoDTensorArrayDesc", + "LoDTensorArrayDesc", + "PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc), + 2, + paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__field_descriptors, + paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__field_indices_by_name, + 1, + paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__number_ranges, + (ProtobufCMessageInit) + paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor + paddle_mobile__framework__proto__var_type__reader_desc__field_descriptors[1] = { + { + "lod_tensor", 1, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__VarType__ReaderDesc, + n_lod_tensor), + offsetof(PaddleMobile__Framework__Proto__VarType__ReaderDesc, + lod_tensor), + &paddle_mobile__framework__proto__var_type__lo_dtensor_desc__descriptor, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned + paddle_mobile__framework__proto__var_type__reader_desc__field_indices_by_name + [] = { + 0, /* field[0] = lod_tensor */ +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__var_type__reader_desc__number_ranges[1 + + 1] = { + {1, 0}, {0, 1}}; +const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__var_type__reader_desc__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + 
"paddle_mobile.framework.proto.VarType.ReaderDesc", + "ReaderDesc", + "PaddleMobile__Framework__Proto__VarType__ReaderDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__VarType__ReaderDesc), + 1, + paddle_mobile__framework__proto__var_type__reader_desc__field_descriptors, + paddle_mobile__framework__proto__var_type__reader_desc__field_indices_by_name, + 1, + paddle_mobile__framework__proto__var_type__reader_desc__number_ranges, + (ProtobufCMessageInit) + paddle_mobile__framework__proto__var_type__reader_desc__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor + paddle_mobile__framework__proto__var_type__channel_desc__field_descriptors + [2] = { + { + "data_type", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType__ChannelDesc, + data_type), + &paddle_mobile__framework__proto__var_type__type__descriptor, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "capacity", 2, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_INT64, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType__ChannelDesc, + capacity), + NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned + paddle_mobile__framework__proto__var_type__channel_desc__field_indices_by_name + [] = { + 1, /* field[1] = capacity */ + 0, /* field[0] = data_type */ +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__var_type__channel_desc__number_ranges[1 + + 1] = + {{1, 0}, {0, 2}}; +const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__var_type__channel_desc__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarType.ChannelDesc", + "ChannelDesc", + "PaddleMobile__Framework__Proto__VarType__ChannelDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__VarType__ChannelDesc), + 2, + paddle_mobile__framework__proto__var_type__channel_desc__field_descriptors, + paddle_mobile__framework__proto__var_type__channel_desc__field_indices_by_name, + 1, + paddle_mobile__framework__proto__var_type__channel_desc__number_ranges, + (ProtobufCMessageInit) + paddle_mobile__framework__proto__var_type__channel_desc__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor + paddle_mobile__framework__proto__var_type__tuple__field_descriptors[1] = { + { + "element_type", 1, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_ENUM, + offsetof(PaddleMobile__Framework__Proto__VarType__Tuple, + n_element_type), + offsetof(PaddleMobile__Framework__Proto__VarType__Tuple, + element_type), + &paddle_mobile__framework__proto__var_type__type__descriptor, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned + paddle_mobile__framework__proto__var_type__tuple__field_indices_by_name[] = + { + 0, /* field[0] = element_type */ +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__var_type__tuple__number_ranges[1 + 1] = { + {1, 0}, {0, 1}}; +const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__var_type__tuple__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarType.Tuple", + "Tuple", + "PaddleMobile__Framework__Proto__VarType__Tuple", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__VarType__Tuple), + 1, + 
paddle_mobile__framework__proto__var_type__tuple__field_descriptors, + paddle_mobile__framework__proto__var_type__tuple__field_indices_by_name, + 1, + paddle_mobile__framework__proto__var_type__tuple__number_ranges, + (ProtobufCMessageInit) + paddle_mobile__framework__proto__var_type__tuple__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const ProtobufCEnumValue + paddle_mobile__framework__proto__var_type__type__enum_values_by_number[19] = + { + {"BOOL", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__BOOL", + 0}, + {"INT16", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT16", + 1}, + {"INT32", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT32", + 2}, + {"INT64", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT64", + 3}, + {"FP16", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP16", + 4}, + {"FP32", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP32", + 5}, + {"FP64", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP64", + 6}, + {"LOD_TENSOR", + "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_TENSOR", 7}, + {"SELECTED_ROWS", + "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__SELECTED_ROWS", + 8}, + {"FEED_MINIBATCH", + "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FEED_MINIBATCH", + 9}, + {"FETCH_LIST", + "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FETCH_LIST", 10}, + {"STEP_SCOPES", + "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__STEP_SCOPES", + 11}, + {"LOD_RANK_TABLE", + "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_RANK_TABLE", + 12}, + {"LOD_TENSOR_ARRAY", + "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_TENSOR_" + "ARRAY", + 13}, + {"PLACE_LIST", + "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__PLACE_LIST", 14}, + {"READER", + "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__READER", 15}, + {"CHANNEL", + "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__CHANNEL", 16}, + {"RAW", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__RAW", 17}, + {"TUPLE", "PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__TUPLE", + 18}, +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__var_type__type__value_ranges[] = {{0, 0}, + {0, 19}}; +static const ProtobufCEnumValueIndex + paddle_mobile__framework__proto__var_type__type__enum_values_by_name[19] = { + {"BOOL", 0}, + {"CHANNEL", 16}, + {"FEED_MINIBATCH", 9}, + {"FETCH_LIST", 10}, + {"FP16", 4}, + {"FP32", 5}, + {"FP64", 6}, + {"INT16", 1}, + {"INT32", 2}, + {"INT64", 3}, + {"LOD_RANK_TABLE", 12}, + {"LOD_TENSOR", 7}, + {"LOD_TENSOR_ARRAY", 13}, + {"PLACE_LIST", 14}, + {"RAW", 17}, + {"READER", 15}, + {"SELECTED_ROWS", 8}, + {"STEP_SCOPES", 11}, + {"TUPLE", 18}, +}; +const ProtobufCEnumDescriptor + paddle_mobile__framework__proto__var_type__type__descriptor = { + PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarType.Type", + "Type", + "PaddleMobile__Framework__Proto__VarType__Type", + "paddle_mobile.framework.proto", + 19, + paddle_mobile__framework__proto__var_type__type__enum_values_by_number, + 19, + paddle_mobile__framework__proto__var_type__type__enum_values_by_name, + 1, + paddle_mobile__framework__proto__var_type__type__value_ranges, + NULL, + NULL, + NULL, + NULL /* reserved[1234] */ +}; +static const ProtobufCFieldDescriptor + paddle_mobile__framework__proto__var_type__field_descriptors[7] = { + { + "type", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType, type), + &paddle_mobile__framework__proto__var_type__type__descriptor, NULL, + 0, /* 
flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "selected_rows", 2, PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_MESSAGE, 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType, selected_rows), + &paddle_mobile__framework__proto__var_type__tensor_desc__descriptor, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "lod_tensor", 3, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType, lod_tensor), + &paddle_mobile__framework__proto__var_type__lo_dtensor_desc__descriptor, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "tensor_array", 4, PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_MESSAGE, 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType, tensor_array), + &paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__descriptor, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "reader", 5, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType, reader), + &paddle_mobile__framework__proto__var_type__reader_desc__descriptor, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "channel", 6, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType, channel), + &paddle_mobile__framework__proto__var_type__channel_desc__descriptor, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "tuple", 7, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarType, tuple), + &paddle_mobile__framework__proto__var_type__tuple__descriptor, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned + paddle_mobile__framework__proto__var_type__field_indices_by_name[] = { + 5, /* field[5] = channel */ + 2, /* field[2] = lod_tensor */ + 4, /* field[4] = reader */ + 1, /* field[1] = selected_rows */ + 3, /* field[3] = tensor_array */ + 6, /* field[6] = tuple */ + 0, /* field[0] = type */ +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__var_type__number_ranges[1 + 1] = {{1, 0}, + {0, 7}}; +const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__var_type__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarType", + "VarType", + "PaddleMobile__Framework__Proto__VarType", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__VarType), + 7, + paddle_mobile__framework__proto__var_type__field_descriptors, + paddle_mobile__framework__proto__var_type__field_indices_by_name, + 1, + paddle_mobile__framework__proto__var_type__number_ranges, + (ProtobufCMessageInit)paddle_mobile__framework__proto__var_type__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const protobuf_c_boolean + paddle_mobile__framework__proto__var_desc__persistable__default_value = 0; +static const ProtobufCFieldDescriptor + paddle_mobile__framework__proto__var_desc__field_descriptors[3] = { + { + "name", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_STRING, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarDesc, name), NULL, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "type", 2, PROTOBUF_C_LABEL_REQUIRED, 
PROTOBUF_C_TYPE_MESSAGE, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__VarDesc, type), + &paddle_mobile__framework__proto__var_type__descriptor, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "persistable", 3, PROTOBUF_C_LABEL_OPTIONAL, PROTOBUF_C_TYPE_BOOL, + offsetof(PaddleMobile__Framework__Proto__VarDesc, has_persistable), + offsetof(PaddleMobile__Framework__Proto__VarDesc, persistable), + NULL, + &paddle_mobile__framework__proto__var_desc__persistable__default_value, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned + paddle_mobile__framework__proto__var_desc__field_indices_by_name[] = { + 0, /* field[0] = name */ + 2, /* field[2] = persistable */ + 1, /* field[1] = type */ +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__var_desc__number_ranges[1 + 1] = {{1, 0}, + {0, 3}}; +const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__var_desc__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.VarDesc", + "VarDesc", + "PaddleMobile__Framework__Proto__VarDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__VarDesc), + 3, + paddle_mobile__framework__proto__var_desc__field_descriptors, + paddle_mobile__framework__proto__var_desc__field_indices_by_name, + 1, + paddle_mobile__framework__proto__var_desc__number_ranges, + (ProtobufCMessageInit)paddle_mobile__framework__proto__var_desc__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const int32_t + paddle_mobile__framework__proto__block_desc__forward_block_idx__default_value = + -1; +static const ProtobufCFieldDescriptor + paddle_mobile__framework__proto__block_desc__field_descriptors[5] = { + { + "idx", 1, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__BlockDesc, idx), NULL, + NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "parent_idx", 2, PROTOBUF_C_LABEL_REQUIRED, PROTOBUF_C_TYPE_INT32, + 0, /* quantifier_offset */ + offsetof(PaddleMobile__Framework__Proto__BlockDesc, parent_idx), + NULL, NULL, 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "vars", 3, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__BlockDesc, n_vars), + offsetof(PaddleMobile__Framework__Proto__BlockDesc, vars), + &paddle_mobile__framework__proto__var_desc__descriptor, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "ops", 4, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__BlockDesc, n_ops), + offsetof(PaddleMobile__Framework__Proto__BlockDesc, ops), + &paddle_mobile__framework__proto__op_desc__descriptor, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, + { + "forward_block_idx", 5, PROTOBUF_C_LABEL_OPTIONAL, + PROTOBUF_C_TYPE_INT32, + offsetof(PaddleMobile__Framework__Proto__BlockDesc, + has_forward_block_idx), + offsetof(PaddleMobile__Framework__Proto__BlockDesc, + forward_block_idx), + NULL, + &paddle_mobile__framework__proto__block_desc__forward_block_idx__default_value, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned + paddle_mobile__framework__proto__block_desc__field_indices_by_name[] = { + 4, /* field[4] = forward_block_idx */ + 0, /* field[0] = idx */ + 3, /* field[3] = ops */ + 1, /* field[1] = parent_idx */ + 2, /* 
field[2] = vars */ +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__block_desc__number_ranges[1 + 1] = { + {1, 0}, {0, 5}}; +const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__block_desc__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.BlockDesc", + "BlockDesc", + "PaddleMobile__Framework__Proto__BlockDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__BlockDesc), + 5, + paddle_mobile__framework__proto__block_desc__field_descriptors, + paddle_mobile__framework__proto__block_desc__field_indices_by_name, + 1, + paddle_mobile__framework__proto__block_desc__number_ranges, + (ProtobufCMessageInit)paddle_mobile__framework__proto__block_desc__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const ProtobufCFieldDescriptor + paddle_mobile__framework__proto__program_desc__field_descriptors[1] = { + { + "blocks", 1, PROTOBUF_C_LABEL_REPEATED, PROTOBUF_C_TYPE_MESSAGE, + offsetof(PaddleMobile__Framework__Proto__ProgramDesc, n_blocks), + offsetof(PaddleMobile__Framework__Proto__ProgramDesc, blocks), + &paddle_mobile__framework__proto__block_desc__descriptor, NULL, + 0, /* flags */ + 0, NULL, NULL /* reserved1,reserved2, etc */ + }, +}; +static const unsigned + paddle_mobile__framework__proto__program_desc__field_indices_by_name[] = { + 0, /* field[0] = blocks */ +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__program_desc__number_ranges[1 + 1] = { + {1, 0}, {0, 1}}; +const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__program_desc__descriptor = { + PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC, + "paddle_mobile.framework.proto.ProgramDesc", + "ProgramDesc", + "PaddleMobile__Framework__Proto__ProgramDesc", + "paddle_mobile.framework.proto", + sizeof(PaddleMobile__Framework__Proto__ProgramDesc), + 1, + paddle_mobile__framework__proto__program_desc__field_descriptors, + paddle_mobile__framework__proto__program_desc__field_indices_by_name, + 1, + paddle_mobile__framework__proto__program_desc__number_ranges, + (ProtobufCMessageInit) + paddle_mobile__framework__proto__program_desc__init, + NULL, + NULL, + NULL /* reserved[123] */ +}; +static const ProtobufCEnumValue + paddle_mobile__framework__proto__attr_type__enum_values_by_number[10] = { + {"INT", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT", 0}, + {"FLOAT", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOAT", 1}, + {"STRING", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRING", 2}, + {"INTS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INTS", 3}, + {"FLOATS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOATS", 4}, + {"STRINGS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRINGS", 5}, + {"BOOLEAN", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEAN", 6}, + {"BOOLEANS", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEANS", 7}, + {"BLOCK", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK", 8}, + {"LONG", "PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG", 9}, +}; +static const ProtobufCIntRange + paddle_mobile__framework__proto__attr_type__value_ranges[] = {{0, 0}, + {0, 10}}; +static const ProtobufCEnumValueIndex + paddle_mobile__framework__proto__attr_type__enum_values_by_name[10] = { + {"BLOCK", 8}, {"BOOLEAN", 6}, {"BOOLEANS", 7}, {"FLOAT", 1}, + {"FLOATS", 4}, {"INT", 0}, {"INTS", 3}, {"LONG", 9}, + {"STRING", 2}, {"STRINGS", 5}, +}; +const ProtobufCEnumDescriptor + paddle_mobile__framework__proto__attr_type__descriptor = { + PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, + 
"paddle_mobile.framework.proto.AttrType", + "AttrType", + "PaddleMobile__Framework__Proto__AttrType", + "paddle_mobile.framework.proto", + 10, + paddle_mobile__framework__proto__attr_type__enum_values_by_number, + 10, + paddle_mobile__framework__proto__attr_type__enum_values_by_name, + 1, + paddle_mobile__framework__proto__attr_type__value_ranges, + NULL, + NULL, + NULL, + NULL /* reserved[1234] */ +}; diff --git a/tools/quantification/src/framework.pb-c.h b/tools/quantification/src/framework.pb-c.h new file mode 100644 index 0000000000000000000000000000000000000000..3d63bad76ad188d02986971bd911d8f30cf0af81 --- /dev/null +++ b/tools/quantification/src/framework.pb-c.h @@ -0,0 +1,579 @@ +/* Generated by the protocol buffer compiler. DO NOT EDIT! */ +/* Generated from: framework.proto */ + +#ifndef PROTOBUF_C_framework_2eproto__INCLUDED +#define PROTOBUF_C_framework_2eproto__INCLUDED + +#include "protobuf-c.h" + +PROTOBUF_C__BEGIN_DECLS + +#if PROTOBUF_C_VERSION_NUMBER < 1000000 +# error This file was generated by a newer version of protoc-c which is incompatible with your libprotobuf-c headers. Please update your headers. +#elif 1003000 < PROTOBUF_C_MIN_COMPILER_VERSION +# error This file was generated by an older version of protoc-c which is incompatible with your libprotobuf-c headers. Please regenerate this file with a newer version of protoc-c. +#endif + +typedef struct _PaddleMobile__Framework__Proto__OpDesc + PaddleMobile__Framework__Proto__OpDesc; +typedef struct _PaddleMobile__Framework__Proto__OpDesc__Attr + PaddleMobile__Framework__Proto__OpDesc__Attr; +typedef struct _PaddleMobile__Framework__Proto__OpDesc__Var + PaddleMobile__Framework__Proto__OpDesc__Var; +typedef struct _PaddleMobile__Framework__Proto__OpProto + PaddleMobile__Framework__Proto__OpProto; +typedef struct _PaddleMobile__Framework__Proto__OpProto__Var + PaddleMobile__Framework__Proto__OpProto__Var; +typedef struct _PaddleMobile__Framework__Proto__OpProto__Attr + PaddleMobile__Framework__Proto__OpProto__Attr; +typedef struct _PaddleMobile__Framework__Proto__VarType + PaddleMobile__Framework__Proto__VarType; +typedef struct _PaddleMobile__Framework__Proto__VarType__TensorDesc + PaddleMobile__Framework__Proto__VarType__TensorDesc; +typedef struct _PaddleMobile__Framework__Proto__VarType__LoDTensorDesc + PaddleMobile__Framework__Proto__VarType__LoDTensorDesc; +typedef struct _PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc + PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc; +typedef struct _PaddleMobile__Framework__Proto__VarType__ReaderDesc + PaddleMobile__Framework__Proto__VarType__ReaderDesc; +typedef struct _PaddleMobile__Framework__Proto__VarType__ChannelDesc + PaddleMobile__Framework__Proto__VarType__ChannelDesc; +typedef struct _PaddleMobile__Framework__Proto__VarType__Tuple + PaddleMobile__Framework__Proto__VarType__Tuple; +typedef struct _PaddleMobile__Framework__Proto__VarDesc + PaddleMobile__Framework__Proto__VarDesc; +typedef struct _PaddleMobile__Framework__Proto__BlockDesc + PaddleMobile__Framework__Proto__BlockDesc; +typedef struct _PaddleMobile__Framework__Proto__ProgramDesc + PaddleMobile__Framework__Proto__ProgramDesc; + +/* --- enums --- */ + +typedef enum _PaddleMobile__Framework__Proto__VarType__Type { + /* + * Pod Types + */ + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__BOOL = 0, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT16 = 1, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT32 = 2, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT64 = 3, + 
PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP16 = 4, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP32 = 5, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP64 = 6, + /* + * Other types that may need additional descriptions + */ + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_TENSOR = 7, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__SELECTED_ROWS = 8, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FEED_MINIBATCH = 9, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FETCH_LIST = 10, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__STEP_SCOPES = 11, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_RANK_TABLE = 12, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_TENSOR_ARRAY = 13, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__PLACE_LIST = 14, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__READER = 15, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__CHANNEL = 16, + /* + * Any runtime decided variable type is raw + * raw variables should manage their own allocations + * in operators like nccl_op + */ + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__RAW = 17, + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__TUPLE = + 18 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE( + PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE) +} PaddleMobile__Framework__Proto__VarType__Type; +typedef enum _PaddleMobile__Framework__Proto__AttrType { + PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT = 0, + PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOAT = 1, + PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRING = 2, + PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INTS = 3, + PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOATS = 4, + PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRINGS = 5, + PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEAN = 6, + PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEANS = 7, + PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK = 8, + PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG = + 9 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE( + PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE) +} PaddleMobile__Framework__Proto__AttrType; + +/* --- messages --- */ + +struct _PaddleMobile__Framework__Proto__OpDesc__Attr { + ProtobufCMessage base; + char *name; + PaddleMobile__Framework__Proto__AttrType type; + protobuf_c_boolean has_i; + int32_t i; + protobuf_c_boolean has_f; + float f; + char *s; + size_t n_ints; + int32_t *ints; + size_t n_floats; + float *floats; + size_t n_strings; + char **strings; + protobuf_c_boolean has_b; + protobuf_c_boolean b; + size_t n_bools; + protobuf_c_boolean *bools; + protobuf_c_boolean has_block_idx; + int32_t block_idx; + protobuf_c_boolean has_l; + int64_t l; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__ATTR__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT( \ + &paddle_mobile__framework__proto__op_desc__attr__descriptor) \ + , NULL, PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT, 0, 0, 0, 0, NULL, \ + 0, NULL, 0, NULL, 0, NULL, 0, 0, 0, NULL, 0, 0, 0, 0 \ + } + +struct _PaddleMobile__Framework__Proto__OpDesc__Var { + ProtobufCMessage base; + char *parameter; + size_t n_arguments; + char **arguments; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__VAR__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT( \ + &paddle_mobile__framework__proto__op_desc__var__descriptor) \ + , NULL, 0, NULL \ + } + +/* + * OpDesc describes an instance of a C++ framework::OperatorBase + * derived class type. 
+ */ +struct _PaddleMobile__Framework__Proto__OpDesc { + ProtobufCMessage base; + char *type; + size_t n_inputs; + PaddleMobile__Framework__Proto__OpDesc__Var **inputs; + size_t n_outputs; + PaddleMobile__Framework__Proto__OpDesc__Var **outputs; + size_t n_attrs; + PaddleMobile__Framework__Proto__OpDesc__Attr **attrs; + protobuf_c_boolean has_is_target; + protobuf_c_boolean is_target; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_DESC__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT( \ + &paddle_mobile__framework__proto__op_desc__descriptor) \ + , NULL, 0, NULL, 0, NULL, 0, NULL, 0, 0 \ + } + +/* + * VarProto describes the C++ type framework::Variable. + */ +struct _PaddleMobile__Framework__Proto__OpProto__Var { + ProtobufCMessage base; + char *name; + char *comment; + protobuf_c_boolean has_duplicable; + protobuf_c_boolean duplicable; + protobuf_c_boolean has_intermediate; + protobuf_c_boolean intermediate; + protobuf_c_boolean has_dispensable; + protobuf_c_boolean dispensable; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__VAR__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT( \ + &paddle_mobile__framework__proto__op_proto__var__descriptor) \ + , NULL, NULL, 0, 0, 0, 0, 0, 0 \ + } + +/* + * AttrProto describes the C++ type Attribute. + */ +struct _PaddleMobile__Framework__Proto__OpProto__Attr { + ProtobufCMessage base; + char *name; + PaddleMobile__Framework__Proto__AttrType type; + char *comment; + /* + * If that attribute is generated, it means the Paddle third + * language binding has responsibility to fill that + * attribute. End-User should not set that attribute. + */ + protobuf_c_boolean has_generated; + protobuf_c_boolean generated; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__ATTR__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT( \ + &paddle_mobile__framework__proto__op_proto__attr__descriptor) \ + , NULL, PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT, NULL, 0, 0 \ + } + +/* + * OpProto describes a C++ framework::OperatorBase derived class. + */ +struct _PaddleMobile__Framework__Proto__OpProto { + ProtobufCMessage base; + char *type; + size_t n_inputs; + PaddleMobile__Framework__Proto__OpProto__Var **inputs; + size_t n_outputs; + PaddleMobile__Framework__Proto__OpProto__Var **outputs; + size_t n_attrs; + PaddleMobile__Framework__Proto__OpProto__Attr **attrs; + char *comment; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__OP_PROTO__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT( \ + &paddle_mobile__framework__proto__op_proto__descriptor) \ + , NULL, 0, NULL, 0, NULL, 0, NULL, NULL \ + } + +struct _PaddleMobile__Framework__Proto__VarType__TensorDesc { + ProtobufCMessage base; + /* + * Should only be PODType. 
Is enforced in C++ + */ + PaddleMobile__Framework__Proto__VarType__Type data_type; + /* + * [UNK, 640, 480] is saved as [-1, 640, 480] + */ + size_t n_dims; + int64_t *dims; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TENSOR_DESC__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT( \ + &paddle_mobile__framework__proto__var_type__tensor_desc__descriptor) \ + , PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__BOOL, 0, NULL \ + } + +struct _PaddleMobile__Framework__Proto__VarType__LoDTensorDesc { + ProtobufCMessage base; + PaddleMobile__Framework__Proto__VarType__TensorDesc *tensor; + protobuf_c_boolean has_lod_level; + int32_t lod_level; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__LO_DTENSOR_DESC__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT( \ + &paddle_mobile__framework__proto__var_type__lo_dtensor_desc__descriptor) \ + , NULL, 0, 0 \ + } + +struct _PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc { + ProtobufCMessage base; + PaddleMobile__Framework__Proto__VarType__TensorDesc *tensor; + protobuf_c_boolean has_lod_level; + int32_t lod_level; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__LO_DTENSOR_ARRAY_DESC__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT( \ + &paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__descriptor) \ + , NULL, 0, 0 \ + } + +struct _PaddleMobile__Framework__Proto__VarType__ReaderDesc { + ProtobufCMessage base; + size_t n_lod_tensor; + PaddleMobile__Framework__Proto__VarType__LoDTensorDesc **lod_tensor; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__READER_DESC__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT( \ + &paddle_mobile__framework__proto__var_type__reader_desc__descriptor) \ + , 0, NULL \ + } + +struct _PaddleMobile__Framework__Proto__VarType__ChannelDesc { + ProtobufCMessage base; + PaddleMobile__Framework__Proto__VarType__Type data_type; + int64_t capacity; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__CHANNEL_DESC__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT( \ + &paddle_mobile__framework__proto__var_type__channel_desc__descriptor) \ + , PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__BOOL, 0 \ + } + +struct _PaddleMobile__Framework__Proto__VarType__Tuple { + ProtobufCMessage base; + size_t n_element_type; + PaddleMobile__Framework__Proto__VarType__Type *element_type; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TUPLE__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT( \ + &paddle_mobile__framework__proto__var_type__tuple__descriptor) \ + , 0, NULL \ + } + +struct _PaddleMobile__Framework__Proto__VarType { + ProtobufCMessage base; + PaddleMobile__Framework__Proto__VarType__Type type; + PaddleMobile__Framework__Proto__VarType__TensorDesc *selected_rows; + PaddleMobile__Framework__Proto__VarType__LoDTensorDesc *lod_tensor; + PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc *tensor_array; + PaddleMobile__Framework__Proto__VarType__ReaderDesc *reader; + PaddleMobile__Framework__Proto__VarType__ChannelDesc *channel; + PaddleMobile__Framework__Proto__VarType__Tuple *tuple; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT( \ + &paddle_mobile__framework__proto__var_type__descriptor) \ + , PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__BOOL, NULL, NULL, NULL, \ + NULL, NULL, NULL \ + } + +struct _PaddleMobile__Framework__Proto__VarDesc { + ProtobufCMessage base; + char *name; + PaddleMobile__Framework__Proto__VarType *type; + protobuf_c_boolean has_persistable; + protobuf_c_boolean persistable; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_DESC__INIT \ + { \ + 
PROTOBUF_C_MESSAGE_INIT( \ + &paddle_mobile__framework__proto__var_desc__descriptor) \ + , NULL, NULL, 0, 0 \ + } + +struct _PaddleMobile__Framework__Proto__BlockDesc { + ProtobufCMessage base; + int32_t idx; + int32_t parent_idx; + size_t n_vars; + PaddleMobile__Framework__Proto__VarDesc **vars; + size_t n_ops; + PaddleMobile__Framework__Proto__OpDesc **ops; + protobuf_c_boolean has_forward_block_idx; + int32_t forward_block_idx; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__BLOCK_DESC__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT( \ + &paddle_mobile__framework__proto__block_desc__descriptor) \ + , 0, 0, 0, NULL, 0, NULL, 0, -1 \ + } + +/* + * Please refer to + * https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md + * for more details. + * TODO(panyx0718): A model can have multiple programs. Need a + * way to distinguish them. Maybe ID or name? + */ +struct _PaddleMobile__Framework__Proto__ProgramDesc { + ProtobufCMessage base; + size_t n_blocks; + PaddleMobile__Framework__Proto__BlockDesc **blocks; +}; +#define PADDLE_MOBILE__FRAMEWORK__PROTO__PROGRAM_DESC__INIT \ + { \ + PROTOBUF_C_MESSAGE_INIT( \ + &paddle_mobile__framework__proto__program_desc__descriptor) \ + , 0, NULL \ + } + +/* PaddleMobile__Framework__Proto__OpDesc__Attr methods */ +void paddle_mobile__framework__proto__op_desc__attr__init( + PaddleMobile__Framework__Proto__OpDesc__Attr *message); +/* PaddleMobile__Framework__Proto__OpDesc__Var methods */ +void paddle_mobile__framework__proto__op_desc__var__init( + PaddleMobile__Framework__Proto__OpDesc__Var *message); +/* PaddleMobile__Framework__Proto__OpDesc methods */ +void paddle_mobile__framework__proto__op_desc__init( + PaddleMobile__Framework__Proto__OpDesc *message); + +size_t paddle_mobile__framework__proto__op_desc__get_packed_size( + const PaddleMobile__Framework__Proto__OpDesc *message); + +PaddleMobile__Framework__Proto__OpDesc * +paddle_mobile__framework__proto__op_desc__unpack(ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void paddle_mobile__framework__proto__op_desc__free_unpacked( + PaddleMobile__Framework__Proto__OpDesc *message, + ProtobufCAllocator *allocator); +/* PaddleMobile__Framework__Proto__OpProto__Var methods */ +void paddle_mobile__framework__proto__op_proto__var__init( + PaddleMobile__Framework__Proto__OpProto__Var *message); +/* PaddleMobile__Framework__Proto__OpProto__Attr methods */ +void paddle_mobile__framework__proto__op_proto__attr__init( + PaddleMobile__Framework__Proto__OpProto__Attr *message); +/* PaddleMobile__Framework__Proto__OpProto methods */ +void paddle_mobile__framework__proto__op_proto__init( + PaddleMobile__Framework__Proto__OpProto *message); +size_t paddle_mobile__framework__proto__op_proto__get_packed_size( + const PaddleMobile__Framework__Proto__OpProto *message); +PaddleMobile__Framework__Proto__OpProto * +paddle_mobile__framework__proto__op_proto__unpack(ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void paddle_mobile__framework__proto__op_proto__free_unpacked( + PaddleMobile__Framework__Proto__OpProto *message, + ProtobufCAllocator *allocator); +/* PaddleMobile__Framework__Proto__VarType__TensorDesc methods */ +void paddle_mobile__framework__proto__var_type__tensor_desc__init( + PaddleMobile__Framework__Proto__VarType__TensorDesc *message); +/* PaddleMobile__Framework__Proto__VarType__LoDTensorDesc methods */ +void paddle_mobile__framework__proto__var_type__lo_dtensor_desc__init( + PaddleMobile__Framework__Proto__VarType__LoDTensorDesc *message); +/* 
PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc methods */ +void paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__init( + PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc *message); +/* PaddleMobile__Framework__Proto__VarType__ReaderDesc methods */ +void paddle_mobile__framework__proto__var_type__reader_desc__init( + PaddleMobile__Framework__Proto__VarType__ReaderDesc *message); +/* PaddleMobile__Framework__Proto__VarType__ChannelDesc methods */ +void paddle_mobile__framework__proto__var_type__channel_desc__init( + PaddleMobile__Framework__Proto__VarType__ChannelDesc *message); +/* PaddleMobile__Framework__Proto__VarType__Tuple methods */ +void paddle_mobile__framework__proto__var_type__tuple__init( + PaddleMobile__Framework__Proto__VarType__Tuple *message); +/* PaddleMobile__Framework__Proto__VarType methods */ +void paddle_mobile__framework__proto__var_type__init( + PaddleMobile__Framework__Proto__VarType *message); +size_t paddle_mobile__framework__proto__var_type__get_packed_size( + const PaddleMobile__Framework__Proto__VarType *message); +PaddleMobile__Framework__Proto__VarType * +paddle_mobile__framework__proto__var_type__unpack(ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void paddle_mobile__framework__proto__var_type__free_unpacked( + PaddleMobile__Framework__Proto__VarType *message, + ProtobufCAllocator *allocator); +/* PaddleMobile__Framework__Proto__VarDesc methods */ +void paddle_mobile__framework__proto__var_desc__init( + PaddleMobile__Framework__Proto__VarDesc *message); +size_t paddle_mobile__framework__proto__var_desc__get_packed_size( + const PaddleMobile__Framework__Proto__VarDesc *message); +PaddleMobile__Framework__Proto__VarDesc * +paddle_mobile__framework__proto__var_desc__unpack(ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +void paddle_mobile__framework__proto__var_desc__free_unpacked( + PaddleMobile__Framework__Proto__VarDesc *message, + ProtobufCAllocator *allocator); +/* PaddleMobile__Framework__Proto__BlockDesc methods */ +void paddle_mobile__framework__proto__block_desc__init( + PaddleMobile__Framework__Proto__BlockDesc *message); +size_t paddle_mobile__framework__proto__block_desc__get_packed_size( + const PaddleMobile__Framework__Proto__BlockDesc *message); +PaddleMobile__Framework__Proto__BlockDesc * +paddle_mobile__framework__proto__block_desc__unpack( + ProtobufCAllocator *allocator, size_t len, const uint8_t *data); +void paddle_mobile__framework__proto__block_desc__free_unpacked( + PaddleMobile__Framework__Proto__BlockDesc *message, + ProtobufCAllocator *allocator); +/* PaddleMobile__Framework__Proto__ProgramDesc methods */ +void paddle_mobile__framework__proto__program_desc__init( + PaddleMobile__Framework__Proto__ProgramDesc *message); +size_t paddle_mobile__framework__proto__program_desc__get_packed_size( + const PaddleMobile__Framework__Proto__ProgramDesc *message); +PaddleMobile__Framework__Proto__ProgramDesc * +paddle_mobile__framework__proto__program_desc__unpack( + ProtobufCAllocator *allocator, size_t len, const uint8_t *data); +void paddle_mobile__framework__proto__program_desc__free_unpacked( + PaddleMobile__Framework__Proto__ProgramDesc *message, + ProtobufCAllocator *allocator); +/* --- per-message closures --- */ + +typedef void (*PaddleMobile__Framework__Proto__OpDesc__Attr_Closure)( + const PaddleMobile__Framework__Proto__OpDesc__Attr *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__OpDesc__Var_Closure)( + const 
PaddleMobile__Framework__Proto__OpDesc__Var *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__OpDesc_Closure)( + const PaddleMobile__Framework__Proto__OpDesc *message, void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__OpProto__Var_Closure)( + const PaddleMobile__Framework__Proto__OpProto__Var *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__OpProto__Attr_Closure)( + const PaddleMobile__Framework__Proto__OpProto__Attr *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__OpProto_Closure)( + const PaddleMobile__Framework__Proto__OpProto *message, void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__VarType__TensorDesc_Closure)( + const PaddleMobile__Framework__Proto__VarType__TensorDesc *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__VarType__LoDTensorDesc_Closure)( + const PaddleMobile__Framework__Proto__VarType__LoDTensorDesc *message, + void *closure_data); +typedef void ( + *PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc_Closure)( + const PaddleMobile__Framework__Proto__VarType__LoDTensorArrayDesc *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__VarType__ReaderDesc_Closure)( + const PaddleMobile__Framework__Proto__VarType__ReaderDesc *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__VarType__ChannelDesc_Closure)( + const PaddleMobile__Framework__Proto__VarType__ChannelDesc *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__VarType__Tuple_Closure)( + const PaddleMobile__Framework__Proto__VarType__Tuple *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__VarType_Closure)( + const PaddleMobile__Framework__Proto__VarType *message, void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__VarDesc_Closure)( + const PaddleMobile__Framework__Proto__VarDesc *message, void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__BlockDesc_Closure)( + const PaddleMobile__Framework__Proto__BlockDesc *message, + void *closure_data); +typedef void (*PaddleMobile__Framework__Proto__ProgramDesc_Closure)( + const PaddleMobile__Framework__Proto__ProgramDesc *message, + void *closure_data); + +/* --- services --- */ + +/* --- descriptors --- */ + +extern const ProtobufCEnumDescriptor + paddle_mobile__framework__proto__attr_type__descriptor; +extern const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__op_desc__descriptor; +extern const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__op_desc__attr__descriptor; +extern const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__op_desc__var__descriptor; +extern const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__op_proto__descriptor; +extern const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__op_proto__var__descriptor; +extern const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__op_proto__attr__descriptor; +extern const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__var_type__descriptor; +extern const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__var_type__tensor_desc__descriptor; +extern const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__var_type__lo_dtensor_desc__descriptor; +extern const ProtobufCMessageDescriptor + paddle_mobile__framework__proto__var_type__lo_dtensor_array_desc__descriptor; +extern const 
ProtobufCMessageDescriptor
+    paddle_mobile__framework__proto__var_type__reader_desc__descriptor;
+extern const ProtobufCMessageDescriptor
+    paddle_mobile__framework__proto__var_type__channel_desc__descriptor;
+extern const ProtobufCMessageDescriptor
+    paddle_mobile__framework__proto__var_type__tuple__descriptor;
+extern const ProtobufCEnumDescriptor
+    paddle_mobile__framework__proto__var_type__type__descriptor;
+extern const ProtobufCMessageDescriptor
+    paddle_mobile__framework__proto__var_desc__descriptor;
+extern const ProtobufCMessageDescriptor
+    paddle_mobile__framework__proto__block_desc__descriptor;
+extern const ProtobufCMessageDescriptor
+    paddle_mobile__framework__proto__program_desc__descriptor;
+
+PROTOBUF_C__END_DECLS
+
+#endif /* PROTOBUF_C_framework_2eproto__INCLUDED */
diff --git a/tools/quantification/src/program_desc.cpp b/tools/quantification/src/program_desc.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4f9984832ada5061c7691aeb7fadba86cb5b8c0c
--- /dev/null
+++ b/tools/quantification/src/program_desc.cpp
@@ -0,0 +1,30 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+//
+// Created by 谢柏渊 on 2018/7/25.
+//
+
+#include "src/program_desc.h"
+#include <vector>
+
+ProgramDesc::ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc) {
+  for (int i = 0; i < desc->n_blocks; ++i) {
+    blocks_.emplace_back(std::make_shared<BlockDesc>(desc->blocks[i]));
+  }
+}
+
+const std::vector<std::shared_ptr<BlockDesc>> ProgramDesc::Blocks() {
+  return blocks_;
+}
diff --git a/tools/quantification/src/program_desc.h b/tools/quantification/src/program_desc.h
new file mode 100644
index 0000000000000000000000000000000000000000..60a0f757b0c907165d7639a41e35a407ef083b59
--- /dev/null
+++ b/tools/quantification/src/program_desc.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+//
+// Created by 谢柏渊 on 2018/7/25.
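+//
+// Usage sketch (illustrative only; `buf` and `size` are assumed to hold a
+// serialized ProgramDesc):
+//
+//   PaddleMobile__Framework__Proto__ProgramDesc *proto =
+//       paddle_mobile__framework__proto__program_desc__unpack(NULL, size,
+//                                                             buf);
+//   ProgramDesc program(proto);
+//   for (const auto &block : program.Blocks()) {
+//     // walk the vars and ops of each block
+//   }
+//   paddle_mobile__framework__proto__program_desc__free_unpacked(proto, NULL);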
+//
+
+#ifndef TOOLS_QUANTIFICATION_SRC_PROGRAM_DESC_H_
+#define TOOLS_QUANTIFICATION_SRC_PROGRAM_DESC_H_
+
+#include <memory>
+#include <vector>
+#include "src/block_desc_local.h"
+#include "src/framework.pb-c.h"
+
+class ProgramDesc {
+ public:
+  //  friend class Node;
+  //
+  //  friend class ProgramOptimize;
+
+  explicit ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc);
+
+  const std::vector<std::shared_ptr<BlockDesc>> Blocks();
+
+ private:
+  std::vector<std::shared_ptr<BlockDesc>> blocks_;
+};
+
+#endif  // TOOLS_QUANTIFICATION_SRC_PROGRAM_DESC_H_
diff --git a/tools/quantification/src/protobuf-c.c b/tools/quantification/src/protobuf-c.c
new file mode 100644
index 0000000000000000000000000000000000000000..1092e3f78b02a343d8c8965ea7b2d777a6fac9ae
--- /dev/null
+++ b/tools/quantification/src/protobuf-c.c
@@ -0,0 +1,2098 @@
+/*
+ * Copyright (c) 2008-2015, Dave Benson and the protobuf-c authors.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*! \file
+ * Support library for `protoc-c` generated code.
+ *
+ * This file implements the public API used by the code generated
+ * by `protoc-c`.
+ *
+ * \authors Dave Benson and the protobuf-c authors
+ *
+ * \copyright 2008-2014. Licensed under the terms of the [BSD-2-Clause] license.
+ */
+
+/**
+ * \todo 64-BIT OPTIMIZATION: certain implementations use 32-bit math
+ * even on 64-bit platforms (uint64_size, uint64_pack, parse_uint64).
+ *
+ * \todo Use size_t consistently.
+ */
+
+#include <stdlib.h> /* for malloc, free */
+#include <string.h> /* for strcmp, strlen, memcpy, memmove, memset */
+
+#include "protobuf-c.h"
+
+#define TRUE 1
+#define FALSE 0
+
+#define PROTOBUF_C__ASSERT_NOT_REACHED() assert(0)
+
+/* Workaround for Microsoft compilers. */
+#ifdef _MSC_VER
+#define inline __inline
+#endif
+
+/**
+ * \defgroup internal Internal functions and macros
+ *
+ * These are not exported by the library but are useful to developers working
+ * on `libprotobuf-c` itself.
+ */
+
+/**
+ * \defgroup macros Utility macros for manipulating structures
+ *
+ * Macros and constants used to manipulate the base "classes" generated by
+ * `protobuf-c`. They also define limits and check correctness.
+ *
+ * \ingroup internal
+ * @{
+ */
+
+/** The maximum length of a 64-bit integer in varint encoding.
*/ +#define MAX_UINT64_ENCODED_SIZE 10 + +#ifndef PROTOBUF_C_UNPACK_ERROR +#define PROTOBUF_C_UNPACK_ERROR(...) +#endif + +const char protobuf_c_empty_string[] = ""; + +/** + * Internal `ProtobufCMessage` manipulation macro. + * + * Base macro for manipulating a `ProtobufCMessage`. Used by STRUCT_MEMBER() and + * STRUCT_MEMBER_PTR(). + */ +#define STRUCT_MEMBER_P(struct_p, struct_offset) \ + ((void *)((uint8_t *)(struct_p) + (struct_offset))) + +/** + * Return field in a `ProtobufCMessage` based on offset. + * + * Take a pointer to a `ProtobufCMessage` and find the field at the offset. + * Cast it to the passed type. + */ +#define STRUCT_MEMBER(member_type, struct_p, struct_offset) \ + (*(member_type *)STRUCT_MEMBER_P((struct_p), (struct_offset))) + +/** + * Return field in a `ProtobufCMessage` based on offset. + * + * Take a pointer to a `ProtobufCMessage` and find the field at the offset. Cast + * it to a pointer to the passed type. + */ +#define STRUCT_MEMBER_PTR(member_type, struct_p, struct_offset) \ + ((member_type *)STRUCT_MEMBER_P((struct_p), (struct_offset))) + +/* Assertions for magic numbers. */ + +#define ASSERT_IS_ENUM_DESCRIPTOR(desc) \ + assert((desc)->magic == PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC) + +#define ASSERT_IS_MESSAGE_DESCRIPTOR(desc) \ + assert((desc)->magic == PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC) + +#define ASSERT_IS_MESSAGE(message) \ + ASSERT_IS_MESSAGE_DESCRIPTOR((message)->descriptor) + +#define ASSERT_IS_SERVICE_DESCRIPTOR(desc) \ + assert((desc)->magic == PROTOBUF_C__SERVICE_DESCRIPTOR_MAGIC) + +/**@}*/ + +/* --- version --- */ + +const char *protobuf_c_version(void) { return PROTOBUF_C_VERSION; } + +uint32_t protobuf_c_version_number(void) { return PROTOBUF_C_VERSION_NUMBER; } + +/* --- allocator --- */ + +static void *system_alloc(void *allocator_data, size_t size) { + return malloc(size); +} + +static void system_free(void *allocator_data, void *data) { free(data); } + +static inline void *do_alloc(ProtobufCAllocator *allocator, size_t size) { + return allocator->alloc(allocator->allocator_data, size); +} + +static inline void do_free(ProtobufCAllocator *allocator, void *data) { + if (data != NULL) allocator->free(allocator->allocator_data, data); +} + +/* + * This allocator uses the system's malloc() and free(). It is the default + * allocator used if NULL is passed as the ProtobufCAllocator to an exported + * function. + */ +static ProtobufCAllocator protobuf_c__allocator = { + .alloc = &system_alloc, + .free = &system_free, + .allocator_data = NULL, +}; + +/* === buffer-simple === */ + +void protobuf_c_buffer_simple_append(ProtobufCBuffer *buffer, size_t len, + const uint8_t *data) { + ProtobufCBufferSimple *simp = (ProtobufCBufferSimple *)buffer; + size_t new_len = simp->len + len; + + if (new_len > simp->alloced) { + ProtobufCAllocator *allocator = simp->allocator; + size_t new_alloced = simp->alloced * 2; + uint8_t *new_data; + + if (allocator == NULL) allocator = &protobuf_c__allocator; + while (new_alloced < new_len) new_alloced += new_alloced; + new_data = do_alloc(allocator, new_alloced); + if (!new_data) return; + memcpy(new_data, simp->data, simp->len); + if (simp->must_free_data) + do_free(allocator, simp->data); + else + simp->must_free_data = TRUE; + simp->data = new_data; + simp->alloced = new_alloced; + } + memcpy(simp->data + simp->len, data, len); + simp->len = new_len; +} + +/** + * \defgroup packedsz protobuf_c_message_get_packed_size() implementation + * + * Routines mainly used by protobuf_c_message_get_packed_size(). 
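+ *
+ * As a worked example (illustrative, using the helpers below): a required
+ * uint32 field with id 1 and value 300 occupies get_tag_size(1) +
+ * uint32_size(300) == 1 + 2 == 3 bytes on the wire.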
+ * + * \ingroup internal + * @{ + */ + +/** + * Return the number of bytes required to store the tag for the field. Includes + * 3 bits for the wire-type, and a single bit that denotes the end-of-tag. + * + * \param number + * Field tag to encode. + * \return + * Number of bytes required. + */ +static inline size_t get_tag_size(uint32_t number) { + if (number < (1UL << 4)) { + return 1; + } else if (number < (1UL << 11)) { + return 2; + } else if (number < (1UL << 18)) { + return 3; + } else if (number < (1UL << 25)) { + return 4; + } else { + return 5; + } +} + +/** + * Return the number of bytes required to store a variable-length unsigned + * 32-bit integer in base-128 varint encoding. + * + * \param v + * Value to encode. + * \return + * Number of bytes required. + */ +static inline size_t uint32_size(uint32_t v) { + if (v < (1UL << 7)) { + return 1; + } else if (v < (1UL << 14)) { + return 2; + } else if (v < (1UL << 21)) { + return 3; + } else if (v < (1UL << 28)) { + return 4; + } else { + return 5; + } +} + +/** + * Return the number of bytes required to store a variable-length signed 32-bit + * integer in base-128 varint encoding. + * + * \param v + * Value to encode. + * \return + * Number of bytes required. + */ +static inline size_t int32_size(int32_t v) { + if (v < 0) { + return 10; + } else if (v < (1L << 7)) { + return 1; + } else if (v < (1L << 14)) { + return 2; + } else if (v < (1L << 21)) { + return 3; + } else if (v < (1L << 28)) { + return 4; + } else { + return 5; + } +} + +/** + * Return the ZigZag-encoded 32-bit unsigned integer form of a 32-bit signed + * integer. + * + * \param v + * Value to encode. + * \return + * ZigZag encoded integer. + */ +static inline uint32_t zigzag32(int32_t v) { + if (v < 0) + return (-(uint32_t)v) * 2 - 1; + else + return (uint32_t)(v)*2; +} + +/** + * Return the number of bytes required to store a signed 32-bit integer, + * converted to an unsigned 32-bit integer with ZigZag encoding, using base-128 + * varint encoding. + * + * \param v + * Value to encode. + * \return + * Number of bytes required. + */ +static inline size_t sint32_size(int32_t v) { return uint32_size(zigzag32(v)); } + +/** + * Return the number of bytes required to store a 64-bit unsigned integer in + * base-128 varint encoding. + * + * \param v + * Value to encode. + * \return + * Number of bytes required. + */ +static inline size_t uint64_size(uint64_t v) { + uint32_t upper_v = (uint32_t)(v >> 32); + + if (upper_v == 0) { + return uint32_size((uint32_t)v); + } else if (upper_v < (1UL << 3)) { + return 5; + } else if (upper_v < (1UL << 10)) { + return 6; + } else if (upper_v < (1UL << 17)) { + return 7; + } else if (upper_v < (1UL << 24)) { + return 8; + } else if (upper_v < (1UL << 31)) { + return 9; + } else { + return 10; + } +} + +/** + * Return the ZigZag-encoded 64-bit unsigned integer form of a 64-bit signed + * integer. + * + * \param v + * Value to encode. + * \return + * ZigZag encoded integer. + */ +static inline uint64_t zigzag64(int64_t v) { + if (v < 0) + return (-(uint64_t)v) * 2 - 1; + else + return (uint64_t)(v)*2; +} + +/** + * Return the number of bytes required to store a signed 64-bit integer, + * converted to an unsigned 64-bit integer with ZigZag encoding, using base-128 + * varint encoding. + * + * \param v + * Value to encode. + * \return + * Number of bytes required. 
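+ *
+ * Illustrative values, following the helpers above: zigzag64(-1) == 1, so
+ * sint64_size(-1) == 1, whereas int32_size(-1) == 10 because negative
+ * values are sign-extended to a full ten-byte varint.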
+ */ +static inline size_t sint64_size(int64_t v) { return uint64_size(zigzag64(v)); } + +/** + * Calculate the serialized size of a single required message field, including + * the space needed by the preceding tag. + * + * \param field + * Field descriptor for member. + * \param member + * Field to encode. + * \return + * Number of bytes required. + */ +static size_t required_field_get_packed_size( + const ProtobufCFieldDescriptor *field, const void *member) { + size_t rv = get_tag_size(field->id); + + switch (field->type) { + case PROTOBUF_C_TYPE_SINT32: + return rv + sint32_size(*(const int32_t *)member); + case PROTOBUF_C_TYPE_ENUM: + case PROTOBUF_C_TYPE_INT32: + return rv + int32_size(*(const int32_t *)member); + case PROTOBUF_C_TYPE_UINT32: + return rv + uint32_size(*(const uint32_t *)member); + case PROTOBUF_C_TYPE_SINT64: + return rv + sint64_size(*(const int64_t *)member); + case PROTOBUF_C_TYPE_INT64: + case PROTOBUF_C_TYPE_UINT64: + return rv + uint64_size(*(const uint64_t *)member); + case PROTOBUF_C_TYPE_SFIXED32: + case PROTOBUF_C_TYPE_FIXED32: + return rv + 4; + case PROTOBUF_C_TYPE_SFIXED64: + case PROTOBUF_C_TYPE_FIXED64: + return rv + 8; + case PROTOBUF_C_TYPE_BOOL: + return rv + 1; + case PROTOBUF_C_TYPE_FLOAT: + return rv + 4; + case PROTOBUF_C_TYPE_DOUBLE: + return rv + 8; + case PROTOBUF_C_TYPE_STRING: { + const char *str = *(char *const *)member; + size_t len = str ? strlen(str) : 0; + return rv + uint32_size(len) + len; + } + case PROTOBUF_C_TYPE_BYTES: { + size_t len = ((const ProtobufCBinaryData *)member)->len; + return rv + uint32_size(len) + len; + } + case PROTOBUF_C_TYPE_MESSAGE: { + const ProtobufCMessage *msg = *(ProtobufCMessage *const *)member; + size_t subrv = msg ? protobuf_c_message_get_packed_size(msg) : 0; + return rv + uint32_size(subrv) + subrv; + } + } + PROTOBUF_C__ASSERT_NOT_REACHED(); + return 0; +} + +/** + * Calculate the serialized size of a single oneof message field, including + * the space needed by the preceding tag. Returns 0 if the oneof field isn't + * selected or is not set. + * + * \param field + * Field descriptor for member. + * \param oneof_case + * Enum value that selects the field in the oneof. + * \param member + * Field to encode. + * \return + * Number of bytes required. + */ +static size_t oneof_field_get_packed_size(const ProtobufCFieldDescriptor *field, + uint32_t oneof_case, + const void *member) { + if (oneof_case != field->id) { + return 0; + } + if (field->type == PROTOBUF_C_TYPE_MESSAGE || + field->type == PROTOBUF_C_TYPE_STRING) { + const void *ptr = *(const void *const *)member; + if (ptr == NULL || ptr == field->default_value) return 0; + } + return required_field_get_packed_size(field, member); +} + +/** + * Calculate the serialized size of a single optional message field, including + * the space needed by the preceding tag. Returns 0 if the optional field isn't + * set. + * + * \param field + * Field descriptor for member. + * \param has + * True if the field exists, false if not. + * \param member + * Field to encode. + * \return + * Number of bytes required. 
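+ *
+ * Worked example (illustrative): an unset optional int32 (has == FALSE)
+ * contributes 0 bytes; set to 300 under field id 1, it contributes 1 tag
+ * byte + 2 varint bytes == 3 bytes.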
+ */ +static size_t optional_field_get_packed_size( + const ProtobufCFieldDescriptor *field, const protobuf_c_boolean has, + const void *member) { + if (field->type == PROTOBUF_C_TYPE_MESSAGE || + field->type == PROTOBUF_C_TYPE_STRING) { + const void *ptr = *(const void *const *)member; + if (ptr == NULL || ptr == field->default_value) return 0; + } else { + if (!has) return 0; + } + return required_field_get_packed_size(field, member); +} + +static protobuf_c_boolean field_is_zeroish( + const ProtobufCFieldDescriptor *field, const void *member) { + protobuf_c_boolean ret = FALSE; + + switch (field->type) { + case PROTOBUF_C_TYPE_BOOL: + ret = (0 == *(const protobuf_c_boolean *)member); + break; + case PROTOBUF_C_TYPE_ENUM: + case PROTOBUF_C_TYPE_SINT32: + case PROTOBUF_C_TYPE_INT32: + case PROTOBUF_C_TYPE_UINT32: + case PROTOBUF_C_TYPE_SFIXED32: + case PROTOBUF_C_TYPE_FIXED32: + ret = (0 == *(const uint32_t *)member); + break; + case PROTOBUF_C_TYPE_SINT64: + case PROTOBUF_C_TYPE_INT64: + case PROTOBUF_C_TYPE_UINT64: + case PROTOBUF_C_TYPE_SFIXED64: + case PROTOBUF_C_TYPE_FIXED64: + ret = (0 == *(const uint64_t *)member); + break; + case PROTOBUF_C_TYPE_FLOAT: + ret = (0 == *(const float *)member); + break; + case PROTOBUF_C_TYPE_DOUBLE: + ret = (0 == *(const double *)member); + break; + case PROTOBUF_C_TYPE_STRING: + ret = (NULL == *(const char *const *)member) || + ('\0' == **(const char *const *)member); + break; + case PROTOBUF_C_TYPE_BYTES: + case PROTOBUF_C_TYPE_MESSAGE: + ret = (NULL == *(const void *const *)member); + break; + default: + ret = TRUE; + break; + } + + return ret; +} + +/** + * Calculate the serialized size of a single unlabeled message field, including + * the space needed by the preceding tag. Returns 0 if the field isn't set or + * if it is set to a "zeroish" value (null pointer or 0 for numerical values). + * Unlabeled fields are supported only in proto3. + * + * \param field + * Field descriptor for member. + * \param member + * Field to encode. + * \return + * Number of bytes required. + */ +static size_t unlabeled_field_get_packed_size( + const ProtobufCFieldDescriptor *field, const void *member) { + if (field_is_zeroish(field, member)) return 0; + return required_field_get_packed_size(field, member); +} + +/** + * Calculate the serialized size of repeated message fields, which may consist + * of any number of values (including 0). Includes the space needed by the + * preceding tags (as needed). + * + * \param field + * Field descriptor for member. + * \param count + * Number of repeated field members. + * \param member + * Field to encode. + * \return + * Number of bytes required. 
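+ *
+ * Worked example (illustrative): five uint32 values of 1 under field id 1
+ * cost 5 * (1 tag byte + 1 varint byte) == 10 bytes unpacked; with
+ * PROTOBUF_C_FIELD_FLAG_PACKED they cost 1 tag byte + 1 length byte +
+ * 5 varint bytes == 7 bytes.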
+ */ +static size_t repeated_field_get_packed_size( + const ProtobufCFieldDescriptor *field, size_t count, const void *member) { + size_t header_size; + size_t rv = 0; + unsigned i; + void *array = *(void *const *)member; + + if (count == 0) return 0; + header_size = get_tag_size(field->id); + if (0 == (field->flags & PROTOBUF_C_FIELD_FLAG_PACKED)) header_size *= count; + + switch (field->type) { + case PROTOBUF_C_TYPE_SINT32: + for (i = 0; i < count; i++) rv += sint32_size(((int32_t *)array)[i]); + break; + case PROTOBUF_C_TYPE_ENUM: + case PROTOBUF_C_TYPE_INT32: + for (i = 0; i < count; i++) rv += int32_size(((int32_t *)array)[i]); + break; + case PROTOBUF_C_TYPE_UINT32: + for (i = 0; i < count; i++) rv += uint32_size(((uint32_t *)array)[i]); + break; + case PROTOBUF_C_TYPE_SINT64: + for (i = 0; i < count; i++) rv += sint64_size(((int64_t *)array)[i]); + break; + case PROTOBUF_C_TYPE_INT64: + case PROTOBUF_C_TYPE_UINT64: + for (i = 0; i < count; i++) rv += uint64_size(((uint64_t *)array)[i]); + break; + case PROTOBUF_C_TYPE_SFIXED32: + case PROTOBUF_C_TYPE_FIXED32: + case PROTOBUF_C_TYPE_FLOAT: + rv += 4 * count; + break; + case PROTOBUF_C_TYPE_SFIXED64: + case PROTOBUF_C_TYPE_FIXED64: + case PROTOBUF_C_TYPE_DOUBLE: + rv += 8 * count; + break; + case PROTOBUF_C_TYPE_BOOL: + rv += count; + break; + case PROTOBUF_C_TYPE_STRING: + for (i = 0; i < count; i++) { + size_t len = strlen(((char **)array)[i]); + rv += uint32_size(len) + len; + } + break; + case PROTOBUF_C_TYPE_BYTES: + for (i = 0; i < count; i++) { + size_t len = ((ProtobufCBinaryData *)array)[i].len; + rv += uint32_size(len) + len; + } + break; + case PROTOBUF_C_TYPE_MESSAGE: + for (i = 0; i < count; i++) { + size_t len = + protobuf_c_message_get_packed_size(((ProtobufCMessage **)array)[i]); + rv += uint32_size(len) + len; + } + break; + } + + if (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_PACKED)) + header_size += uint32_size(rv); + return header_size + rv; +} + +/** + * Calculate the serialized size of an unknown field, i.e. one that is passed + * through mostly uninterpreted. This is required for forward compatibility if + * new fields are added to the message descriptor. + * + * \param field + * Unknown field type. + * \return + * Number of bytes required. + */ +static inline size_t unknown_field_get_packed_size( + const ProtobufCMessageUnknownField *field) { + return get_tag_size(field->tag) + field->len; +} + +/**@}*/ + +/* + * Calculate the serialized size of the message. 
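+ *
+ * Typical use (an illustrative sketch, not from the original source): size
+ * the output buffer before calling protobuf_c_message_pack(), e.g.
+ * `uint8_t *buf = malloc(protobuf_c_message_get_packed_size(&m->base));`,
+ * where `m` points at any generated message struct with a ProtobufCMessage
+ * `base` as its first member.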
+ */ +size_t protobuf_c_message_get_packed_size(const ProtobufCMessage *message) { + unsigned i; + size_t rv = 0; + + ASSERT_IS_MESSAGE(message); + for (i = 0; i < message->descriptor->n_fields; i++) { + const ProtobufCFieldDescriptor *field = message->descriptor->fields + i; + const void *member = ((const char *)message) + field->offset; + const void *qmember = ((const char *)message) + field->quantifier_offset; + + if (field->label == PROTOBUF_C_LABEL_REQUIRED) { + rv += required_field_get_packed_size(field, member); + } else if ((field->label == PROTOBUF_C_LABEL_OPTIONAL || + field->label == PROTOBUF_C_LABEL_NONE) && + (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_ONEOF))) { + rv += oneof_field_get_packed_size(field, *(const uint32_t *)qmember, + member); + } else if (field->label == PROTOBUF_C_LABEL_OPTIONAL) { + rv += optional_field_get_packed_size( + field, *(protobuf_c_boolean *)qmember, member); + } else if (field->label == PROTOBUF_C_LABEL_NONE) { + rv += unlabeled_field_get_packed_size(field, member); + } else { + rv += repeated_field_get_packed_size(field, *(const size_t *)qmember, + member); + } + } + for (i = 0; i < message->n_unknown_fields; i++) + rv += unknown_field_get_packed_size(&message->unknown_fields[i]); + return rv; +} + +/** + * \defgroup pack protobuf_c_message_pack() implementation + * + * Routines mainly used by protobuf_c_message_pack(). + * + * \ingroup internal + * @{ + */ + +/** + * Pack an unsigned 32-bit integer in base-128 varint encoding and return the + * number of bytes written, which must be 5 or less. + * + * \param value + * Value to encode. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. + */ +static inline size_t uint32_pack(uint32_t value, uint8_t *out) { + unsigned rv = 0; + + if (value >= 0x80) { + out[rv++] = value | 0x80; + value >>= 7; + if (value >= 0x80) { + out[rv++] = value | 0x80; + value >>= 7; + if (value >= 0x80) { + out[rv++] = value | 0x80; + value >>= 7; + if (value >= 0x80) { + out[rv++] = value | 0x80; + value >>= 7; + } + } + } + } + /* assert: value<128 */ + out[rv++] = value; + return rv; +} + +/** + * Pack a 64-bit unsigned integer using base-128 varint encoding and return the + * number of bytes written. + * + * \param value + * Value to encode. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. + */ +static size_t uint64_pack(uint64_t value, uint8_t *out) { + uint32_t hi = (uint32_t)(value >> 32); + uint32_t lo = (uint32_t)value; + unsigned rv; + + if (hi == 0) return uint32_pack((uint32_t)lo, out); + out[0] = (lo) | 0x80; + out[1] = (lo >> 7) | 0x80; + out[2] = (lo >> 14) | 0x80; + out[3] = (lo >> 21) | 0x80; + if (hi < 8) { + out[4] = (hi << 4) | (lo >> 28); + return 5; + } else { + out[4] = ((hi & 7) << 4) | (lo >> 28) | 0x80; + hi >>= 3; + } + rv = 5; + while (hi >= 128) { + out[rv++] = hi | 0x80; + hi >>= 7; + } + out[rv++] = hi; + return rv; +} + +/** + * Pack a ProtobufCBinaryData and return the number of bytes written. The output + * includes a length delimiter. + * + * \param bd + * ProtobufCBinaryData to encode. + * \param[out] out + * Packed value. + * \return + * Number of bytes written to `out`. + */ +static inline size_t binary_data_pack(const ProtobufCBinaryData *bd, + uint8_t *out) { + size_t len = bd->len; + size_t rv = uint32_pack(len, out); + memcpy(out + rv, bd->data, len); + return rv + len; +} + +/** + * Pack a field tag. + * + * Wire-type will be added in required_field_pack(). 
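+ *
+ * Worked example (illustrative): tag_pack(1, out) packs 1 << 3 == 8 as the
+ * single varint byte 0x08, and the 3-bit wire-type is then ORed into the
+ * low bits of out[0].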
+ *
+ * \todo Just call uint64_pack on 64-bit platforms.
+ *
+ * \param id
+ *      Tag value to encode.
+ * \param[out] out
+ *      Packed value.
+ * \return
+ *      Number of bytes written to `out`.
+ */
+static size_t tag_pack(uint32_t id, uint8_t *out) {
+  if (id < (1UL << (32 - 3)))
+    return uint32_pack(id << 3, out);
+  else
+    return uint64_pack(((uint64_t)id) << 3, out);
+}
+
+/**
+ * Given a field type, return the in-memory size.
+ *
+ * \todo Implement as a table lookup.
+ *
+ * \param type
+ *      Field type.
+ * \return
+ *      Size of the field.
+ */
+static inline size_t sizeof_elt_in_repeated_array(ProtobufCType type) {
+  switch (type) {
+    case PROTOBUF_C_TYPE_SINT32:
+    case PROTOBUF_C_TYPE_INT32:
+    case PROTOBUF_C_TYPE_UINT32:
+    case PROTOBUF_C_TYPE_SFIXED32:
+    case PROTOBUF_C_TYPE_FIXED32:
+    case PROTOBUF_C_TYPE_FLOAT:
+    case PROTOBUF_C_TYPE_ENUM:
+      return 4;
+    case PROTOBUF_C_TYPE_SINT64:
+    case PROTOBUF_C_TYPE_INT64:
+    case PROTOBUF_C_TYPE_UINT64:
+    case PROTOBUF_C_TYPE_SFIXED64:
+    case PROTOBUF_C_TYPE_FIXED64:
+    case PROTOBUF_C_TYPE_DOUBLE:
+      return 8;
+    case PROTOBUF_C_TYPE_BOOL:
+      return sizeof(protobuf_c_boolean);
+    case PROTOBUF_C_TYPE_STRING:
+    case PROTOBUF_C_TYPE_MESSAGE:
+      return sizeof(void *);
+    case PROTOBUF_C_TYPE_BYTES:
+      return sizeof(ProtobufCBinaryData);
+  }
+  PROTOBUF_C__ASSERT_NOT_REACHED();
+  return 0;
+}
+
+static inline int int_range_lookup(unsigned n_ranges,
+                                   const ProtobufCIntRange *ranges, int value) {
+  unsigned n;
+  unsigned start;
+
+  if (n_ranges == 0) return -1;
+  start = 0;
+  n = n_ranges;
+  while (n > 1) {
+    unsigned mid = start + n / 2;
+
+    if (value < ranges[mid].start_value) {
+      n = mid - start;
+    } else if (value >=
+               ranges[mid].start_value +
+                   (int)(ranges[mid + 1].orig_index - ranges[mid].orig_index)) {
+      unsigned new_start = mid + 1;
+      n = start + n - new_start;
+      start = new_start;
+    } else
+      return (value - ranges[mid].start_value) + ranges[mid].orig_index;
+  }
+  if (n > 0) {
+    unsigned start_orig_index = ranges[start].orig_index;
+    unsigned range_size = ranges[start + 1].orig_index - start_orig_index;
+
+    if (ranges[start].start_value <= value &&
+        value < (int)(ranges[start].start_value + range_size)) {
+      return (value - ranges[start].start_value) + start_orig_index;
+    }
+  }
+  return -1;
+}
+
+static size_t parse_tag_and_wiretype(size_t len, const uint8_t *data,
+                                     uint32_t *tag_out,
+                                     ProtobufCWireType *wiretype_out) {
+  unsigned max_rv = len > 5 ? 5 : len;
+  uint32_t tag = (data[0] & 0x7f) >> 3;
+  unsigned shift = 4;
+  unsigned rv;
+
+  *wiretype_out = data[0] & 7;
+  if ((data[0] & 0x80) == 0) {
+    *tag_out = tag;
+    return 1;
+  }
+  for (rv = 1; rv < max_rv; rv++) {
+    if (data[rv] & 0x80) {
+      tag |= (data[rv] & 0x7f) << shift;
+      shift += 7;
+    } else {
+      tag |= data[rv] << shift;
+      *tag_out = tag;
+      return rv + 1;
+    }
+  }
+  return 0; /* error: bad header */
+}
+
+/* sizeof(ScannedMember) must be <= (1UL<<BOUND_SIZEOF_SCANNED_MEMBER_LOG2) */
+#define BOUND_SIZEOF_SCANNED_MEMBER_LOG2 5
+typedef struct _ScannedMember ScannedMember;
+/** Field as it's being scanned. */
+struct _ScannedMember {
+  uint32_t tag;              /**< Field tag. */
+  uint8_t wire_type;         /**< Field type. */
+  uint8_t length_prefix_len; /**< Prefix length. */
+  const ProtobufCFieldDescriptor *field; /**< Field descriptor. */
+  size_t len;                /**< Field length. */
+  const uint8_t *data;       /**< Pointer to field data. */
+};
+
+static inline uint32_t scan_length_prefixed_data(size_t len,
+                                                 const uint8_t *data,
+                                                 size_t *prefix_len_out) {
+  unsigned hdr_max = len < 5 ? len : 5;
+  unsigned hdr_len;
+  uint32_t val = 0;
+  unsigned i;
+  unsigned shift = 0;
+
+  for (i = 0; i < hdr_max; i++) {
+    val |= ((uint32_t)(data[i] & 0x7f)) << shift;
+    shift += 7;
+    if ((data[i] & 0x80) == 0) break;
+  }
+  if (i == hdr_max) {
+    PROTOBUF_C_UNPACK_ERROR("error parsing length for length-prefixed data");
+    return 0;
+  }
+  hdr_len = i + 1;
+  *prefix_len_out = hdr_len;
+  if (hdr_len + val > len) {
+    PROTOBUF_C_UNPACK_ERROR("data too short after length-prefix of %u", val);
+    return 0;
+  }
+  return hdr_len + val;
+}
+
+static size_t max_b128_numbers(size_t len, const uint8_t *data) {
+  size_t rv = 0;
+  while (len--)
+    if ((*data++ & 0x80) == 0) ++rv;
+  return rv;
+}
+
+/**@}*/
+
+/**
+ * Merge earlier message into a latter message.
+ *
+ * For numeric types and strings, if the same value appears multiple
+ * times, the parser accepts the last value it sees. For embedded
+ * message fields, the parser merges multiple instances of the same
+ * field.
That is, all singular scalar fields in the latter instance + * replace those in the former, singular embedded messages are merged, + * and repeated fields are concatenated. + * + * The earlier message should be freed after calling this function, as + * some of its fields may have been reused and changed to their default + * values during the merge. + */ +static protobuf_c_boolean merge_messages(ProtobufCMessage *earlier_msg, + ProtobufCMessage *latter_msg, + ProtobufCAllocator *allocator) { + unsigned i; + const ProtobufCFieldDescriptor *fields = latter_msg->descriptor->fields; + for (i = 0; i < latter_msg->descriptor->n_fields; i++) { + if (fields[i].label == PROTOBUF_C_LABEL_REPEATED) { + size_t *n_earlier = + STRUCT_MEMBER_PTR(size_t, earlier_msg, fields[i].quantifier_offset); + uint8_t **p_earlier = + STRUCT_MEMBER_PTR(uint8_t *, earlier_msg, fields[i].offset); + size_t *n_latter = + STRUCT_MEMBER_PTR(size_t, latter_msg, fields[i].quantifier_offset); + uint8_t **p_latter = + STRUCT_MEMBER_PTR(uint8_t *, latter_msg, fields[i].offset); + + if (*n_earlier > 0) { + if (*n_latter > 0) { + /* Concatenate the repeated field */ + size_t el_size = sizeof_elt_in_repeated_array(fields[i].type); + uint8_t *new_field; + + new_field = do_alloc(allocator, (*n_earlier + *n_latter) * el_size); + if (!new_field) return FALSE; + + memcpy(new_field, *p_earlier, *n_earlier * el_size); + memcpy(new_field + *n_earlier * el_size, *p_latter, + *n_latter * el_size); + + do_free(allocator, *p_latter); + do_free(allocator, *p_earlier); + *p_latter = new_field; + *n_latter = *n_earlier + *n_latter; + } else { + /* Zero copy the repeated field from the earlier message */ + *n_latter = *n_earlier; + *p_latter = *p_earlier; + } + /* Make sure the field does not get double freed */ + *n_earlier = 0; + *p_earlier = 0; + } + } else if (fields[i].label == PROTOBUF_C_LABEL_OPTIONAL || + fields[i].label == PROTOBUF_C_LABEL_NONE) { + const ProtobufCFieldDescriptor *field; + uint32_t *earlier_case_p = + STRUCT_MEMBER_PTR(uint32_t, earlier_msg, fields[i].quantifier_offset); + uint32_t *latter_case_p = + STRUCT_MEMBER_PTR(uint32_t, latter_msg, fields[i].quantifier_offset); + protobuf_c_boolean need_to_merge = FALSE; + void *earlier_elem; + void *latter_elem; + const void *def_val; + + if (fields[i].flags & PROTOBUF_C_FIELD_FLAG_ONEOF) { + if (*latter_case_p == 0) { + /* lookup correct oneof field */ + int field_index = int_range_lookup( + latter_msg->descriptor->n_field_ranges, + latter_msg->descriptor->field_ranges, *earlier_case_p); + field = latter_msg->descriptor->fields + field_index; + } else { + /* Oneof is present in the latter message, move on */ + continue; + } + } else { + field = &fields[i]; + } + + earlier_elem = STRUCT_MEMBER_P(earlier_msg, field->offset); + latter_elem = STRUCT_MEMBER_P(latter_msg, field->offset); + def_val = field->default_value; + + switch (field->type) { + case PROTOBUF_C_TYPE_MESSAGE: { + ProtobufCMessage *em = *(ProtobufCMessage **)earlier_elem; + ProtobufCMessage *lm = *(ProtobufCMessage **)latter_elem; + if (em != NULL) { + if (lm != NULL) { + if (!merge_messages(em, lm, allocator)) return FALSE; + /* Already merged */ + need_to_merge = FALSE; + } else { + /* Zero copy the message */ + need_to_merge = TRUE; + } + } + break; + } + case PROTOBUF_C_TYPE_BYTES: { + uint8_t *e_data = ((ProtobufCBinaryData *)earlier_elem)->data; + uint8_t *l_data = ((ProtobufCBinaryData *)latter_elem)->data; + const ProtobufCBinaryData *d_bd = (ProtobufCBinaryData *)def_val; + + need_to_merge = + (e_data != 
NULL && (d_bd == NULL || e_data != d_bd->data)) && + (l_data == NULL || (d_bd != NULL && l_data == d_bd->data)); + break; + } + case PROTOBUF_C_TYPE_STRING: { + char *e_str = *(char **)earlier_elem; + char *l_str = *(char **)latter_elem; + const char *d_str = def_val; + + need_to_merge = e_str != d_str && l_str == d_str; + break; + } + default: { + /* Could be has field or case enum, the logic is + * equivalent, since 0 (FALSE) means not set for + * oneof */ + need_to_merge = (*earlier_case_p != 0) && (*latter_case_p == 0); + break; + } + } + + if (need_to_merge) { + size_t el_size = sizeof_elt_in_repeated_array(field->type); + memcpy(latter_elem, earlier_elem, el_size); + /* + * Reset the element from the old message to 0 + * to make sure earlier message deallocation + * doesn't corrupt zero-copied data in the new + * message, earlier message will be freed after + * this function is called anyway + */ + memset(earlier_elem, 0, el_size); + + if (field->quantifier_offset != 0) { + /* Set the has field or the case enum, + * if applicable */ + *latter_case_p = *earlier_case_p; + *earlier_case_p = 0; + } + } + } + } + return TRUE; +} + +/** + * Count packed elements. + * + * Given a raw slab of packed-repeated values, determine the number of + * elements. This function detects certain kinds of errors but not + * others; the remaining error checking is done by + * parse_packed_repeated_member(). + */ +static protobuf_c_boolean count_packed_elements(ProtobufCType type, size_t len, + const uint8_t *data, + size_t *count_out) { + switch (type) { + case PROTOBUF_C_TYPE_SFIXED32: + case PROTOBUF_C_TYPE_FIXED32: + case PROTOBUF_C_TYPE_FLOAT: + if (len % 4 != 0) { + PROTOBUF_C_UNPACK_ERROR( + "length must be a multiple of 4 for fixed-length 32-bit types"); + return FALSE; + } + *count_out = len / 4; + return TRUE; + case PROTOBUF_C_TYPE_SFIXED64: + case PROTOBUF_C_TYPE_FIXED64: + case PROTOBUF_C_TYPE_DOUBLE: + if (len % 8 != 0) { + PROTOBUF_C_UNPACK_ERROR( + "length must be a multiple of 8 for fixed-length 64-bit types"); + return FALSE; + } + *count_out = len / 8; + return TRUE; + case PROTOBUF_C_TYPE_ENUM: + case PROTOBUF_C_TYPE_INT32: + case PROTOBUF_C_TYPE_SINT32: + case PROTOBUF_C_TYPE_UINT32: + case PROTOBUF_C_TYPE_INT64: + case PROTOBUF_C_TYPE_SINT64: + case PROTOBUF_C_TYPE_UINT64: + *count_out = max_b128_numbers(len, data); + return TRUE; + case PROTOBUF_C_TYPE_BOOL: + *count_out = len; + return TRUE; + case PROTOBUF_C_TYPE_STRING: + case PROTOBUF_C_TYPE_BYTES: + case PROTOBUF_C_TYPE_MESSAGE: + default: + PROTOBUF_C_UNPACK_ERROR("bad protobuf-c type %u for packed-repeated", + type); + return FALSE; + } +} + +static inline uint32_t parse_uint32(unsigned len, const uint8_t *data) { + uint32_t rv = data[0] & 0x7f; + if (len > 1) { + rv |= ((uint32_t)(data[1] & 0x7f) << 7); + if (len > 2) { + rv |= ((uint32_t)(data[2] & 0x7f) << 14); + if (len > 3) { + rv |= ((uint32_t)(data[3] & 0x7f) << 21); + if (len > 4) rv |= ((uint32_t)(data[4]) << 28); + } + } + } + return rv; +} + +static inline uint32_t parse_int32(unsigned len, const uint8_t *data) { + return parse_uint32(len, data); +} + +static inline int32_t unzigzag32(uint32_t v) { + if (v & 1) + return -(v >> 1) - 1; + else + return v >> 1; +} + +static inline uint32_t parse_fixed_uint32(const uint8_t *data) { +#if !defined(WORDS_BIGENDIAN) + uint32_t t; + memcpy(&t, data, 4); + return t; +#else + return data[0] | ((uint32_t)(data[1]) << 8) | ((uint32_t)(data[2]) << 16) | + ((uint32_t)(data[3]) << 24); +#endif +} + +static uint64_t 
parse_uint64(unsigned len, const uint8_t *data) { + unsigned shift, i; + uint64_t rv; + + if (len < 5) return parse_uint32(len, data); + rv = ((uint64_t)(data[0] & 0x7f)) | ((uint64_t)(data[1] & 0x7f) << 7) | + ((uint64_t)(data[2] & 0x7f) << 14) | ((uint64_t)(data[3] & 0x7f) << 21); + shift = 28; + for (i = 4; i < len; i++) { + rv |= (((uint64_t)(data[i] & 0x7f)) << shift); + shift += 7; + } + return rv; +} + +static inline int64_t unzigzag64(uint64_t v) { + if (v & 1) + return -(v >> 1) - 1; + else + return v >> 1; +} + +static inline uint64_t parse_fixed_uint64(const uint8_t *data) { +#if !defined(WORDS_BIGENDIAN) + uint64_t t; + memcpy(&t, data, 8); + return t; +#else + return (uint64_t)parse_fixed_uint32(data) | + (((uint64_t)parse_fixed_uint32(data + 4)) << 32); +#endif +} + +static protobuf_c_boolean parse_boolean(unsigned len, const uint8_t *data) { + unsigned i; + for (i = 0; i < len; i++) + if (data[i] & 0x7f) return TRUE; + return FALSE; +} + +static protobuf_c_boolean parse_required_member( + ScannedMember *scanned_member, void *member, ProtobufCAllocator *allocator, + protobuf_c_boolean maybe_clear) { + unsigned len = scanned_member->len; + const uint8_t *data = scanned_member->data; + ProtobufCWireType wire_type = scanned_member->wire_type; + + switch (scanned_member->field->type) { + case PROTOBUF_C_TYPE_ENUM: + case PROTOBUF_C_TYPE_INT32: + if (wire_type != PROTOBUF_C_WIRE_TYPE_VARINT) return FALSE; + *(int32_t *)member = parse_int32(len, data); + return TRUE; + case PROTOBUF_C_TYPE_UINT32: + if (wire_type != PROTOBUF_C_WIRE_TYPE_VARINT) return FALSE; + *(uint32_t *)member = parse_uint32(len, data); + return TRUE; + case PROTOBUF_C_TYPE_SINT32: + if (wire_type != PROTOBUF_C_WIRE_TYPE_VARINT) return FALSE; + *(int32_t *)member = unzigzag32(parse_uint32(len, data)); + return TRUE; + case PROTOBUF_C_TYPE_SFIXED32: + case PROTOBUF_C_TYPE_FIXED32: + case PROTOBUF_C_TYPE_FLOAT: + if (wire_type != PROTOBUF_C_WIRE_TYPE_32BIT) return FALSE; + *(uint32_t *)member = parse_fixed_uint32(data); + return TRUE; + case PROTOBUF_C_TYPE_INT64: + case PROTOBUF_C_TYPE_UINT64: + if (wire_type != PROTOBUF_C_WIRE_TYPE_VARINT) return FALSE; + *(uint64_t *)member = parse_uint64(len, data); + return TRUE; + case PROTOBUF_C_TYPE_SINT64: + if (wire_type != PROTOBUF_C_WIRE_TYPE_VARINT) return FALSE; + *(int64_t *)member = unzigzag64(parse_uint64(len, data)); + return TRUE; + case PROTOBUF_C_TYPE_SFIXED64: + case PROTOBUF_C_TYPE_FIXED64: + case PROTOBUF_C_TYPE_DOUBLE: + if (wire_type != PROTOBUF_C_WIRE_TYPE_64BIT) return FALSE; + *(uint64_t *)member = parse_fixed_uint64(data); + return TRUE; + case PROTOBUF_C_TYPE_BOOL: + *(protobuf_c_boolean *)member = parse_boolean(len, data); + return TRUE; + case PROTOBUF_C_TYPE_STRING: { + char **pstr = member; + unsigned pref_len = scanned_member->length_prefix_len; + + if (wire_type != PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED) return FALSE; + + if (maybe_clear && *pstr != NULL) { + const char *def = scanned_member->field->default_value; + if (*pstr != NULL && *pstr != def) do_free(allocator, *pstr); + } + *pstr = do_alloc(allocator, len - pref_len + 1); + if (*pstr == NULL) return FALSE; + memcpy(*pstr, data + pref_len, len - pref_len); + (*pstr)[len - pref_len] = 0; + return TRUE; + } + case PROTOBUF_C_TYPE_BYTES: { + ProtobufCBinaryData *bd = member; + const ProtobufCBinaryData *def_bd; + unsigned pref_len = scanned_member->length_prefix_len; + + if (wire_type != PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED) return FALSE; + + def_bd = scanned_member->field->default_value; + 
if (maybe_clear && bd->data != NULL && + (def_bd == NULL || bd->data != def_bd->data)) { + do_free(allocator, bd->data); + } + if (len - pref_len > 0) { + bd->data = do_alloc(allocator, len - pref_len); + if (bd->data == NULL) return FALSE; + memcpy(bd->data, data + pref_len, len - pref_len); + } else { + bd->data = NULL; + } + bd->len = len - pref_len; + return TRUE; + } + case PROTOBUF_C_TYPE_MESSAGE: { + ProtobufCMessage **pmessage = member; + ProtobufCMessage *subm; + const ProtobufCMessage *def_mess; + protobuf_c_boolean merge_successful = TRUE; + unsigned pref_len = scanned_member->length_prefix_len; + + if (wire_type != PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED) return FALSE; + + def_mess = scanned_member->field->default_value; + subm = + protobuf_c_message_unpack(scanned_member->field->descriptor, + allocator, len - pref_len, data + pref_len); + + if (maybe_clear && *pmessage != NULL && *pmessage != def_mess) { + if (subm != NULL) + merge_successful = merge_messages(*pmessage, subm, allocator); + /* Delete the previous message */ + protobuf_c_message_free_unpacked(*pmessage, allocator); + } + *pmessage = subm; + if (subm == NULL || !merge_successful) return FALSE; + return TRUE; + } + } + return FALSE; +} + +static protobuf_c_boolean parse_oneof_member(ScannedMember *scanned_member, + void *member, + ProtobufCMessage *message, + ProtobufCAllocator *allocator) { + uint32_t *oneof_case = STRUCT_MEMBER_PTR( + uint32_t, message, scanned_member->field->quantifier_offset); + + /* If we have already parsed a member of this oneof, free it. */ + if (*oneof_case != 0) { + /* lookup field */ + int field_index = + int_range_lookup(message->descriptor->n_field_ranges, + message->descriptor->field_ranges, *oneof_case); + const ProtobufCFieldDescriptor *old_field = + message->descriptor->fields + field_index; + size_t el_size = sizeof_elt_in_repeated_array(old_field->type); + + switch (old_field->type) { + case PROTOBUF_C_TYPE_STRING: { + char **pstr = member; + const char *def = old_field->default_value; + if (*pstr != NULL && *pstr != def) do_free(allocator, *pstr); + break; + } + case PROTOBUF_C_TYPE_BYTES: { + ProtobufCBinaryData *bd = member; + const ProtobufCBinaryData *def_bd = old_field->default_value; + if (bd->data != NULL && (def_bd == NULL || bd->data != def_bd->data)) { + do_free(allocator, bd->data); + } + break; + } + case PROTOBUF_C_TYPE_MESSAGE: { + ProtobufCMessage **pmessage = member; + const ProtobufCMessage *def_mess = old_field->default_value; + if (*pmessage != NULL && *pmessage != def_mess) + protobuf_c_message_free_unpacked(*pmessage, allocator); + break; + } + default: + break; + } + + memset(member, 0, el_size); + } + if (!parse_required_member(scanned_member, member, allocator, TRUE)) + return FALSE; + + *oneof_case = scanned_member->tag; + return TRUE; +} + +static protobuf_c_boolean parse_optional_member(ScannedMember *scanned_member, + void *member, + ProtobufCMessage *message, + ProtobufCAllocator *allocator) { + if (!parse_required_member(scanned_member, member, allocator, TRUE)) + return FALSE; + if (scanned_member->field->quantifier_offset != 0) + STRUCT_MEMBER(protobuf_c_boolean, message, + scanned_member->field->quantifier_offset) = TRUE; + return TRUE; +} + +static protobuf_c_boolean parse_repeated_member(ScannedMember *scanned_member, + void *member, + ProtobufCMessage *message, + ProtobufCAllocator *allocator) { + const ProtobufCFieldDescriptor *field = scanned_member->field; + size_t *p_n = STRUCT_MEMBER_PTR(size_t, message, field->quantifier_offset); + size_t 
siz = sizeof_elt_in_repeated_array(field->type); + char *array = *(char **)member; + + if (!parse_required_member(scanned_member, array + siz * (*p_n), allocator, + FALSE)) { + return FALSE; + } + *p_n += 1; + return TRUE; +} + +static unsigned scan_varint(unsigned len, const uint8_t *data) { + unsigned i; + if (len > 10) len = 10; + for (i = 0; i < len; i++) + if ((data[i] & 0x80) == 0) break; + if (i == len) return 0; + return i + 1; +} + +static protobuf_c_boolean parse_packed_repeated_member( + ScannedMember *scanned_member, void *member, ProtobufCMessage *message) { + const ProtobufCFieldDescriptor *field = scanned_member->field; + size_t *p_n = STRUCT_MEMBER_PTR(size_t, message, field->quantifier_offset); + size_t siz = sizeof_elt_in_repeated_array(field->type); + void *array = *(char **)member + siz * (*p_n); + const uint8_t *at = scanned_member->data + scanned_member->length_prefix_len; + size_t rem = scanned_member->len - scanned_member->length_prefix_len; + size_t count = 0; + unsigned i; + + switch (field->type) { + case PROTOBUF_C_TYPE_SFIXED32: + case PROTOBUF_C_TYPE_FIXED32: + case PROTOBUF_C_TYPE_FLOAT: + count = (scanned_member->len - scanned_member->length_prefix_len) / 4; +#if !defined(WORDS_BIGENDIAN) + goto no_unpacking_needed; +#else + for (i = 0; i < count; i++) { + ((uint32_t *)array)[i] = parse_fixed_uint32(at); + at += 4; + } + break; +#endif + case PROTOBUF_C_TYPE_SFIXED64: + case PROTOBUF_C_TYPE_FIXED64: + case PROTOBUF_C_TYPE_DOUBLE: + count = (scanned_member->len - scanned_member->length_prefix_len) / 8; +#if !defined(WORDS_BIGENDIAN) + goto no_unpacking_needed; +#else + for (i = 0; i < count; i++) { + ((uint64_t *)array)[i] = parse_fixed_uint64(at); + at += 8; + } + break; +#endif + case PROTOBUF_C_TYPE_ENUM: + case PROTOBUF_C_TYPE_INT32: + while (rem > 0) { + unsigned s = scan_varint(rem, at); + if (s == 0) { + PROTOBUF_C_UNPACK_ERROR("bad packed-repeated int32 value"); + return FALSE; + } + ((int32_t *)array)[count++] = parse_int32(s, at); + at += s; + rem -= s; + } + break; + case PROTOBUF_C_TYPE_SINT32: + while (rem > 0) { + unsigned s = scan_varint(rem, at); + if (s == 0) { + PROTOBUF_C_UNPACK_ERROR("bad packed-repeated sint32 value"); + return FALSE; + } + ((int32_t *)array)[count++] = unzigzag32(parse_uint32(s, at)); + at += s; + rem -= s; + } + break; + case PROTOBUF_C_TYPE_UINT32: + while (rem > 0) { + unsigned s = scan_varint(rem, at); + if (s == 0) { + PROTOBUF_C_UNPACK_ERROR("bad packed-repeated enum or uint32 value"); + return FALSE; + } + ((uint32_t *)array)[count++] = parse_uint32(s, at); + at += s; + rem -= s; + } + break; + + case PROTOBUF_C_TYPE_SINT64: + while (rem > 0) { + unsigned s = scan_varint(rem, at); + if (s == 0) { + PROTOBUF_C_UNPACK_ERROR("bad packed-repeated sint64 value"); + return FALSE; + } + ((int64_t *)array)[count++] = unzigzag64(parse_uint64(s, at)); + at += s; + rem -= s; + } + break; + case PROTOBUF_C_TYPE_INT64: + case PROTOBUF_C_TYPE_UINT64: + while (rem > 0) { + unsigned s = scan_varint(rem, at); + if (s == 0) { + PROTOBUF_C_UNPACK_ERROR("bad packed-repeated int64/uint64 value"); + return FALSE; + } + ((int64_t *)array)[count++] = parse_uint64(s, at); + at += s; + rem -= s; + } + break; + case PROTOBUF_C_TYPE_BOOL: + count = rem; + for (i = 0; i < count; i++) { + if (at[i] > 1) { + PROTOBUF_C_UNPACK_ERROR("bad packed-repeated boolean value"); + return FALSE; + } + ((protobuf_c_boolean *)array)[i] = at[i]; + } + break; + default: + PROTOBUF_C__ASSERT_NOT_REACHED(); + } + *p_n += count; + return TRUE; + +#if 
!defined(WORDS_BIGENDIAN) +no_unpacking_needed: + memcpy(array, at, count * siz); + *p_n += count; + return TRUE; +#endif +} + +static protobuf_c_boolean is_packable_type(ProtobufCType type) { + return type != PROTOBUF_C_TYPE_STRING && type != PROTOBUF_C_TYPE_BYTES && + type != PROTOBUF_C_TYPE_MESSAGE; +} + +static protobuf_c_boolean parse_member(ScannedMember *scanned_member, + ProtobufCMessage *message, + ProtobufCAllocator *allocator) { + const ProtobufCFieldDescriptor *field = scanned_member->field; + void *member; + + if (field == NULL) { + ProtobufCMessageUnknownField *ufield = + message->unknown_fields + (message->n_unknown_fields++); + ufield->tag = scanned_member->tag; + ufield->wire_type = scanned_member->wire_type; + ufield->len = scanned_member->len; + ufield->data = do_alloc(allocator, scanned_member->len); + if (ufield->data == NULL) return FALSE; + memcpy(ufield->data, scanned_member->data, ufield->len); + return TRUE; + } + member = (char *)message + field->offset; + switch (field->label) { + case PROTOBUF_C_LABEL_REQUIRED: + return parse_required_member(scanned_member, member, allocator, TRUE); + case PROTOBUF_C_LABEL_OPTIONAL: + case PROTOBUF_C_LABEL_NONE: + if (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_ONEOF)) { + return parse_oneof_member(scanned_member, member, message, allocator); + } else { + return parse_optional_member(scanned_member, member, message, + allocator); + } + case PROTOBUF_C_LABEL_REPEATED: + if (scanned_member->wire_type == PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED && + (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_PACKED) || + is_packable_type(field->type))) { + return parse_packed_repeated_member(scanned_member, member, message); + } else { + return parse_repeated_member(scanned_member, member, message, + allocator); + } + } + PROTOBUF_C__ASSERT_NOT_REACHED(); + return 0; +} + +/** + * Initialise messages generated by old code. + * + * This function is used if desc->message_init == NULL (which occurs + * for old code, and which would be useful to support allocating + * descriptors dynamically). + */ +static void message_init_generic(const ProtobufCMessageDescriptor *desc, + ProtobufCMessage *message) { + unsigned i; + + memset(message, 0, desc->sizeof_message); + message->descriptor = desc; + for (i = 0; i < desc->n_fields; i++) { + if (desc->fields[i].default_value != NULL && + desc->fields[i].label != PROTOBUF_C_LABEL_REPEATED) { + void *field = STRUCT_MEMBER_P(message, desc->fields[i].offset); + const void *dv = desc->fields[i].default_value; + + switch (desc->fields[i].type) { + case PROTOBUF_C_TYPE_INT32: + case PROTOBUF_C_TYPE_SINT32: + case PROTOBUF_C_TYPE_SFIXED32: + case PROTOBUF_C_TYPE_UINT32: + case PROTOBUF_C_TYPE_FIXED32: + case PROTOBUF_C_TYPE_FLOAT: + case PROTOBUF_C_TYPE_ENUM: + memcpy(field, dv, 4); + break; + case PROTOBUF_C_TYPE_INT64: + case PROTOBUF_C_TYPE_SINT64: + case PROTOBUF_C_TYPE_SFIXED64: + case PROTOBUF_C_TYPE_UINT64: + case PROTOBUF_C_TYPE_FIXED64: + case PROTOBUF_C_TYPE_DOUBLE: + memcpy(field, dv, 8); + break; + case PROTOBUF_C_TYPE_BOOL: + memcpy(field, dv, sizeof(protobuf_c_boolean)); + break; + case PROTOBUF_C_TYPE_BYTES: + memcpy(field, dv, sizeof(ProtobufCBinaryData)); + break; + + case PROTOBUF_C_TYPE_STRING: + case PROTOBUF_C_TYPE_MESSAGE: + /* + * The next line essentially implements a cast + * from const, which is totally unavoidable. + */ + *(const void **)field = dv; + break; + } + } + } +} + +/**@}*/ + +/* + * ScannedMember slabs (an unpacking implementation detail). 
Before doing real
+ * unpacking, we first scan through the elements to see how many there are (for
+ * repeated fields), and which field to use (for non-repeated fields given
+ * twice).
+ *
+ * In order to avoid allocations for small messages, we keep a stack-allocated
+ * slab of 1 << FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2 (i.e. 16) ScannedMembers.
+ * After we fill that up, we allocate each slab twice as large as the previous
+ * one.
+ */
+#define FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2 4
+
+/*
+ * The number of slabs, including the stack-allocated ones; choose the number so
+ * that we would overflow if we needed a slab larger than provided.
+ */
+#define MAX_SCANNED_MEMBER_SLAB                                       \
+  (sizeof(unsigned int) * 8 - 1 - BOUND_SIZEOF_SCANNED_MEMBER_LOG2 - \
+   FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2)
+
+#define REQUIRED_FIELD_BITMAP_SET(index) \
+  (required_fields_bitmap[(index) / 8] |= (1UL << ((index) % 8)))
+
+#define REQUIRED_FIELD_BITMAP_IS_SET(index) \
+  (required_fields_bitmap[(index) / 8] & (1UL << ((index) % 8)))
+
+ProtobufCMessage *protobuf_c_message_unpack(
+    const ProtobufCMessageDescriptor *desc, ProtobufCAllocator *allocator,
+    size_t len, const uint8_t *data) {
+  ProtobufCMessage *rv;
+  size_t rem = len;
+  const uint8_t *at = data;
+  const ProtobufCFieldDescriptor *last_field = desc->fields + 0;
+  ScannedMember first_member_slab[1UL << FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2];
+
+  /*
+   * scanned_member_slabs[i] is an array of arrays of ScannedMember.
+   * The first slab, scanned_member_slabs[0], simply points at
+   * first_member_slab, declared on the stack above. All subsequent slabs
+   * will be allocated using the allocator.
+   */
+  ScannedMember *scanned_member_slabs[MAX_SCANNED_MEMBER_SLAB + 1];
+  unsigned which_slab = 0;    /* the slab we are currently populating */
+  unsigned in_slab_index = 0; /* number of members in the slab */
+  size_t n_unknown = 0;
+  unsigned f;
+  unsigned j;
+  unsigned i_slab;
+  unsigned last_field_index = 0;
+  unsigned required_fields_bitmap_len;
+  unsigned char required_fields_bitmap_stack[16];
+  unsigned char *required_fields_bitmap = required_fields_bitmap_stack;
+  protobuf_c_boolean required_fields_bitmap_alloced = FALSE;
+
+  ASSERT_IS_MESSAGE_DESCRIPTOR(desc);
+
+  if (allocator == NULL) allocator = &protobuf_c__allocator;
+
+  rv = do_alloc(allocator, desc->sizeof_message);
+  if (!rv) return (NULL);
+  scanned_member_slabs[0] = first_member_slab;
+
+  required_fields_bitmap_len = (desc->n_fields + 7) / 8;
+  if (required_fields_bitmap_len > sizeof(required_fields_bitmap_stack)) {
+    required_fields_bitmap = do_alloc(allocator, required_fields_bitmap_len);
+    if (!required_fields_bitmap) {
+      do_free(allocator, rv);
+      return (NULL);
+    }
+    required_fields_bitmap_alloced = TRUE;
+  }
+  memset(required_fields_bitmap, 0, required_fields_bitmap_len);
+
+  /*
+   * Generated code always defines "message_init". However, we provide a
+   * fallback for (1) users of old protobuf-c generated-code that do not
+   * provide the function, and (2) descriptors constructed from some other
+   * source (most likely, direct construction from the .proto file).
+ */ + if (desc->message_init != NULL) + protobuf_c_message_init(desc, rv); + else + message_init_generic(desc, rv); + + while (rem > 0) { + uint32_t tag; + ProtobufCWireType wire_type; + size_t used = parse_tag_and_wiretype(rem, at, &tag, &wire_type); + const ProtobufCFieldDescriptor *field; + ScannedMember tmp; + + if (used == 0) { + PROTOBUF_C_UNPACK_ERROR("error parsing tag/wiretype at offset %u", + (unsigned)(at - data)); + goto error_cleanup_during_scan; + } + /* + * \todo Consider optimizing for field[1].id == tag, if field[1] + * exists! + */ + if (last_field == NULL || last_field->id != tag) { + /* lookup field */ + int field_index = + int_range_lookup(desc->n_field_ranges, desc->field_ranges, tag); + if (field_index < 0) { + field = NULL; + n_unknown++; + } else { + field = desc->fields + field_index; + last_field = field; + last_field_index = field_index; + } + } else { + field = last_field; + } + + if (field != NULL && field->label == PROTOBUF_C_LABEL_REQUIRED) + REQUIRED_FIELD_BITMAP_SET(last_field_index); + + at += used; + rem -= used; + tmp.tag = tag; + tmp.wire_type = wire_type; + tmp.field = field; + tmp.data = at; + tmp.length_prefix_len = 0; + + switch (wire_type) { + case PROTOBUF_C_WIRE_TYPE_VARINT: { + unsigned max_len = rem < 10 ? rem : 10; + unsigned i; + + for (i = 0; i < max_len; i++) + if ((at[i] & 0x80) == 0) break; + if (i == max_len) { + PROTOBUF_C_UNPACK_ERROR("unterminated varint at offset %u", + (unsigned)(at - data)); + goto error_cleanup_during_scan; + } + tmp.len = i + 1; + break; + } + case PROTOBUF_C_WIRE_TYPE_64BIT: + if (rem < 8) { + PROTOBUF_C_UNPACK_ERROR("too short after 64bit wiretype at offset %u", + (unsigned)(at - data)); + goto error_cleanup_during_scan; + } + tmp.len = 8; + break; + case PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED: { + size_t pref_len; + + tmp.len = scan_length_prefixed_data(rem, at, &pref_len); + if (tmp.len == 0) { + /* NOTE: scan_length_prefixed_data calls UNPACK_ERROR */ + goto error_cleanup_during_scan; + } + tmp.length_prefix_len = pref_len; + break; + } + case PROTOBUF_C_WIRE_TYPE_32BIT: + if (rem < 4) { + PROTOBUF_C_UNPACK_ERROR("too short after 32bit wiretype at offset %u", + (unsigned)(at - data)); + goto error_cleanup_during_scan; + } + tmp.len = 4; + break; + default: + PROTOBUF_C_UNPACK_ERROR("unsupported tag %u at offset %u", wire_type, + (unsigned)(at - data)); + goto error_cleanup_during_scan; + } + + if (in_slab_index == + (1UL << (which_slab + FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2))) { + size_t size; + + in_slab_index = 0; + if (which_slab == MAX_SCANNED_MEMBER_SLAB) { + PROTOBUF_C_UNPACK_ERROR("too many fields"); + goto error_cleanup_during_scan; + } + which_slab++; + size = sizeof(ScannedMember) + << (which_slab + FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2); + scanned_member_slabs[which_slab] = do_alloc(allocator, size); + if (scanned_member_slabs[which_slab] == NULL) + goto error_cleanup_during_scan; + } + scanned_member_slabs[which_slab][in_slab_index++] = tmp; + + if (field != NULL && field->label == PROTOBUF_C_LABEL_REPEATED) { + size_t *n = STRUCT_MEMBER_PTR(size_t, rv, field->quantifier_offset); + if (wire_type == PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED && + (0 != (field->flags & PROTOBUF_C_FIELD_FLAG_PACKED) || + is_packable_type(field->type))) { + size_t count; + if (!count_packed_elements(field->type, tmp.len - tmp.length_prefix_len, + tmp.data + tmp.length_prefix_len, &count)) { + PROTOBUF_C_UNPACK_ERROR("counting packed elements"); + goto error_cleanup_during_scan; + } + *n += count; + } else { + *n += 1; + } + } 
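+    /*
+     * The member is now recorded in the slab and any repeated-element
+     * counts have been updated; the payload itself is decoded later, in
+     * the parse_member() pass below.
+     */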
+ + at += tmp.len; + rem -= tmp.len; + } + + /* allocate space for repeated fields, also check that all required fields + * have been set */ + for (f = 0; f < desc->n_fields; f++) { + const ProtobufCFieldDescriptor *field = desc->fields + f; + if (field->label == PROTOBUF_C_LABEL_REPEATED) { + size_t siz = sizeof_elt_in_repeated_array(field->type); + size_t *n_ptr = STRUCT_MEMBER_PTR(size_t, rv, field->quantifier_offset); + if (*n_ptr != 0) { + unsigned n = *n_ptr; + void *a; + *n_ptr = 0; + assert(rv->descriptor != NULL); +#define CLEAR_REMAINING_N_PTRS() \ + for (f++; f < desc->n_fields; f++) { \ + field = desc->fields + f; \ + if (field->label == PROTOBUF_C_LABEL_REPEATED) \ + STRUCT_MEMBER(size_t, rv, field->quantifier_offset) = 0; \ + } + a = do_alloc(allocator, siz * n); + if (!a) { + CLEAR_REMAINING_N_PTRS(); + goto error_cleanup; + } + STRUCT_MEMBER(void *, rv, field->offset) = a; + } + } else if (field->label == PROTOBUF_C_LABEL_REQUIRED) { + if (field->default_value == NULL && !REQUIRED_FIELD_BITMAP_IS_SET(f)) { + CLEAR_REMAINING_N_PTRS(); + PROTOBUF_C_UNPACK_ERROR("message '%s': missing required field '%s'", + desc->name, field->name); + goto error_cleanup; + } + } + } +#undef CLEAR_REMAINING_N_PTRS + + /* allocate space for unknown fields */ + if (n_unknown) { + rv->unknown_fields = + do_alloc(allocator, n_unknown * sizeof(ProtobufCMessageUnknownField)); + if (rv->unknown_fields == NULL) goto error_cleanup; + } + + /* do real parsing */ + for (i_slab = 0; i_slab <= which_slab; i_slab++) { + unsigned max = + (i_slab == which_slab) ? in_slab_index : (1UL << (i_slab + 4)); + ScannedMember *slab = scanned_member_slabs[i_slab]; + + for (j = 0; j < max; j++) { + if (!parse_member(slab + j, rv, allocator)) { + PROTOBUF_C_UNPACK_ERROR( + "error parsing member %s of %s", + slab->field ? 
slab->field->name : "*unknown-field*", desc->name); + goto error_cleanup; + } + } + } + + /* cleanup */ + for (j = 1; j <= which_slab; j++) do_free(allocator, scanned_member_slabs[j]); + if (required_fields_bitmap_alloced) + do_free(allocator, required_fields_bitmap); + return rv; + +error_cleanup: + protobuf_c_message_free_unpacked(rv, allocator); + for (j = 1; j <= which_slab; j++) do_free(allocator, scanned_member_slabs[j]); + if (required_fields_bitmap_alloced) + do_free(allocator, required_fields_bitmap); + return NULL; + +error_cleanup_during_scan: + do_free(allocator, rv); + for (j = 1; j <= which_slab; j++) do_free(allocator, scanned_member_slabs[j]); + if (required_fields_bitmap_alloced) + do_free(allocator, required_fields_bitmap); + return NULL; +} + +void protobuf_c_message_free_unpacked(ProtobufCMessage *message, + ProtobufCAllocator *allocator) { + const ProtobufCMessageDescriptor *desc; + unsigned f; + + if (message == NULL) return; + + desc = message->descriptor; + + ASSERT_IS_MESSAGE(message); + + if (allocator == NULL) allocator = &protobuf_c__allocator; + message->descriptor = NULL; + for (f = 0; f < desc->n_fields; f++) { + if (0 != (desc->fields[f].flags & PROTOBUF_C_FIELD_FLAG_ONEOF) && + desc->fields[f].id != + STRUCT_MEMBER(uint32_t, message, + desc->fields[f].quantifier_offset)) { + /* This is not the selected oneof, skip it */ + continue; + } + + if (desc->fields[f].label == PROTOBUF_C_LABEL_REPEATED) { + size_t n = + STRUCT_MEMBER(size_t, message, desc->fields[f].quantifier_offset); + void *arr = STRUCT_MEMBER(void *, message, desc->fields[f].offset); + + if (arr != NULL) { + if (desc->fields[f].type == PROTOBUF_C_TYPE_STRING) { + unsigned i; + for (i = 0; i < n; i++) do_free(allocator, ((char **)arr)[i]); + } else if (desc->fields[f].type == PROTOBUF_C_TYPE_BYTES) { + unsigned i; + for (i = 0; i < n; i++) + do_free(allocator, ((ProtobufCBinaryData *)arr)[i].data); + } else if (desc->fields[f].type == PROTOBUF_C_TYPE_MESSAGE) { + unsigned i; + for (i = 0; i < n; i++) + protobuf_c_message_free_unpacked(((ProtobufCMessage **)arr)[i], + allocator); + } + do_free(allocator, arr); + } + } else if (desc->fields[f].type == PROTOBUF_C_TYPE_STRING) { + char *str = STRUCT_MEMBER(char *, message, desc->fields[f].offset); + + if (str && str != desc->fields[f].default_value) do_free(allocator, str); + } else if (desc->fields[f].type == PROTOBUF_C_TYPE_BYTES) { + void *data = + STRUCT_MEMBER(ProtobufCBinaryData, message, desc->fields[f].offset) + .data; + const ProtobufCBinaryData *default_bd; + + default_bd = desc->fields[f].default_value; + if (data != NULL && (default_bd == NULL || default_bd->data != data)) { + do_free(allocator, data); + } + } else if (desc->fields[f].type == PROTOBUF_C_TYPE_MESSAGE) { + ProtobufCMessage *sm; + + sm = STRUCT_MEMBER(ProtobufCMessage *, message, desc->fields[f].offset); + if (sm && sm != desc->fields[f].default_value) + protobuf_c_message_free_unpacked(sm, allocator); + } + } + + for (f = 0; f < message->n_unknown_fields; f++) + do_free(allocator, message->unknown_fields[f].data); + if (message->unknown_fields != NULL) + do_free(allocator, message->unknown_fields); + + do_free(allocator, message); +} + +void protobuf_c_message_init(const ProtobufCMessageDescriptor *descriptor, + void *message) { + descriptor->message_init((ProtobufCMessage *)(message)); +} + +protobuf_c_boolean protobuf_c_message_check(const ProtobufCMessage *message) { + unsigned i; + + if (!message || !message->descriptor || + message->descriptor->magic != 
PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC) { + return FALSE; + } + + for (i = 0; i < message->descriptor->n_fields; i++) { + const ProtobufCFieldDescriptor *f = message->descriptor->fields + i; + ProtobufCType type = f->type; + ProtobufCLabel label = f->label; + void *field = STRUCT_MEMBER_P(message, f->offset); + + if (label == PROTOBUF_C_LABEL_REPEATED) { + size_t *quantity = STRUCT_MEMBER_P(message, f->quantifier_offset); + + if (*quantity > 0 && *(void **)field == NULL) { + return FALSE; + } + + if (type == PROTOBUF_C_TYPE_MESSAGE) { + ProtobufCMessage **submessage = *(ProtobufCMessage ***)field; + unsigned j; + for (j = 0; j < *quantity; j++) { + if (!protobuf_c_message_check(submessage[j])) return FALSE; + } + } else if (type == PROTOBUF_C_TYPE_STRING) { + char **string = *(char ***)field; + unsigned j; + for (j = 0; j < *quantity; j++) { + if (!string[j]) return FALSE; + } + } else if (type == PROTOBUF_C_TYPE_BYTES) { + ProtobufCBinaryData *bd = *(ProtobufCBinaryData **)field; + unsigned j; + for (j = 0; j < *quantity; j++) { + if (bd[j].len > 0 && bd[j].data == NULL) return FALSE; + } + } + + } else { /* PROTOBUF_C_LABEL_REQUIRED or PROTOBUF_C_LABEL_OPTIONAL */ + + if (type == PROTOBUF_C_TYPE_MESSAGE) { + ProtobufCMessage *submessage = *(ProtobufCMessage **)field; + if (label == PROTOBUF_C_LABEL_REQUIRED || submessage != NULL) { + if (!protobuf_c_message_check(submessage)) return FALSE; + } + } else if (type == PROTOBUF_C_TYPE_STRING) { + char *string = *(char **)field; + if (label == PROTOBUF_C_LABEL_REQUIRED && string == NULL) return FALSE; + } else if (type == PROTOBUF_C_TYPE_BYTES) { + protobuf_c_boolean *has = + STRUCT_MEMBER_P(message, f->quantifier_offset); + ProtobufCBinaryData *bd = field; + if (label == PROTOBUF_C_LABEL_REQUIRED || *has == TRUE) { + if (bd->len > 0 && bd->data == NULL) return FALSE; + } + } + } + } + + return TRUE; +} + +/* === services === */ + +typedef void (*GenericHandler)(void *service, const ProtobufCMessage *input, + ProtobufCClosure closure, void *closure_data); diff --git a/tools/quantification/src/protobuf-c.h b/tools/quantification/src/protobuf-c.h new file mode 100644 index 0000000000000000000000000000000000000000..bd85695b868af6c7b91590196339bc4f7826a256 --- /dev/null +++ b/tools/quantification/src/protobuf-c.h @@ -0,0 +1,921 @@ +/* + * Copyright (c) 2008-2017, Dave Benson and the protobuf-c authors. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/*! \file + * \mainpage Introduction + * + * This is [protobuf-c], a C implementation of [Protocol Buffers]. + * + * This file defines the public API for the `libprotobuf-c` support library. + * This API includes interfaces that can be used directly by client code as well + * as the interfaces used by the code generated by the `protoc-c` compiler. + * + * The `libprotobuf-c` support library performs the actual serialization and + * deserialization of Protocol Buffers messages. It interacts with structures, + * definitions, and metadata generated by the `protoc-c` compiler from .proto + * files. + * + * \authors Dave Benson and the `protobuf-c` authors. + * + * \copyright 2008-2014. Licensed under the terms of the [BSD-2-Clause] license. + * + * [protobuf-c]: https://github.com/protobuf-c/protobuf-c + * [Protocol Buffers]: https://developers.google.com/protocol-buffers/ + * [BSD-2-Clause]: http://opensource.org/licenses/BSD-2-Clause + * + * \page gencode Generated Code + * + * For each enum, we generate a C enum. For each message, we generate a C + * structure which can be cast to a `ProtobufCMessage`. + * + * For each enum and message, we generate a descriptor object that allows us to + * implement a kind of reflection on the structures. + * + * First, some naming conventions: + * + * - The name of the type for enums and messages and services is camel case + * (meaning WordsAreCrammedTogether) except that double underscores are used + * to delimit scopes. For example, the following `.proto` file: + * +~~~{.proto} + package foo.bar; + message BazBah { + optional int32 val = 1; + } +~~~ + * + * would generate a C type `Foo__Bar__BazBah`. + * + * - Identifiers for functions and globals are all lowercase, with camel case + * words separated by single underscores. For example, one of the function + * prototypes generated by `protoc-c` for the above example: + * +~~~{.c} +Foo__Bar__BazBah * + foo__bar__baz_bah__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +~~~ + * + * - Identifiers for enum values contain an uppercase prefix which embeds the + * package name and the enum type name. + * + * - A double underscore is used to separate further components of identifier + * names. + * + * For example, in the name of the unpack function above, the package name + * `foo.bar` has become `foo__bar`, the message name BazBah has become + * `baz_bah`, and the method name is `unpack`. These are all joined with double + * underscores to form the C identifier `foo__bar__baz_bah__unpack`. + * + * We also generate descriptor objects for messages and enums. These are + * declared in the `.pb-c.h` files: + * +~~~{.c} +extern const ProtobufCMessageDescriptor foo__bar__baz_bah__descriptor; +~~~ + * + * The message structures all begin with `ProtobufCMessageDescriptor *` which is + * sufficient to allow them to be cast to `ProtobufCMessage`. + * + * For each message defined in a `.proto` file, we generate a number of + * functions and macros. 
Each function name contains a prefix based on the + * package name and message name in order to make it a unique C identifier. + * + * - `INIT`. Statically initializes a message object, initializing its + * descriptor and setting its fields to default values. Uninitialized + * messages cannot be processed by the protobuf-c library. + * +~~~{.c} +#define FOO__BAR__BAZ_BAH__INIT \ + { PROTOBUF_C_MESSAGE_INIT (&foo__bar__baz_bah__descriptor), 0 } +~~~ + * - `init()`. Initializes a message object, initializing its descriptor and + * setting its fields to default values. Uninitialized messages cannot be + * processed by the protobuf-c library. + * +~~~{.c} +void foo__bar__baz_bah__init + (Foo__Bar__BazBah *message); +~~~ + * - `unpack()`. Unpacks data for a particular message format. Note that the + * `allocator` parameter is usually `NULL` to indicate that the system's + * `malloc()` and `free()` functions should be used for dynamically allocating + * memory. + * +~~~{.c} +Foo__Bar__BazBah * + foo__bar__baz_bah__unpack + (ProtobufCAllocator *allocator, + size_t len, + const uint8_t *data); +~~~ + * + * - `free_unpacked()`. Frees a message object obtained with the `unpack()` + * method. Freeing `NULL` is allowed (the same as with `free()`). + * +~~~{.c} +void foo__bar__baz_bah__free_unpacked + (Foo__Bar__BazBah *message, + ProtobufCAllocator *allocator); +~~~ + * + * - `get_packed_size()`. Calculates the length in bytes of the serialized + * representation of the message object. + * +~~~{.c} +size_t foo__bar__baz_bah__get_packed_size + (const Foo__Bar__BazBah *message); +~~~ + * + * - `pack()`. Pack a message object into a preallocated buffer. Assumes that + * the buffer is large enough. (Use `get_packed_size()` first.) + * +~~~{.c} +size_t foo__bar__baz_bah__pack + (const Foo__Bar__BazBah *message, + uint8_t *out); +~~~ + * + * - `pack_to_buffer()`. Packs a message into a "virtual buffer". This is an + * object which defines an "append bytes" callback to consume data as it is + * serialized. + * +~~~{.c} +size_t foo__bar__baz_bah__pack_to_buffer + (const Foo__Bar__BazBah *message, + ProtobufCBuffer *buffer); +~~~ + * + * \page pack Packing and unpacking messages + * + * To pack a message, first compute the packed size of the message with + * protobuf_c_message_get_packed_size(), then allocate a buffer of at least + * that size, then call protobuf_c_message_pack(). + * + * Alternatively, a message can be serialized without calculating the final size + * first. Use the protobuf_c_message_pack_to_buffer() function and provide a + * ProtobufCBuffer object which implements an "append" method that consumes + * data. + * + * To unpack a message, call the protobuf_c_message_unpack() function. The + * result can be cast to an object of the type that matches the descriptor for + * the message. + * + * The result of unpacking a message should be freed with + * protobuf_c_message_free_unpacked(). 
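+ *
+ * As a sketch (assuming the generated `Foo__Bar__BazBah` type from the example
+ * above, and a serialized buffer `data` of `len` bytes), a complete
+ * unpack/use/free round trip might look like:
+ *
+~~~{.c}
+Foo__Bar__BazBah *msg;
+
+/* A NULL allocator selects the system's malloc() and free(). */
+msg = foo__bar__baz_bah__unpack(NULL, len, data);
+if (msg == NULL)
+    return;     /* `data` was not a valid serialized message */
+/* ... read msg->val ... */
+foo__bar__baz_bah__free_unpacked(msg, NULL);
+~~~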
+ */
+
+#ifndef PROTOBUF_C_H
+#define PROTOBUF_C_H
+
+#include <assert.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+#define PROTOBUF_C__BEGIN_DECLS extern "C" {
+#define PROTOBUF_C__END_DECLS }
+#else
+#define PROTOBUF_C__BEGIN_DECLS
+#define PROTOBUF_C__END_DECLS
+#endif
+
+PROTOBUF_C__BEGIN_DECLS
+
+#if defined(_WIN32) && defined(PROTOBUF_C_USE_SHARED_LIB)
+#ifdef PROTOBUF_C_EXPORT
+#define PROTOBUF_C__API __declspec(dllexport)
+#else
+#define PROTOBUF_C__API __declspec(dllimport)
+#endif
+#else
+#define PROTOBUF_C__API
+#endif
+
+#if !defined(PROTOBUF_C__NO_DEPRECATED) && \
+    ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
+#define PROTOBUF_C__DEPRECATED __attribute__((__deprecated__))
+#else
+#define PROTOBUF_C__DEPRECATED
+#endif
+
+#ifndef PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE
+#define PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(enum_name) \
+  , _##enum_name##_IS_INT_SIZE = INT_MAX
+#endif
+
+#define PROTOBUF_C__SERVICE_DESCRIPTOR_MAGIC 0x14159bc3
+#define PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC 0x28aaeef9
+#define PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC 0x114315af
+
+/* Empty string used for initializers */
+extern const char protobuf_c_empty_string[];
+
+/**
+ * \defgroup api Public API
+ *
+ * This is the public API for `libprotobuf-c`. These interfaces are stable and
+ * subject to Semantic Versioning guarantees.
+ *
+ * @{
+ */
+
+/**
+ * Values for the `flags` word in `ProtobufCFieldDescriptor`.
+ */
+typedef enum {
+  /** Set if the field is repeated and marked with the `packed` option. */
+  PROTOBUF_C_FIELD_FLAG_PACKED = (1 << 0),
+
+  /** Set if the field is marked with the `deprecated` option. */
+  PROTOBUF_C_FIELD_FLAG_DEPRECATED = (1 << 1),
+
+  /** Set if the field is a member of a oneof (union). */
+  PROTOBUF_C_FIELD_FLAG_ONEOF = (1 << 2),
+} ProtobufCFieldFlag;
+
+/**
+ * Message field rules.
+ *
+ * \see [Defining A Message Type] in the Protocol Buffers documentation.
+ *
+ * [Defining A Message Type]:
+ *      https://developers.google.com/protocol-buffers/docs/proto#simple
+ */
+typedef enum {
+  /** A well-formed message must have exactly one of this field. */
+  PROTOBUF_C_LABEL_REQUIRED,
+
+  /**
+   * A well-formed message can have zero or one of this field (but not
+   * more than one).
+   */
+  PROTOBUF_C_LABEL_OPTIONAL,
+
+  /**
+   * This field can be repeated any number of times (including zero) in a
+   * well-formed message. The order of the repeated values will be
+   * preserved.
+   */
+  PROTOBUF_C_LABEL_REPEATED,
+
+  /**
+   * This field has no label. This is valid only in proto3 and is
+   * equivalent to OPTIONAL but no "has" quantifier will be consulted.
+   */
+  PROTOBUF_C_LABEL_NONE,
+} ProtobufCLabel;
+
+/**
+ * Field value types.
+ *
+ * \see [Scalar Value Types] in the Protocol Buffers documentation.
+ * + * [Scalar Value Types]: + * https://developers.google.com/protocol-buffers/docs/proto#scalar + */ +typedef enum { + PROTOBUF_C_TYPE_INT32, /**< int32 */ + PROTOBUF_C_TYPE_SINT32, /**< signed int32 */ + PROTOBUF_C_TYPE_SFIXED32, /**< signed int32 (4 bytes) */ + PROTOBUF_C_TYPE_INT64, /**< int64 */ + PROTOBUF_C_TYPE_SINT64, /**< signed int64 */ + PROTOBUF_C_TYPE_SFIXED64, /**< signed int64 (8 bytes) */ + PROTOBUF_C_TYPE_UINT32, /**< unsigned int32 */ + PROTOBUF_C_TYPE_FIXED32, /**< unsigned int32 (4 bytes) */ + PROTOBUF_C_TYPE_UINT64, /**< unsigned int64 */ + PROTOBUF_C_TYPE_FIXED64, /**< unsigned int64 (8 bytes) */ + PROTOBUF_C_TYPE_FLOAT, /**< float */ + PROTOBUF_C_TYPE_DOUBLE, /**< double */ + PROTOBUF_C_TYPE_BOOL, /**< boolean */ + PROTOBUF_C_TYPE_ENUM, /**< enumerated type */ + PROTOBUF_C_TYPE_STRING, /**< UTF-8 or ASCII string */ + PROTOBUF_C_TYPE_BYTES, /**< arbitrary byte sequence */ + PROTOBUF_C_TYPE_MESSAGE, /**< nested message */ +} ProtobufCType; + +/** + * Field wire types. + * + * \see [Message Structure] in the Protocol Buffers documentation. + * + * [Message Structure]: + * https://developers.google.com/protocol-buffers/docs/encoding#structure + */ +typedef enum { + PROTOBUF_C_WIRE_TYPE_VARINT = 0, + PROTOBUF_C_WIRE_TYPE_64BIT = 1, + PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED = 2, + /* "Start group" and "end group" wire types are unsupported. */ + PROTOBUF_C_WIRE_TYPE_32BIT = 5, +} ProtobufCWireType; + +struct ProtobufCAllocator; +struct ProtobufCBinaryData; +struct ProtobufCBuffer; +struct ProtobufCBufferSimple; +struct ProtobufCEnumDescriptor; +struct ProtobufCEnumValue; +struct ProtobufCEnumValueIndex; +struct ProtobufCFieldDescriptor; +struct ProtobufCIntRange; +struct ProtobufCMessage; +struct ProtobufCMessageDescriptor; +struct ProtobufCMessageUnknownField; +struct ProtobufCMethodDescriptor; +struct ProtobufCService; +struct ProtobufCServiceDescriptor; + +typedef struct ProtobufCAllocator ProtobufCAllocator; +typedef struct ProtobufCBinaryData ProtobufCBinaryData; +typedef struct ProtobufCBuffer ProtobufCBuffer; +typedef struct ProtobufCBufferSimple ProtobufCBufferSimple; +typedef struct ProtobufCEnumDescriptor ProtobufCEnumDescriptor; +typedef struct ProtobufCEnumValue ProtobufCEnumValue; +typedef struct ProtobufCEnumValueIndex ProtobufCEnumValueIndex; +typedef struct ProtobufCFieldDescriptor ProtobufCFieldDescriptor; +typedef struct ProtobufCIntRange ProtobufCIntRange; +typedef struct ProtobufCMessage ProtobufCMessage; +typedef struct ProtobufCMessageDescriptor ProtobufCMessageDescriptor; +typedef struct ProtobufCMessageUnknownField ProtobufCMessageUnknownField; +typedef struct ProtobufCMethodDescriptor ProtobufCMethodDescriptor; +typedef struct ProtobufCService ProtobufCService; +typedef struct ProtobufCServiceDescriptor ProtobufCServiceDescriptor; + +/** Boolean type. */ +typedef int protobuf_c_boolean; + +typedef void (*ProtobufCClosure)(const ProtobufCMessage *, void *closure_data); +typedef void (*ProtobufCMessageInit)(ProtobufCMessage *); +typedef void (*ProtobufCServiceDestroy)(ProtobufCService *); + +/** + * Structure for defining a custom memory allocator. + */ +struct ProtobufCAllocator { + /** Function to allocate memory. */ + void *(*alloc)(void *allocator_data, size_t size); + + /** Function to free memory. */ + void (*free)(void *allocator_data, void *pointer); + + /** Opaque pointer passed to `alloc` and `free` functions. */ + void *allocator_data; +}; + +/** + * Structure for the protobuf `bytes` scalar type. 
+ * + * The data contained in a `ProtobufCBinaryData` is an arbitrary sequence of + * bytes. It may contain embedded `NUL` characters and is not required to be + * `NUL`-terminated. + */ +struct ProtobufCBinaryData { + size_t len; /**< Number of bytes in the `data` field. */ + uint8_t *data; /**< Data bytes. */ +}; + +/** + * Structure for defining a virtual append-only buffer. Used by + * protobuf_c_message_pack_to_buffer() to abstract the consumption of serialized + * bytes. + * + * `ProtobufCBuffer` "subclasses" may be defined on the stack. For example, to + * write to a `FILE` object: + * +~~~{.c} +typedef struct { + ProtobufCBuffer base; + FILE *fp; +} BufferAppendToFile; + +static void +my_buffer_file_append(ProtobufCBuffer *buffer, + size_t len, + const uint8_t *data) +{ + BufferAppendToFile *file_buf = (BufferAppendToFile *) buffer; + fwrite(data, len, 1, file_buf->fp); // XXX: No error handling! +} +~~~ + * + * To use this new type of ProtobufCBuffer, it could be called as follows: + * +~~~{.c} +... +BufferAppendToFile tmp = {0}; +tmp.base.append = my_buffer_file_append; +tmp.fp = fp; +protobuf_c_message_pack_to_buffer(&message, &tmp); +... +~~~ + */ +struct ProtobufCBuffer { + /** Append function. Consumes the `len` bytes stored at `data`. */ + void (*append)(ProtobufCBuffer *buffer, size_t len, const uint8_t *data); +}; + +/** + * Simple buffer "subclass" of `ProtobufCBuffer`. + * + * A `ProtobufCBufferSimple` object is declared on the stack and uses a + * scratch buffer provided by the user for the initial allocation. It performs + * exponential resizing, using dynamically allocated memory. A + * `ProtobufCBufferSimple` object can be created and used as follows: + * +~~~{.c} +uint8_t pad[128]; +ProtobufCBufferSimple simple = PROTOBUF_C_BUFFER_SIMPLE_INIT(pad); +ProtobufCBuffer *buffer = (ProtobufCBuffer *) &simple; +~~~ + * + * `buffer` can now be used with `protobuf_c_message_pack_to_buffer()`. Once a + * message has been serialized to a `ProtobufCBufferSimple` object, the + * serialized data bytes can be accessed from the `.data` field. + * + * To free the memory allocated by a `ProtobufCBufferSimple` object, if any, + * call PROTOBUF_C_BUFFER_SIMPLE_CLEAR() on the object, for example: + * +~~~{.c} +PROTOBUF_C_BUFFER_SIMPLE_CLEAR(&simple); +~~~ + * + * \see PROTOBUF_C_BUFFER_SIMPLE_INIT + * \see PROTOBUF_C_BUFFER_SIMPLE_CLEAR + */ +struct ProtobufCBufferSimple { + /** "Base class". */ + ProtobufCBuffer base; + /** Number of bytes allocated in `data`. */ + size_t alloced; + /** Number of bytes currently stored in `data`. */ + size_t len; + /** Data bytes. */ + uint8_t *data; + /** Whether `data` must be freed. */ + protobuf_c_boolean must_free_data; + /** Allocator to use. May be NULL to indicate the system allocator. */ + ProtobufCAllocator *allocator; +}; + +/** + * Describes an enumeration as a whole, with all of its values. + */ +struct ProtobufCEnumDescriptor { + /** Magic value checked to ensure that the API is used correctly. */ + uint32_t magic; + + /** The qualified name (e.g., "namespace.Type"). */ + const char *name; + /** The unqualified name as given in the .proto file (e.g., "Type"). */ + const char *short_name; + /** Identifier used in generated C code. */ + const char *c_name; + /** The dot-separated namespace. */ + const char *package_name; + + /** Number elements in `values`. */ + unsigned n_values; + /** Array of distinct values, sorted by numeric value. */ + const ProtobufCEnumValue *values; + + /** Number of elements in `values_by_name`. 
*/ + unsigned n_value_names; + /** Array of named values, including aliases, sorted by name. */ + const ProtobufCEnumValueIndex *values_by_name; + + /** Number of elements in `value_ranges`. */ + unsigned n_value_ranges; + /** Value ranges, for faster lookups by numeric value. */ + const ProtobufCIntRange *value_ranges; + + /** Reserved for future use. */ + void *reserved1; + /** Reserved for future use. */ + void *reserved2; + /** Reserved for future use. */ + void *reserved3; + /** Reserved for future use. */ + void *reserved4; +}; + +/** + * Represents a single value of an enumeration. + */ +struct ProtobufCEnumValue { + /** The string identifying this value in the .proto file. */ + const char *name; + + /** The string identifying this value in generated C code. */ + const char *c_name; + + /** The numeric value assigned in the .proto file. */ + int value; +}; + +/** + * Used by `ProtobufCEnumDescriptor` to look up enum values. + */ +struct ProtobufCEnumValueIndex { + /** Name of the enum value. */ + const char *name; + /** Index into values[] array. */ + unsigned index; +}; + +/** + * Describes a single field in a message. + */ +struct ProtobufCFieldDescriptor { + /** Name of the field as given in the .proto file. */ + const char *name; + + /** Tag value of the field as given in the .proto file. */ + uint32_t id; + + /** Whether the field is `REQUIRED`, `OPTIONAL`, or `REPEATED`. */ + ProtobufCLabel label; + + /** The type of the field. */ + ProtobufCType type; + + /** + * The offset in bytes of the message's C structure's quantifier field + * (the `has_MEMBER` field for optional members or the `n_MEMBER` field + * for repeated members or the case enum for oneofs). + */ + unsigned quantifier_offset; + + /** + * The offset in bytes into the message's C structure for the member + * itself. + */ + unsigned offset; + + /** + * A type-specific descriptor. + * + * If `type` is `PROTOBUF_C_TYPE_ENUM`, then `descriptor` points to the + * corresponding `ProtobufCEnumDescriptor`. + * + * If `type` is `PROTOBUF_C_TYPE_MESSAGE`, then `descriptor` points to + * the corresponding `ProtobufCMessageDescriptor`. + * + * Otherwise this field is NULL. + */ + const void *descriptor; /* for MESSAGE and ENUM types */ + + /** The default value for this field, if defined. May be NULL. */ + const void *default_value; + + /** + * A flag word. Zero or more of the bits defined in the + * `ProtobufCFieldFlag` enum may be set. + */ + uint32_t flags; + + /** Reserved for future use. */ + unsigned reserved_flags; + /** Reserved for future use. */ + void *reserved2; + /** Reserved for future use. */ + void *reserved3; +}; + +/** + * Helper structure for optimizing int => index lookups in the case + * where the keys are mostly consecutive values, as they presumably are for + * enums and fields. + * + * The data structures requires that the values in the original array are + * sorted. + */ +struct ProtobufCIntRange { + int start_value; + unsigned orig_index; + /* + * NOTE: the number of values in the range can be inferred by looking + * at the next element's orig_index. A dummy element is added to make + * this simple. + */ +}; + +/** + * An instance of a message. + * + * `ProtobufCMessage` is a light-weight "base class" for all messages. + * + * In particular, `ProtobufCMessage` doesn't have any allocation policy + * associated with it. That's because it's common to create `ProtobufCMessage` + * objects on the stack. In fact, that's what we recommend for sending messages. 
+ * If the object is allocated from the stack, you can't really have a memory + * leak. + * + * This means that calls to functions like protobuf_c_message_unpack() which + * return a `ProtobufCMessage` must be paired with a call to a free function, + * like protobuf_c_message_free_unpacked(). + */ +struct ProtobufCMessage { + /** The descriptor for this message type. */ + const ProtobufCMessageDescriptor *descriptor; + /** The number of elements in `unknown_fields`. */ + unsigned n_unknown_fields; + /** The fields that weren't recognized by the parser. */ + ProtobufCMessageUnknownField *unknown_fields; +}; + +/** + * Describes a message. + */ +struct ProtobufCMessageDescriptor { + /** Magic value checked to ensure that the API is used correctly. */ + uint32_t magic; + + /** The qualified name (e.g., "namespace.Type"). */ + const char *name; + /** The unqualified name as given in the .proto file (e.g., "Type"). */ + const char *short_name; + /** Identifier used in generated C code. */ + const char *c_name; + /** The dot-separated namespace. */ + const char *package_name; + + /** + * Size in bytes of the C structure representing an instance of this + * type of message. + */ + size_t sizeof_message; + + /** Number of elements in `fields`. */ + unsigned n_fields; + /** Field descriptors, sorted by tag number. */ + const ProtobufCFieldDescriptor *fields; + /** Used for looking up fields by name. */ + const unsigned *fields_sorted_by_name; + + /** Number of elements in `field_ranges`. */ + unsigned n_field_ranges; + /** Used for looking up fields by id. */ + const ProtobufCIntRange *field_ranges; + + /** Message initialisation function. */ + ProtobufCMessageInit message_init; + + /** Reserved for future use. */ + void *reserved1; + /** Reserved for future use. */ + void *reserved2; + /** Reserved for future use. */ + void *reserved3; +}; + +/** + * An unknown message field. + */ +struct ProtobufCMessageUnknownField { + /** The tag number. */ + uint32_t tag; + /** The wire type of the field. */ + ProtobufCWireType wire_type; + /** Number of bytes in `data`. */ + size_t len; + /** Field data. */ + uint8_t *data; +}; + +/** + * Method descriptor. + */ +struct ProtobufCMethodDescriptor { + /** Method name. */ + const char *name; + /** Input message descriptor. */ + const ProtobufCMessageDescriptor *input; + /** Output message descriptor. */ + const ProtobufCMessageDescriptor *output; +}; + +/** + * Service. + */ +struct ProtobufCService { + /** Service descriptor. */ + const ProtobufCServiceDescriptor *descriptor; + /** Function to invoke the service. */ + void (*invoke)(ProtobufCService *service, unsigned method_index, + const ProtobufCMessage *input, ProtobufCClosure closure, + void *closure_data); + /** Function to destroy the service. */ + void (*destroy)(ProtobufCService *service); +}; + +/** + * Service descriptor. + */ +struct ProtobufCServiceDescriptor { + /** Magic value checked to ensure that the API is used correctly. */ + uint32_t magic; + + /** Service name. */ + const char *name; + /** Short version of service name. */ + const char *short_name; + /** C identifier for the service name. */ + const char *c_name; + /** Package name. */ + const char *package; + /** Number of elements in `methods`. */ + unsigned n_methods; + /** Method descriptors, in the order defined in the .proto file. */ + const ProtobufCMethodDescriptor *methods; + /** Sort index of methods. */ + const unsigned *method_indices_by_name; +}; + +/** + * Get the version of the protobuf-c library. 
Note that this is the version of + * the library linked against, not the version of the headers compiled against. + * + * \return A string containing the version number of protobuf-c. + */ +PROTOBUF_C__API +const char *protobuf_c_version(void); + +/** + * Get the version of the protobuf-c library. Note that this is the version of + * the library linked against, not the version of the headers compiled against. + * + * \return A 32 bit unsigned integer containing the version number of + * protobuf-c, represented in base-10 as (MAJOR*1E6) + (MINOR*1E3) + PATCH. + */ +PROTOBUF_C__API +uint32_t protobuf_c_version_number(void); + +/** + * The version of the protobuf-c headers, represented as a string using the same + * format as protobuf_c_version(). + */ +#define PROTOBUF_C_VERSION "1.3.0" + +/** + * The version of the protobuf-c headers, represented as an integer using the + * same format as protobuf_c_version_number(). + */ +#define PROTOBUF_C_VERSION_NUMBER 1003000 + +/** + * The minimum protoc-c version which works with the current version of the + * protobuf-c headers. + */ +#define PROTOBUF_C_MIN_COMPILER_VERSION 1000000 + +/** + * Determine the number of bytes required to store the serialised message. + * + * \param message + * The message object to serialise. + * \return + * Number of bytes. + */ +PROTOBUF_C__API +size_t protobuf_c_message_get_packed_size(const ProtobufCMessage *message); + +/** + * Unpack a serialised message into an in-memory representation. + * + * \param descriptor + * The message descriptor. + * \param allocator + * `ProtobufCAllocator` to use for memory allocation. May be NULL to + * specify the default allocator. + * \param len + * Length in bytes of the serialised message. + * \param data + * Pointer to the serialised message. + * \return + * An unpacked message object. + * \retval NULL + * If an error occurred during unpacking. + */ +PROTOBUF_C__API +ProtobufCMessage *protobuf_c_message_unpack( + const ProtobufCMessageDescriptor *descriptor, ProtobufCAllocator *allocator, + size_t len, const uint8_t *data); + +/** + * Free an unpacked message object. + * + * This function should be used to deallocate the memory used by a call to + * protobuf_c_message_unpack(). + * + * \param message + * The message object to free. May be NULL. + * \param allocator + * `ProtobufCAllocator` to use for memory deallocation. May be NULL to + * specify the default allocator. + */ +PROTOBUF_C__API +void protobuf_c_message_free_unpacked(ProtobufCMessage *message, + ProtobufCAllocator *allocator); + +/** + * Check the validity of a message object. + * + * Makes sure all required fields (`PROTOBUF_C_LABEL_REQUIRED`) are present. + * Recursively checks nested messages. + * + * \retval TRUE + * Message is valid. + * \retval FALSE + * Message is invalid. + */ +PROTOBUF_C__API +protobuf_c_boolean protobuf_c_message_check(const ProtobufCMessage *); + +/** Message initialiser. */ +#define PROTOBUF_C_MESSAGE_INIT(descriptor) \ + { descriptor, 0, NULL } + +/** + * Initialise a message object from a message descriptor. + * + * \param descriptor + * Message descriptor. + * \param message + * Allocated block of memory of size `descriptor->sizeof_message`. + */ +PROTOBUF_C__API +void protobuf_c_message_init(const ProtobufCMessageDescriptor *descriptor, + void *message); + +/** + * Initialise a `ProtobufCBufferSimple` object. 
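+ *
+ * For a usage example, see the `ProtobufCBufferSimple` documentation above.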
+ */
+#define PROTOBUF_C_BUFFER_SIMPLE_INIT(array_of_bytes) \
+  { \
+    {protobuf_c_buffer_simple_append}, sizeof(array_of_bytes), 0, \
+    (array_of_bytes), 0, NULL \
+  }
+
+/**
+ * Clear a `ProtobufCBufferSimple` object, freeing any allocated memory.
+ */
+#define PROTOBUF_C_BUFFER_SIMPLE_CLEAR(simp_buf) \
+  do { \
+    if ((simp_buf)->must_free_data) { \
+      if ((simp_buf)->allocator != NULL) \
+        (simp_buf)->allocator->free((simp_buf)->allocator, (simp_buf)->data); \
+      else \
+        free((simp_buf)->data); \
+    } \
+  } while (0)
+
+/**
+ * The `append` method for `ProtobufCBufferSimple`.
+ *
+ * \param buffer
+ *      The buffer object to append to. Must actually be a
+ *      `ProtobufCBufferSimple` object.
+ * \param len
+ *      Number of bytes in `data`.
+ * \param data
+ *      Data to append.
+ */
+PROTOBUF_C__API
+void protobuf_c_buffer_simple_append(ProtobufCBuffer *buffer, size_t len,
+                                     const unsigned char *data);
+
+/**@}*/
+
+PROTOBUF_C__END_DECLS
+
+#endif /* PROTOBUF_C_H */
diff --git a/tools/quantification/src/tensor_desc.h b/tools/quantification/src/tensor_desc.h
new file mode 100644
index 0000000000000000000000000000000000000000..4eadf341db998ae12939d252d585051ba54c3bf0
--- /dev/null
+++ b/tools/quantification/src/tensor_desc.h
@@ -0,0 +1,72 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <vector>
+
+#include "src/framework.pb-c.h"
+
+namespace paddle_mobile {
+namespace framework {
+
+enum VarType_Type {
+  VARTYPE_TYPE_BOOL = 0,
+  VARTYPE_TYPE_INT16 = 1,
+  VARTYPE_TYPE_INT32 = 2,
+  VARTYPE_TYPE_INT64 = 3,
+  VARTYPE_TYPE_FP16 = 4,
+  VARTYPE_TYPE_FP32 = 5,
+  VARTYPE_TYPE_FP64 = 6,
+  VARTYPE_TYPE_LOD_TENSOR = 7,
+  VARTYPE_TYPE_SELECTED_ROWS = 8,
+  VARTYPE_TYPE_FEED_MINIBATCH = 9,
+  VARTYPE_TYPE_FETCH_LIST = 10,
+  VARTYPE_TYPE_STEP_SCOPES = 11,
+  VARTYPE_TYPE_STEP_LOD_RANK_TABLE = 12,
+  VARTYPE_TYPE_STEP_LOD_TENSOR_ARRAY = 13,
+  VARTYPE_TYPE_STEP_PLACE_LIST = 14,
+  VARTYPE_TYPE_READER = 15,
+  VARTYPE_TYPE_CHANNEL = 16,
+  VARTYPE_TYPE_RAW = 17,
+  VARTYPE_TYPE_TUPLE = 18
+};
+
+class TensorDesc {
+ public:
+  TensorDesc() = default;
+  TensorDesc(const TensorDesc &desc) {
+    this->dims_ = desc.dims_;
+    this->data_type_ = desc.data_type_;
+  }
+
+  explicit TensorDesc(
+      PaddleMobile__Framework__Proto__VarType__TensorDesc *desc) {
+    for (int i = 0; i < desc->n_dims; ++i) {
+      int64_t d = desc->dims[i];
+      dims_.emplace_back(d);
+    }
+    data_type_ = (VarType_Type)desc->data_type;
+  }
+
+  std::vector<int64_t> Dims() const { return dims_; }
+  VarType_Type DataType() const { return data_type_; }
+
+ private:
+  std::vector<int64_t> dims_;
+  VarType_Type data_type_;
+};
+
+}  // namespace framework
+}  // namespace paddle_mobile
diff --git a/tools/quantification/src/var_desc.h b/tools/quantification/src/var_desc.h
new file mode 100644
index 0000000000000000000000000000000000000000..0b9c5ac4d672be2dd8a8a2a2695c2816f9cae05a
--- /dev/null
+++ b/tools/quantification/src/var_desc.h
@@ -0,0 +1,80 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <string>
+
+#include "src/framework.pb-c.h"
+#include "src/tensor_desc.h"
+
+namespace paddle_mobile {
+namespace framework {
+
+class VarDesc {
+ public:
+  VarDesc(const VarDesc &var_desc) {
+    this->data_type_ = var_desc.data_type_;
+    this->name_ = var_desc.name_;
+    this->persistable_ = var_desc.persistable_;
+    this->tensor_desc_ = var_desc.tensor_desc_;
+    this->type_ = var_desc.type_;
+  }
+  explicit VarDesc(PaddleMobile__Framework__Proto__VarDesc *desc) {
+    type_ = (VarType_Type)desc->type->type;
+    name_ = std::string(desc->name);
+    persistable_ = static_cast<bool>(desc->persistable);
+
+    switch (type_) {
+      case VARTYPE_TYPE_SELECTED_ROWS:
+        tensor_desc_ = TensorDesc(desc->type->selected_rows);
+        break;
+      case VARTYPE_TYPE_LOD_TENSOR:
+        tensor_desc_ = TensorDesc(desc->type->lod_tensor->tensor);
+        break;
+      case VARTYPE_TYPE_STEP_LOD_TENSOR_ARRAY:
+        tensor_desc_ = TensorDesc(desc->type->tensor_array->tensor);
+        break;
+      default:
+        break;
+    }
+    switch (type_) {
+      case VARTYPE_TYPE_CHANNEL:
+        data_type_ = (VarType_Type)desc->type->channel->data_type;
+        break;
+      default:
+        data_type_ = tensor_desc_.DataType();
+        break;
+    }
+  }
+  std::string Name() const { return name_; }
+
+  VarType_Type Type() const { return type_; }
+
+  bool Persistable() const { return persistable_; }
+
+  const TensorDesc &Tensor_desc() const { return tensor_desc_; }
+
+ private:
+  std::string name_;
+  bool persistable_;
+  TensorDesc tensor_desc_;
+  VarType_Type type_;
+  VarType_Type data_type_;
+};
+
+}  // namespace framework
+}  // namespace paddle_mobile
diff --git a/tools/toolchains/arm-android-neon.cmake b/tools/toolchains/arm-android-neon.cmake
index f2fa600b90fb54886838e953e61c1e940569dee6..5e431059a974810b2fd0481e0942447f57bf1286 100644
--- a/tools/toolchains/arm-android-neon.cmake
+++ b/tools/toolchains/arm-android-neon.cmake
@@ -1,2 +1,5 @@
 set(ANDROID_ARM_NEON ON)
-include("${CMAKE_CURRENT_LIST_DIR}/../android-cmake/android.toolchain.cmake")
\ No newline at end of file
+set(ANDROID_PIE TRUE)
+set(ANDROID_STL "c++_static")
+set(ANDROID_PLATFORM "android-22")
+include("${CMAKE_CURRENT_LIST_DIR}/../android-cmake/android.toolchain.cmake")