diff --git a/.gitignore b/.gitignore index 8f92118b08bb30531869c28d32d335cc47116350..8c4450181d82116620d880c93789dee9dcda9d73 100644 --- a/.gitignore +++ b/.gitignore @@ -92,3 +92,4 @@ metal/images/ metal/paddle-mobile/paddle-mobile/CPU/libpaddle-mobile.a *.xcuserdatad/ */xcuserdata/ +/venv/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 3f9fbcbc18d0bfe1d634dd6815b16a5f1862e846..9268c9a2d1ab3791805c539eb408560bc3aaff26 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,7 @@ set(CMAKE_CXX_FLAGS "-O3 -s -DNDEBUG ${CMAKE_CXX_FLAGS}") if(IS_IOS) set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc \ -std=gnu++11 -stdlib=libc++ -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}") + add_compile_options(-fembed-bitcode) else() set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}") endif() diff --git a/README.md b/README.md index de7dd530c94b4a3055cbf07a4a19a55c21457ed0..b86860830066cf1b622ff3b449803b0446794b74 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,8 @@ Paddle-Mobile是PaddlePaddle组织下的项目,是一个致力于嵌入式平 ### 开发文档 开发文档主要是关于编译、运行等问题。做为开发者,它可以和贡献文档共同结合使用。 -[开发文档链接](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/doc/development_doc.md) +* [iOS](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/doc/development_ios.md) +* [Android](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/doc/development_android.md) ### 贡献文档 - [贡献文档链接](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/CONTRIBUTING.md) diff --git a/benchmark/arm_benchmark.md b/benchmark/arm_benchmark.md new file mode 100644 index 0000000000000000000000000000000000000000..280bec16e4baf035eb30138d49b2d31d038aa4c7 --- /dev/null +++ b/benchmark/arm_benchmark.md @@ -0,0 +1,36 @@ +|mobilenet arm v7|1线程|2线程|4线程| +|------------|----|-----|-----| +|麒麟970(ms)|108.180|63.935|37.545| +|麒麟960(ms)|108.588|63.073|36.822| +|高通845(ms)|85.952|48.890|28.641| +|高通835(ms)|105.434|62.752|37.131| +||||| +|mobilenetssd arm v7|1线程|2线程|4线程| +|麒麟970(ms)|212.686|127.205|77.485| +|麒麟960(ms)|212.641|125.338|75.250| +|高通845(ms)|182.863|95.671|56.857| +|高通835(ms)|213.849|127.717|77.006| +||||| +|googlenet(v1) arm v7|1线程|2线程|4线程| +|麒麟970(ms)|335.288|234.559|161.295| +|麒麟960(ms)|354.443|232.642|157.815| +|高通845(ms)|282.007|173.146|122.148| +|高通835(ms)|341.250|233.354|158.554| +||||| +|squeezenet arm v7|1线程|2线程|4线程| +|麒麟970(ms)|83.726|57.944|36.923| +|麒麟960(ms)|85.835|55.762|36.496| +|高通845(ms)|71.301|41.618|28.785| +|高通835(ms)|82.407|56.176|36.455| +||||| +|yolo arm v7|1线程|2线程|4线程| +|麒麟970(ms)|129.658|79.993|49.969| +|麒麟960(ms)|130.208|78.791|48.390| +|高通845(ms)|109.244|61.736|40.600| +|高通835(ms)|130.402|80.863|50.359| + + 测试机型信息: + 麒麟970:荣耀v10 (2.36GHz * 4 + 1.8GHz * 4) + 麒麟960:华为mate9 (2.36GHz * 4 + 1.8GHz * 4) + 骁龙835:小米6 (2.45GHz * 4 + 1.9GHz * 4) + 骁龙845:OPPO FindX (2.80GHz * 4 + 1.8GHz * 4) \ No newline at end of file diff --git a/benchmark/metal_benchmark.md b/benchmark/metal_benchmark.md new file mode 100644 index 0000000000000000000000000000000000000000..e3e5d0750f72fc395c402d516aa9fee02a0fcd7f --- /dev/null +++ b/benchmark/metal_benchmark.md @@ -0,0 +1,10 @@ +|mobilenetfssd|速度| +|------------|-----| +|A9(ms)|33.78| +|A10(ms)|24.05| +|A11(ms)|17.15| +||| +|genet|速度| +|A9(ms) |3.49| +|A10(ms)|2.54| +|A11(ms)|1.43| \ No newline at end of file diff --git a/doc/design_doc.md b/doc/design_doc.md index bf5f78e8d805465418cad8989945f2afa7ab5587..70292c6b0bd617930a9c9458b87cef34dee3347e 100644 --- a/doc/design_doc.md +++ b/doc/design_doc.md @@ -3,7 +3,7 @@ #### 以下是 paddle-mobile 代码的执行流程图: 
-![执行流程图](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305189473720.png) +![执行流程图](http://mms-graph.bj.bcebos.com/paddle-mobile/git_images/flow_chart.png) #### 主要分为: Loader 模块、 Program 模块、 Executor 模块、 op 模块、 kernel 模块、scope variable Tensor 模块 @@ -14,12 +14,12 @@ 先来看一下模型, 模型分为两种结构: 一种为参数文件是散开的, 如下图, 红框为模型结构的 protobuf 文件, 其余为参数文件 -![模型描述](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305190629577.png) +![模型描述](http://mms-graph.bj.bcebos.com/paddle-mobile/git_images/model_desc.png) 另一种为参数文件结合在一起的, 如下图, 红框内为模型结构描述的 protobuf 文件, 另一个文件为结合在一起的参数文件 -![模型描述combined](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305191057130.png) +![模型描述combined](http://mms-graph.bj.bcebos.com/paddle-mobile/git_images/model_desc_combined.png) loader 模块的作用是将模型结构信息 load 进内存, 将红框内的 protobuf 文件 load 进内存, 并对模型结构进行优化(如将几个细粒度的 op 融合成 粗粒度的 op, 如将 conv、 add、 batchnorm、 relu 融合为 conv\_add\_batchnorm\_relu). @@ -161,7 +161,7 @@ sh build.sh android yolo ### 五. kernel kernel 为 op 的底层运算实现, 主要有两个函数, Init 和 Compute, 分别用来初始化、预处理 和 运算操作, 值得提出的是, kernel 会根据泛型特化到不同的平台, 如图所示: -![设备特化]![](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305191401976.png) +![设备特化](http://mms-graph.bj.bcebos.com/paddle-mobile/git_images/devices.png) 不同平台的 kernel 实现, 为同一个 kernel 类不同泛型的特化实现, 目前有三个平台, arm、mali、fpga, 图中的 central-arm-func\ 目录为 op kernel 的 arm 实现, 它承担了 arm\ 目录下 kernel 的底层实现, 同时 arm 处理器作为中央处理器, central-arm-func\ 也可以作为其他协处理器的底层实现, 如: fpga 的某一个 op kernel 还没有 fpga 协处理器的实现, 就可以直接调用使用这里的 arm 实现. diff --git a/doc/development_doc.md b/doc/development_android.md similarity index 79% rename from doc/development_doc.md rename to doc/development_android.md index 3f45f956f00e78c23b60b4c108b8c90cf4065e04..528d7aa2def78103b8dbdcf0329279f029c85cac 100644 --- a/doc/development_doc.md +++ b/doc/development_android.md @@ -1,74 +1,3 @@ -### iOS&Android开发文档 - -# iOS开发文档 - -## 编译 - -```sh - -# 在 paddle-mobile 目录下: -cd tools - -sh build.sh ios - -# 如果只想编译某个特定模型的 op, 则需执行以下命令 -sh build.sh ios googlenet - -# 在这个文件夹下, 你可以拿到生成的 .a 库 -cd ../build/release/ios/build - -``` -#### 常见问题: - -1. No iOS SDK's found in default search path ... 
- - 这个问题是因为 tools/ios-cmake/ios.toolchain.cmake 找不到你最近使用的 iOS SDK 路径, 所以需要自己进行指定, - 以我当前的环境为例: 在 tools/ios-cmake/ios.toolchain.cmake 143行前添加我本地的 iOS SDK 路径: set(CMAKE_IOS_SDK_ROOT "/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk") - -## 集成 - -``` -将上一步生成的: -libpaddle-mobile.a - -/src/ios_io/ 下的 -PaddleMobile.h -``` -拖入工程 - -#### oc 接口 - -接口如下: - -``` -/* - 创建对象 -*/ -- (instancetype)init; - -/* - load 模型, 开辟内存 -*/ -- (BOOL)load:(NSString *)modelPath andWeightsPath:(NSString *)weighsPath; - -/* - 进行预测, means 和 scale 为训练模型时的预处理参数, 如训练时没有做这些预处理则直接使用 predict -*/ -- (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim means:(NSArray *)means scale:(float)scale; - -/* - 进行预测 -*/ -- (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim; - -/* - 清理内存 -*/ -- (void)clear; - -``` - - # Android开发文档 用户可通过如下两种方式,交叉编译Android平台上适用的paddle-mobile库: diff --git a/doc/development_arm_linux.md b/doc/development_arm_linux.md new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/doc/development_ios.md b/doc/development_ios.md new file mode 100644 index 0000000000000000000000000000000000000000..1d4f28bd5bcde1c3068ddeae87627ae6686d886a --- /dev/null +++ b/doc/development_ios.md @@ -0,0 +1,85 @@ +# iOS开发文档 + +## CPU + +需要: xcode + +### 编译 + +```sh + +# 在 paddle-mobile 目录下: +cd tools + +sh build.sh ios + +# 如果只想编译某个特定模型的 op, 则需执行以下命令 +sh build.sh ios googlenet + +# 在这个文件夹下, 你可以拿到生成的 .a 库 +cd ../build/release/ios/build + +``` +#### 常见问题: + +1. No iOS SDK's found in default search path ... + + 这个问题是因为 tools/ios-cmake/ios.toolchain.cmake 找不到你最近使用的 iOS SDK 路径, 所以需要自己进行指定, + 以我当前的环境为例: 在 tools/ios-cmake/ios.toolchain.cmake 143行前添加我本地的 iOS SDK 路径: set(CMAKE_IOS_SDK_ROOT "/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk") + +### 集成 + +``` +将上一步生成的: +libpaddle-mobile.a + +/src/ios_io/ 下的 +PaddleMobile.h +``` +拖入工程 + +#### oc 接口 + +接口如下: + +``` +/* + 创建对象 +*/ +- (instancetype)init; + +/* + load 模型, 开辟内存 +*/ +- (BOOL)load:(NSString *)modelPath andWeightsPath:(NSString *)weighsPath; + +/* + 进行预测, means 和 scale 为训练模型时的预处理参数, 如训练时没有做这些预处理则直接使用 predict +*/ +- (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim means:(NSArray *)means scale:(float)scale; + +/* + 进行预测 +*/ +- (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim; + +/* + 清理内存 +*/ +- (void)clear; + +``` + +## GPU + +需要: xcode、cocoapods + +``` +# 在 paddle-mobile 目录下: +cd metal + +pod install + +open paddle-mobile.xcworkspace + +``` diff --git a/doc/images/devices.png b/doc/images/devices.png deleted file mode 100644 index 413d32c249972ee96f678d50a5cd0b36a2a03e29..0000000000000000000000000000000000000000 Binary files a/doc/images/devices.png and /dev/null differ diff --git a/doc/images/flow_chart.png b/doc/images/flow_chart.png deleted file mode 100644 index c747230da43e2e688d7460704268631758d34596..0000000000000000000000000000000000000000 Binary files a/doc/images/flow_chart.png and /dev/null differ diff --git a/doc/images/model_desc.png b/doc/images/model_desc.png deleted file mode 100644 index 3c026b6192c8e1d84b3a82c3db91e022f35358c2..0000000000000000000000000000000000000000 Binary files a/doc/images/model_desc.png and /dev/null differ diff --git a/doc/images/model_desc_combined.png b/doc/images/model_desc_combined.png deleted file mode 100644 index 38e7388efcfdcad53f4e80ce0ac5d3b993eb986c..0000000000000000000000000000000000000000 Binary files a/doc/images/model_desc_combined.png and 
/dev/null differ
diff --git a/metal/README.md b/metal/README.md
index 90c517a2c10c28a9fcf26357e65ce2178a2fd8ac..2da6558b05b051b8b476f259d49fa3845e397b29 100644
--- a/metal/README.md
+++ b/metal/README.md
@@ -1,3 +1,12 @@
 ## Paddle-Mobile
 
-This folder is used to develop metal version for ios gpu
+需要: xcode、cocoapods
+
+```
+pod install
+
+open paddle-mobile.xcworkspace
+
+```
+
+Demo 所需依赖的模型可在[这里](http://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImages.zip)下载
diff --git a/src/common/types.cpp b/src/common/types.cpp
index 18b143a974d7bee7a79b9b14233b30a497882b94..46e5bfab3711ac81f5438cb21105843f52183e15 100644
--- a/src/common/types.cpp
+++ b/src/common/types.cpp
@@ -62,6 +62,8 @@ const char *G_OP_TYPE_CRF = "crf_decoding";
 const char *G_OP_TYPE_BILINEAR_INTERP = "bilinear_interp";
 const char *G_OP_TYPE_FLATTEN = "flatten";
 const char *G_OP_TYPE_SHAPE = "shape";
+const char *G_OP_TYPE_ELEMENTWISE_MUL = "elementwise_mul";
+const char *G_OP_TYPE_SUM = "sum";
 const char *G_OP_TYPE_QUANTIZE = "quantize";
 const char *G_OP_TYPE_DEQUANTIZE = "dequantize";
@@ -115,7 +117,8 @@ std::unordered_map<
     {G_OP_TYPE_FLATTEN, {{"X"}, {"Out"}}},
     {G_OP_TYPE_SHAPE, {{"Input"}, {"Out"}}},
     {G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}},
+    {G_OP_TYPE_SUM, {{"X"}, {"Out"}}},
+    {G_OP_TYPE_ELEMENTWISE_MUL, {{"X", "Y"}, {"Out"}}},
     {G_OP_TYPE_QUANTIZE, {{"X"}, {"Out", "OutScale"}}},
     {G_OP_TYPE_DEQUANTIZE, {{"X", "Scale"}, {"Out"}}}};
-
 }  // namespace paddle_mobile
diff --git a/src/common/types.h b/src/common/types.h
index ec2e3ea2f2c818ca6ea7634ac1c564bbca492a34..0855bd053f0dc804b6f3289796f3818657675864 100644
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -126,6 +126,8 @@ extern const char *G_OP_TYPE_REGION;
 extern const char *G_OP_TYPE_FUSION_CONV_BN;
 extern const char *G_OP_TYPE_CONV_TRANSPOSE;
 extern const char *G_OP_TYPE_PRELU;
+extern const char *G_OP_TYPE_SUM;
+extern const char *G_OP_TYPE_ELEMENTWISE_MUL;
 extern const char *G_OP_TYPE_QUANTIZE;
 extern const char *G_OP_TYPE_DEQUANTIZE;
diff --git a/src/common/variant.h b/src/common/variant.h
index 8ec9ccb7a92acb06417a74d9ebe95189ac9e547f..ca2fcc090769bc49603176dc361d5f8c8e22890c 100644
--- a/src/common/variant.h
+++ b/src/common/variant.h
@@ -79,13 +79,13 @@ struct Variant {
   template <typename T, typename... Args>
   void Set(Args &&... args) {
-    helper::Destroy(type_id, &data.data);
-    new (&data.data) T(std::forward<Args>(args)...);
+    helper::Destroy(type_id, data.data);
+    new (data.data) T(std::forward<Args>(args)...);
     type_id = typeid(T).hash_code();
   }
 
   void SetString(std::string &string) {
-    // helper::Destroy(type_id, &data);
+    helper::Destroy(type_id, data.data);
     type_id = typeid(std::string).hash_code();
     strcpy(data.data, string.c_str());
   }
@@ -109,7 +109,7 @@ struct Variant {
       "stl lib with string copy)");
       exit(0);
     } else if (type_id == typeid(T).hash_code()) {
-      return *const_cast<T *>(reinterpret_cast<const T *>(&data));
+      return *const_cast<T *>(reinterpret_cast<const T *>(data.data));
     } else {
       PADDLE_MOBILE_THROW_EXCEPTION(" bad cast in variant");
       exit(0);
@@ -122,7 +122,8 @@ struct Variant {
   static inline size_t invalid_type() { return typeid(void).hash_code(); }
   typedef VariantHelper<Ts...> helper;
   size_t type_id;
-  RawData data;
+  // TODO: use an auto size to suit this.
+  RawData<64> data;
 };
 
 template
diff --git a/src/framework/mixed_vector.h b/src/framework/mixed_vector.h
new file mode 100644
index 0000000000000000000000000000000000000000..031d73179c991229ec99ebdde927b0ad1532d82b
--- /dev/null
+++ b/src/framework/mixed_vector.h
@@ -0,0 +1,272 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#include +#include + +#include "framework/tensor.h" +#include "framework/tensor_util.h" + +namespace paddle_mobile { +namespace framework { + +// Vector implements the std::vector interface, and can get Data or +// MutableData from any place. The data will be synced implicitly inside. +template +class Vector { + public: + using value_type = T; + // Default ctor. Create empty Vector + Vector() { InitEmpty(); } + + // Fill vector with value. The vector size is `count`. + explicit Vector(size_t count, const T& value = T()) { + InitEmpty(); + if (count != 0) { + resize(count); + T* ptr = begin(); + for (size_t i = 0; i < count; ++i) { + ptr[i] = value; + } + } + } + + // Ctor with init_list + Vector(std::initializer_list init) { + if (init.size() == 0) { + InitEmpty(); + } else { + InitByIter(init.size(), init.begin(), init.end()); + } + } + + // implicit cast from std::vector. + template + Vector(const std::vector& dat) { // NOLINT + if (dat.size() == 0) { + InitEmpty(); + } else { + InitByIter(dat.size(), dat.begin(), dat.end()); + } + } + + // Copy ctor + Vector(const Vector& other) { this->operator=(other); } + + // Copy operator + Vector& operator=(const Vector& other) { + if (other.size() != 0) { + this->InitByIter(other.size(), other.begin(), other.end()); + } else { + InitEmpty(); + } + return *this; + } + + // Move ctor + Vector(Vector&& other) { + this->size_ = other.size_; + this->flag_ = other.flag_; + if (other.cuda_vec_.memory_size()) { + this->cuda_vec_.ShareDataWith(other.cuda_vec_); + } + if (other.cpu_vec_.memory_size()) { + this->cpu_vec_.ShareDataWith(other.cpu_vec_); + } + } + + // CPU data access method. Mutable. + T& operator[](size_t i) { + MutableCPU(); + return const_cast(cpu_vec_.data())[i]; + } + + // CPU data access method. Immutable. + const T& operator[](size_t i) const { + // ImmutableCPU(); + return cpu_vec_.data()[i]; + } + + // std::vector iterator methods. Based on CPU data access method + size_t size() const { return size_; } + + T* begin() { return capacity() == 0 ? &EmptyDummy() : &this->operator[](0); } + + T* end() { + return capacity() == 0 ? &EmptyDummy() : &this->operator[](size()); + } + + T& front() { return *begin(); } + + T& back() { + auto it = end(); + --it; + return *it; + } + + const T* begin() const { + return capacity() == 0 ? &EmptyDummy() : &this->operator[](0); + } + + const T* end() const { + return capacity() == 0 ? &EmptyDummy() : &this->operator[](size()); + } + + const T* cbegin() const { return begin(); } + + const T* cend() const { return end(); } + + const T& back() const { + auto it = end(); + --it; + return *it; + } + + T* data() { return begin(); } + + const T* data() const { return begin(); } + + const T& front() const { return *begin(); } + // end of std::vector iterator methods + + // assign this from iterator. 
+ // NOTE: the iterator must support `end-begin` + template + void assign(Iter begin, Iter end) { + InitByIter(end - begin, begin, end); + } + + // push_back. If the previous capacity is not enough, the memory will + // double. + void push_back(T elem) { + if (size_ + 1 > capacity()) { + reserve((size_ + 1) << 1); + } + *end() = elem; + ++size_; + } + + // extend a vector by iterator. + // NOTE: the iterator must support end-begin + template + void Extend(It begin, It end) { + size_t pre_size = size_; + resize(pre_size + (end - begin)); + T* ptr = this->begin() + pre_size; + for (; begin < end; ++begin, ++ptr) { + *ptr = *begin; + } + } + + // resize the vector + void resize(size_t size) { + if (size + 1 <= capacity()) { + size_ = size; + } else { + MutableCPU(); + Tensor cpu_tensor; + T* ptr = cpu_tensor.mutable_data( + framework::make_ddim({static_cast(size)})); + const T* old_ptr = + cpu_vec_.memory_size() == 0 ? nullptr : cpu_vec_.data(); + if (old_ptr != nullptr) { + std::copy(old_ptr, old_ptr + size_, ptr); + } + size_ = size; + cpu_vec_.ShareDataWith(cpu_tensor); + } + } + + // clear + void clear() { + size_ = 0; + flag_ = kDirty | kDataInCPU; + } + + size_t capacity() const { + return cpu_vec_.memory_size() / SizeOfType(typeid(T)); + } + + // reserve data + void reserve(size_t size) { + size_t pre_size = size_; + resize(size); + resize(pre_size); + } + + // implicit cast operator. Vector can be cast to std::vector implicitly. + operator std::vector() const { + std::vector result; + result.resize(size()); + std::copy(begin(), end(), result.begin()); + return result; + } + + bool operator==(const Vector& other) const { + if (size() != other.size()) return false; + auto it1 = cbegin(); + auto it2 = other.cbegin(); + for (; it1 < cend(); ++it1, ++it2) { + if (*it1 != *it2) { + return false; + } + } + return true; + } + + private: + void InitEmpty() { + size_ = 0; + flag_ = kDataInCPU; + } + + template + void InitByIter(size_t size, Iter begin, Iter end) { + T* ptr = this->cpu_vec_.template mutable_data( + framework::make_ddim({static_cast(size)})); + for (size_t i = 0; i < size; ++i) { + *ptr++ = *begin++; + } + flag_ = kDataInCPU | kDirty; + size_ = size; + } + + enum DataFlag { + kDataInCPU = 0x01, + kDataInCUDA = 0x02, + // kDirty means the data has been changed in one device. + kDirty = 0x10 + }; + + void MutableCPU() { flag_ = kDirty | kDataInCPU; } + + void UnsetFlag(int flag) const { flag_ &= ~flag; } + void SetFlag(int flag) const { flag_ |= flag; } + + static T& EmptyDummy() { + static T dummy = T(); + return dummy; + } + + mutable int flag_; + mutable Tensor cpu_vec_; + mutable Tensor cuda_vec_; + size_t size_; +}; + +} // namespace framework +} // namespace paddle_mobile diff --git a/src/framework/selected_rows.cpp b/src/framework/selected_rows.cpp new file mode 100644 index 0000000000000000000000000000000000000000..96e72051e5bf882c3549fb94cd8119ffc4fdfb9c --- /dev/null +++ b/src/framework/selected_rows.cpp @@ -0,0 +1,127 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#include "framework/selected_rows.h" + +namespace paddle_mobile { +namespace framework { + +struct ReAllocateVisitor { + ReAllocateVisitor(framework::Tensor* tensor, const framework::DDim& dims) + : tensor_(tensor), dims_(dims) {} + + template + void operator()() const { + framework::Tensor cpu_tensor; + T* ptr = cpu_tensor.mutable_data(dims_); + const T* old_ptr = + tensor_->memory_size() == 0 ? nullptr : tensor_->data(); + if (old_ptr != nullptr) { + std::copy(old_ptr, old_ptr + tensor_->numel(), ptr); + } + tensor_->ShareDataWith(cpu_tensor); + } + + framework::Tensor* tensor_; + framework::DDim dims_; +}; +// TensorCopyVisitor(value, i * value_width, *value_.get(), +// index * value_width, value_width)); +struct TensorCopyVisitor { + TensorCopyVisitor(framework::Tensor* dst, int64_t dst_offset, + const framework::Tensor src, int64_t src_offset, + int64_t size) + : dst_(dst), + dst_offset_(dst_offset), + src_(src), + src_offset_(src_offset), + size_(size) {} + + template + void operator()() const { + // TODO(Yancey1989): support other place + memory::Copy(dst_->mutable_data() + dst_offset_, + src_.data() + src_offset_, size_ * sizeof(T)); + } + + framework::Tensor* dst_; + int64_t dst_offset_; + framework::Tensor src_; + int64_t src_offset_; + int64_t size_; +}; + +bool SelectedRows::HasKey(int64_t key) const { + return std::find(rows_.begin(), rows_.end(), key) == rows_.end() ? false + : true; +} + +// std::vector SelectedRows::Get(std::vector keys, +// framework::Tensor* value) const { +// PADDLE_MOBILE_ENFORCE(value->IsInitialized(), +// "The value tensor should be initialized."); +// std::vector non_keys; +// int64_t value_width = value_->numel() / value_->dims()[0]; +// PADDLE_MOBILE_ENFORCE(value_width == value->numel() / value->dims()[0], +// "output tensor should have the same shape with table " +// "execpt the dims[0]."); +// +// for (size_t i = 0; i < keys.size(); ++i) { +// int64_t index = Index(keys[i]); +// if (index == -1) { +// non_keys.push_back(keys[i]); +// } else { +// framework::VisitDataType( +// framework::ToDataType(value_->type()), +// TensorCopyVisitor(value, i * value_width, *value_.get(), +// index * value_width, value_width)); +// } +// } +// return non_keys; +//} + +// bool SelectedRows::Set(int64_t key, const framework::Tensor& value) { +// PADDLE_MOBILE_ENFORCE(value.IsInitialized(), "The value should be +// initialized."); if (value_->IsInitialized()) { +// PADDLE_MOBILE_ENFORCE( +// value.type() == value_->type(), +// "The type of the value should be same with the original value"); +// } +// PADDLE_MOBILE_ENFORCE(value.dims()[0] == static_cast(1), +// "The first dim of value should be 1."); +// auto index = Index(key); +// bool is_new_key = false; +// if (index == -1) { +// rows_.push_back(key); +// index = rows_.size() - 1; +// is_new_key = true; +// // whether need to resize the table +// if (static_cast(rows_.size()) > value_->dims()[0]) { +// auto dims = value_->dims(); +// dims[0] = (dims[0] + 1) << 1; +// framework::VisitDataType(framework::ToDataType(value.type()), +// ReAllocateVisitor(value_.get(), dims)); +// } +// } +// +// framework::VisitDataType( +// framework::ToDataType(value.type()), +// TensorCopyVisitor(value_.get(), +// index * value_->numel() / value_->dims()[0], value, +// static_cast(0), value.numel())); +// return is_new_key; +//} + +} // namespace framework +} // namespace paddle_mobile diff --git 
a/src/framework/selected_rows.h b/src/framework/selected_rows.h
new file mode 100644
index 0000000000000000000000000000000000000000..9c8176285278afa69679ac3471f7a4adb0aeea3f
--- /dev/null
+++ b/src/framework/selected_rows.h
@@ -0,0 +1,138 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <algorithm>
+#include <vector>
+
+#include "framework/lod_tensor.h"
+#include "framework/tensor.h"
+#include "memory/t_malloc.h"
+#include "mixed_vector.h"
+
+namespace paddle_mobile {
+namespace framework {
+
+class SelectedRows {
+  /*
+   * @brief We can use the SelectedRows structure to reproduce a sparse table.
+   *  A sparse table is a key-value structure: the key is an `int64_t` number,
+   *  and the value is a Tensor whose first dimension is 0.
+   *  You can use the following interface to operate the sparse table, and you
+   *  can find some detail information from the comments of each interface:
+   *
+   *  HasKey(key), whether the sparse table has the specified key.
+   *  Set(key, value), set a key-value pair into the sparse table.
+   *  Get(keys, value*), get value by given key list and apply it to the given
+   *  value pointer with the specified offset.
+   *
+   */
+ public:
+  SelectedRows(const std::vector<int64_t>& rows, const int64_t& height)
+      : rows_(rows), height_(height) {
+    value_.reset(new Tensor());
+  }
+
+  SelectedRows() {
+    height_ = 0;
+    value_.reset(new Tensor());
+  }
+
+  // platform::Place place() const { return value_->place(); }
+
+  const Tensor& value() const { return *value_; }
+
+  Tensor* mutable_value() { return value_.get(); }
+
+  int64_t height() const { return height_; }
+
+  void set_height(int64_t height) { height_ = height; }
+
+  const Vector<int64_t>& rows() const { return rows_; }
+
+  Vector<int64_t>* mutable_rows() { return &rows_; }
+
+  void set_rows(const Vector<int64_t>& rows) { rows_ = rows; }
+
+  /*
+   * @brief whether the table has the specified key.
+   *
+   * @return true if the key exists.
+   */
+  bool HasKey(int64_t key) const;
+
+  /*
+   * @brief Get value by the key list, if the key exists in the table.
+   *
+   * @return a list of keys which do not exist in the table
+   */
+  std::vector<int64_t> Get(std::vector<int64_t> keys,
+                           framework::Tensor* tensor) const;
+
+  /*
+   * @brief Set a key-value pair into the table.
+   *  This function will double the value memory if it's not enough.
+   *
+   * @note:
+   *  1. The first dim of the value should be 1
+   *  2. The value should be initialized and the data type
+   *     should be the same with the table.
+   *
+   * @return true if the key is a new one, otherwise false
+   *
+   */
+  bool Set(int64_t key, const Tensor& value);
+
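+  /*
+   * Usage sketch (illustrative only; keys and sizes are made up):
+   *
+   *   SelectedRows table(std::vector<int64_t>{0, 2}, 4);  // keys 0 and 2
+   *   table.mutable_value()->mutable_data<float>(
+   *       framework::make_ddim({2, 8}));  // one 8-wide value row per key
+   *   table.HasKey(2);                    // true
+   *   table.Index(2);                     // 1, position of key 2 in rows()
+   */
+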
+  /*
+   * @brief Get the index of key in rows
+   *
+   * @return -1 if the key does not exist.
+   */
+  int64_t Index(int64_t key) const {
+    auto it = std::find(rows_.begin(), rows_.end(), key);
+    if (it == rows_.end()) {
+      return static_cast<int64_t>(-1);
+    }
+    return static_cast<int64_t>(std::distance(rows_.begin(), it));
+  }
+
+  DDim GetCompleteDims() const {
+    std::vector<int64_t> dims = vectorize(value_->dims());
+    dims[0] = height_;
+    return make_ddim(dims);
+  }
+
+ private:
+  // Notice: rows can be duplicate. We can have {0, 4, 7, 0, 5, 7, 9} here.
+  // SelectedRows are simply concatenated when adding together. Only when a
+  // SelectedRows is added to a Tensor will the duplicate rows be handled.
+  Vector<int64_t> rows_;
+  std::unique_ptr<Tensor> value_{nullptr};
+  int64_t height_;
+};
+
+/*
+ * Serialize/Deserialize SelectedRows to std::ostream
+ * You can pass ofstream or ostringstream to serialize to a file
+ * or to an in-memory string. GPU tensor will be copied to CPU.
+ */
+void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows);
+void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows);
+
+}  // namespace framework
+}  // namespace paddle_mobile
diff --git a/src/operators/elementwise_mul_op.cpp b/src/operators/elementwise_mul_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..920a9a546f5ea6d5ef4f41de361ba43cb9c1a7b1
--- /dev/null
+++ b/src/operators/elementwise_mul_op.cpp
@@ -0,0 +1,41 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef ELEMENTWISEMUL_OP
+
+#include "elementwise_mul_op.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <typename Dtype, typename T>
+void ElementwiseMulOp<Dtype, T>::InferShape() const {
+  auto x_dim = this->param_.InputX()->dims();
+  this->param_.Out()->Resize(x_dim);
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(elementwise_mul, ops::ElementwiseMulOp);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+REGISTER_OPERATOR_MALI_GPU(elementwise_mul, ops::ElementwiseMulOp);
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+#endif
+
+#endif
diff --git a/src/operators/elementwise_mul_op.h b/src/operators/elementwise_mul_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..991b03a486d65c720b88b80a1aece417b9919d3d
--- /dev/null
+++ b/src/operators/elementwise_mul_op.h
@@ -0,0 +1,51 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/ + +#ifdef ELEMENTWISEMUL_OP + +#pragma once + +#include +#include "framework/operator.h" +#include "kernel/elementwise_mul_kernel.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { +using std::string; +template +class ElementwiseMulOp : public framework::OperatorWithKernel< + DeviceType, ElementwiseMulParam, + operators::ElementwiseMulKernel> { + public: + ElementwiseMulOp(const string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, + std::shared_ptr scope) + : framework::OperatorWithKernel< + DeviceType, ElementwiseMulParam, + operators::ElementwiseMulKernel>( + type, inputs, outputs, attrs, scope) {} + + using framework::OperatorWithKernel< + DeviceType, ElementwiseMulParam, + operators::ElementwiseMulKernel>::OperatorWithKernel; + void InferShape() const override; + + protected: +}; +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/arm/elementwise_mul_kernel.cpp b/src/operators/kernel/arm/elementwise_mul_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..00205952a2567aae5927e318c494c90bc4a5ffbb --- /dev/null +++ b/src/operators/kernel/arm/elementwise_mul_kernel.cpp @@ -0,0 +1,38 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef ELEMENTWISEMUL_OP + +#include "operators/kernel/elementwise_mul_kernel.h" +#include "operators/kernel/central-arm-func/elementwise_mul_arm_func.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ElementwiseMulKernel::Init(ElementwiseMulParam *param) { + return true; +} + +template <> +void ElementwiseMulKernel::Compute( + const ElementwiseMulParam ¶m) const { + ElementwiseMulCompute(param); + param.Out()->set_lod(param.InputX()->lod()); +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/arm/sum_kernel.cpp b/src/operators/kernel/arm/sum_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0290037522a2bf3b3c88ce129eda277a401fecb5 --- /dev/null +++ b/src/operators/kernel/arm/sum_kernel.cpp @@ -0,0 +1,37 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
 */
+
+#ifdef SUM_OP
+
+#include "operators/kernel/sum_kernel.h"
+#include "operators/kernel/central-arm-func/sum_arm_func.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool SumKernel<CPU, float>::Init(SumParam<CPU> *param) {
+  return true;
+}
+
+template <>
+void SumKernel<CPU, float>::Compute(const SumParam<CPU> &param) const {
+  SumCompute<float>(param);
+  param.Out()->set_lod(param.Inputs()[0]->lod());
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/central-arm-func/elementwise_mul_arm_func.h b/src/operators/kernel/central-arm-func/elementwise_mul_arm_func.h
new file mode 100644
index 0000000000000000000000000000000000000000..0aed7ff8d4f7abbe64de288e4f22d3b691a23bbc
--- /dev/null
+++ b/src/operators/kernel/central-arm-func/elementwise_mul_arm_func.h
@@ -0,0 +1,45 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef ELEMENTWISEMUL_OP
+
+#pragma once
+#include "operators/math/elementwise_op_function.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <typename T>
+struct MulFunctor {
+  inline T operator()(T a, T b) const { return a * b; }
+};
+
+template <typename P>
+void ElementwiseMulCompute(const ElementwiseMulParam<CPU> &param) {
+  const Tensor *input_x = param.InputX();
+  const Tensor *input_y = param.InputY();
+  Tensor *Out = param.Out();
+  Out->mutable_data<float>();
+  int axis = param.Axis();
+  ElementwiseComputeEx<MulFunctor<float>, float>(input_x, input_y, axis,
+                                                 MulFunctor<float>(), Out);
+}
+
+template class ElementwiseMulKernel<CPU, float>;
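+
+// Illustrative sketch only (not part of the kernel): ElementwiseComputeEx
+// broadcasts Y along `axis` and applies MulFunctor pairwise, so for equal
+// shapes X = {1, 2, 3} and Y = {4, 5, 6} the result is Out = {4, 10, 18};
+// e.g. MulFunctor<float>()(2.0f, 3.0f) yields 6.0f.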
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/central-arm-func/sum_arm_func.h b/src/operators/kernel/central-arm-func/sum_arm_func.h
new file mode 100644
index 0000000000000000000000000000000000000000..0319f2b23418f36670ca51993e97726879f12ec1
--- /dev/null
+++ b/src/operators/kernel/central-arm-func/sum_arm_func.h
@@ -0,0 +1,166 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef SUM_OP
+#pragma once
+
+#include "operators/math/selected_rows_functor.h"
+
+namespace paddle_mobile {
+namespace operators {
+using LoDTensorArray = std::vector<LoDTensor>;
+
+template <typename P>
+void SumCompute(const SumParam<CPU> &param) {
+  auto inputsvars = param.InputsVars();
+  int N = inputsvars.size();
+  auto *outvar = param.OutVar();
+
+  bool in_place = outvar == inputsvars[0];
+  if (outvar->IsType<framework::LoDTensor>()) {
+    auto *out = outvar->GetMutable<framework::LoDTensor>();
+    if (!in_place) {
+      out->mutable_data<float>();
+    }
+    auto *outptr = out->data<float>();
+    // auto result = Flatten(*out);
+
+    if (!in_place) {
+      std::fill(out->data<float>(), out->data<float>() + out->numel(), 0);
+    }
+    math::SelectedRowsAddToTensor<float> functor;
+    for (int i = in_place ? 1 : 0; i < N; i++) {
+      if (inputsvars[i]->IsType<framework::LoDTensor>()) {
+        auto *in_t = inputsvars[i]->Get<framework::LoDTensor>();
+        auto *inptr = in_t->data<float>();
+        if (in_t->numel() == 0) {
+          continue;
+        }
+        for (int j = 0; j < out->numel(); ++j) {
+          outptr[j] = outptr[j] + inptr[j];
+        }
+
+      } else if (inputsvars[i]->IsType<framework::SelectedRows>()) {
+        auto *in_t = inputsvars[i]->Get<framework::SelectedRows>();
+        functor(*in_t, out);
+      } else {
+        PADDLE_MOBILE_THROW_EXCEPTION(
+            "Variable type must be LoDTensor/SelectedRows.");
+      }
+    }
+
+  } else if (outvar->IsType<framework::SelectedRows>()) {
+    std::unique_ptr<framework::SelectedRows> in0;
+    if (in_place) {
+      // If is in_place, we store the input[0] to in0
+      auto *in_sel0 = inputsvars[0]->Get<framework::SelectedRows>();
+      auto &rows = in_sel0->rows();
+      //#ifdef PADDLE_WITH_CUDA
+      //      std::vector<int64_t> rows_in_cpu;
+      //      rows_in_cpu.reserve(rows.size());
+      //      for (auto item : rows) {
+      //        rows_in_cpu.push_back(item);
+      //      }
+      //      in0.reset(new framework::SelectedRows(rows_in_cpu,
+      //      in_sel0.height()));
+      //#else
+      in0.reset(new framework::SelectedRows(rows, in_sel0->height()));
+      //#endif
+      in0->mutable_value()->ShareDataWith(in_sel0->value());
+    }
+
+    auto get_selected_row = [&](size_t i) -> const SelectedRows & {
+      if (i == 0 && in0) {
+        return *in0.get();
+      } else {
+        return *(inputsvars[i]->Get<framework::SelectedRows>());
+      }
+    };
+
+    auto *out = outvar->GetMutable<framework::SelectedRows>();
+    out->mutable_rows()->clear();
+    auto *out_value = out->mutable_value();
+
+    // Runtime InferShape
+    size_t first_dim = 0;
+    for (int i = 0; i < N; i++) {
+      auto &sel_row = get_selected_row(i);
+      first_dim += sel_row.rows().size();
+    }
+    auto in_dim = framework::vectorize(get_selected_row(N - 1).value().dims());
+    in_dim[0] = static_cast<int64_t>(first_dim);
+
+    out_value->Resize(framework::make_ddim(in_dim));
+
+    // if all the input sparse vars are empty, no need to
+    // merge these vars.
+    if (first_dim == 0UL) {
+      return;
+    }
+    out_value->mutable_data<float>();
+    math::SelectedRowsAddTo<float> functor;
+
+    int64_t offset = 0;
+    for (int i = 0; i < N; i++) {
+      auto &sel_row = get_selected_row(i);
+      if (sel_row.rows().size() == 0) {
+        continue;
+      }
+      PADDLE_MOBILE_ENFORCE(out->height() == sel_row.height());
+      functor(sel_row, offset, out);
+      offset += sel_row.value().numel();
+    }
+  } else if (outvar->IsType<LoDTensorArray>()) {
+    auto &out_array = *outvar->GetMutable<LoDTensorArray>();
+    for (size_t i = in_place ? 1 : 0; i < inputsvars.size(); ++i) {
+      PADDLE_MOBILE_ENFORCE(inputsvars[i]->IsType<LoDTensorArray>(),
+                            "Only support all inputs are TensorArray");
+      auto *in_array = inputsvars[i]->Get<LoDTensorArray>();
+
+      for (size_t i = 0; i < in_array->size(); ++i) {
+        if ((*in_array)[i].numel() != 0) {
+          if (i >= out_array.size()) {
+            out_array.resize(i + 1);
+          }
+          if (out_array[i].numel() == 0) {
+            framework::TensorCopy((*in_array)[i], &out_array[i]);
+            out_array[i].set_lod((*in_array)[i].lod());
+          } else {
+            PADDLE_MOBILE_ENFORCE(out_array[i].lod() == (*in_array)[i].lod());
+            auto *inptr = (*in_array)[i].data<float>();
+            auto *outptr = out_array[i].data<float>();
+
+            for (int j = 0; j < (*in_array)[i].numel(); ++j) {
+              outptr[j] = inptr[j] + outptr[j];
+            }
+          }
+        }
+      }
+    }
+  } else {
+    PADDLE_MOBILE_THROW_EXCEPTION(
+        "Unexpected branch, output variable type is %s",
+        outvar->Type().name());
+  }
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/elementwise_mul_kernel.h b/src/operators/kernel/elementwise_mul_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..d1e326c6c4e7830c11c387dca03da9858c9a37dd
--- /dev/null
+++ b/src/operators/kernel/elementwise_mul_kernel.h
@@ -0,0 +1,39 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef ELEMENTWISEMUL_OP
+
+#pragma once
+
+#include "framework/operator.h"
+#include "operators/math/elementwise_op_function.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+using namespace framework;
+
+template <typename DeviceType, typename T>
+class ElementwiseMulKernel
+    : public framework::OpKernelBase<DeviceType,
+                                     ElementwiseMulParam<DeviceType>> {
+ public:
+  void Compute(const ElementwiseMulParam<DeviceType> &param) const;
+  bool Init(ElementwiseMulParam<DeviceType> *param);
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/sum_kernel.h b/src/operators/kernel/sum_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..669db899b542a5231d685e098cf907e0b1b650ff
--- /dev/null
+++ b/src/operators/kernel/sum_kernel.h
@@ -0,0 +1,37 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/ + +#ifdef SUM_OP + +#pragma once +#include "framework/operator.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +using namespace framework; + +template +class SumKernel + : public framework::OpKernelBase> { + public: + void Compute(const SumParam ¶m) const; + bool Init(SumParam *param); +}; + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/math/depthwise_conv_3x3.cpp b/src/operators/math/depthwise_conv_3x3.cpp index 91e11fa8ff0184e5321269167b5f4693de2245ac..b6cf28a9ca665a1496ee8032f87c013137deade8 100644 --- a/src/operators/math/depthwise_conv_3x3.cpp +++ b/src/operators/math/depthwise_conv_3x3.cpp @@ -1667,7 +1667,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter, const int w_times = (out_w - 2) / 3; float32x4_t zero = vdupq_n_f32(0.0); for (int b = batch_size; b > 0; --b) { - #pragma omp parallel for +#pragma omp parallel for for (int j = 0; j < c; j++) { const float *input_row_ptr; float *output_row_ptr; @@ -1912,9 +1912,7 @@ void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter, float w20 = filter_data[6]; float w21 = filter_data[7]; float w22 = filter_data[8]; - float32x4_t biasv = vld1q_dup_f32(bias_data); - for (int i = 0; i < output_height; i += 1) { for (int m = 0; m < output_width - 2; m += 3) { float *output_ptr = output_data + i * output_width + m; @@ -1949,8 +1947,9 @@ void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter, out0 = vmlaq_n_f32(out0, in4, w20); out0 = vmlaq_n_f32(out0, tmp4, w21); out0 = vmlaq_n_f32(out0, tmp5, w22); - out0 = vaddq_f32(out0, biasv); - + if (if_bias) { + out0 = vaddq_f32(out0, biasv); + } vst1q_lane_f32(output_ptr, out0, 0); vst1q_lane_f32(output_ptr + 1, out0, 1); vst1q_lane_f32(output_ptr + 2, out0, 2); @@ -1960,16 +1959,18 @@ void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter, } for (int j = m; j < output_width; j++) { output_data[i * output_width + j] = - input_data[(2 * i - 1) * input_width + 2 * j - 1] * w00 + - input_data[(2 * i - 1) * input_width + 2 * j] * w01 + - input_data[(2 * i - 1) * input_width + 2 * j + 1] * w02 + - input_data[(2 * i) * input_width + 2 * j - 1] * w10 + - input_data[(2 * i) * input_width + 2 * j] * w11 + - input_data[(2 * i) * input_width + 2 * j + 1] * w12 + - input_data[(2 * i + 1) * input_width + 2 * j - 1] * w20 + - input_data[(2 * i + 1) * input_width + 2 * j] * w21 + - input_data[(2 * i + 1) * input_width + 2 * j + 1] * w22; - output_data[i * output_width + j] += *bias_data; + input_data[(2 * i) * input_width + 2 * j] * w00 + + input_data[(2 * i) * input_width + 2 * j + 1] * w01 + + input_data[(2 * i) * input_width + 2 * j + 2] * w02 + + input_data[(2 * i + 1) * input_width + 2 * j] * w10 + + input_data[(2 * i + 1) * input_width + 2 * j + 1] * w11 + + input_data[(2 * i + 1) * input_width + 2 * j + 2] * w12 + + input_data[(2 * i + 2) * input_width + 2 * j] * w20 + + input_data[(2 * i + 2) * input_width + 2 * j + 1] * w21 + + input_data[(2 * i + 2) * input_width + 2 * j + 2] * w22; + if (if_bias) { + output_data[i * output_width + j] += *bias_data; + } } } } diff --git a/src/operators/math/selected_rows_functor.h b/src/operators/math/selected_rows_functor.h new file mode 100644 index 0000000000000000000000000000000000000000..8cf1f5ca395d111ecca90f802773703ecb3286c9 --- /dev/null +++ b/src/operators/math/selected_rows_functor.h @@ -0,0 +1,173 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "framework/selected_rows.h" + +#define INLINE_FOR2(sizei, sizej) \ + for (int64_t i = 0; i < sizei; i++) \ + for (int64_t j = 0; j < sizej; j++) + +namespace paddle_mobile { +namespace operators { +namespace math { + +// SelectedRows + SelectedRows will simplely concat value and rows. +// The real computation happens in dealing with LoDTensor. +// template +// struct SelectedRowsAdd { +// void operator()( +// const framework::SelectedRows& input1, +// const framework::SelectedRows& input2, +// framework::SelectedRows* output); +//}; +// +// template +// struct SelectedRowsAddTensor { +// void operator()( +// const framework::SelectedRows& input1, +// const framework::Tensor& input2, framework::Tensor* output); +//}; + +// input2 = input1 + input2 +template +struct SelectedRowsAddTo { + void operator()(const framework::SelectedRows& input1, + const int64_t input2_offset, + framework::SelectedRows* input2) { + auto in1_height = input1.height(); + PADDLE_MOBILE_ENFORCE(in1_height == input2->height()); + + auto& in1_rows = input1.rows(); + auto& in2_rows = *(input2->mutable_rows()); + + auto& in1_value = input1.value(); + auto* in2_value = input2->mutable_value(); + + // concat rows + in2_rows.Extend(in1_rows.begin(), in1_rows.end()); + + // auto in1_place = input1.place(); + // PADDLE_ENFORCE(platform::is_cpu_place(in1_place)); + // auto in2_place = input2->place(); + // PADDLE_ENFORCE(platform::is_cpu_place(in2_place)); + + auto* in1_data = in1_value.data(); + auto* in2_data = in2_value->data(); + memory::Copy(in2_data + input2_offset, in1_data, + in1_value.numel() * sizeof(T)); + } +}; + +// input2 = input1 + input2 +template +struct SelectedRowsAddToTensor { + void operator()(const framework::SelectedRows& input1, + framework::Tensor* input2) { + auto in1_height = input1.height(); + auto in2_dims = input2->dims(); + PADDLE_MOBILE_ENFORCE(in1_height == in2_dims[0]); + + auto& in1_value = input1.value(); + auto& in1_rows = input1.rows(); + + int64_t in1_row_numel = in1_value.numel() / in1_rows.size(); + PADDLE_MOBILE_ENFORCE(in1_row_numel == input2->numel() / in1_height); + + auto* in1_data = in1_value.data(); + auto* input2_data = input2->data(); + + for (size_t i = 0; i < in1_rows.size(); i++) { + for (int64_t j = 0; j < in1_row_numel; j++) { + input2_data[in1_rows[i] * in1_row_numel + j] += + in1_data[i * in1_row_numel + j]; + } + } + } +}; + +// namespace scatter { +//// functors for manuplating SelectedRows data +// template +// struct MergeAdd { +// // unary functor, merge by adding duplicated rows in +// // the input SelectedRows object. 
+// framework::SelectedRows operator()( +// const framework::SelectedRows& input); +//}; + +// template +// struct Add { +// framework::SelectedRows operator()( +// const framework::SelectedRows& input1, +// const framework::SelectedRows& input2) { +// framework::SelectedRows out; +// out.set_rows(input1.rows()); +// out.set_height(input1.height()); +// out.mutable_value()->mutable_data(input1.value().dims(), +// ); +// auto e_out = framework::EigenVector::Flatten(*(out.mutable_value())); +// auto e_in1 = framework::EigenVector::Flatten(input1.value()); +// auto e_in2 = framework::EigenVector::Flatten(input2.value()); +// e_out.device(*context.eigen_device()) = e_in1 + e_in2; +// return out; +// } +//}; + +// template +// struct Mul { +// // multiply two SelectedRows +// framework::SelectedRows operator()( +// const framework::SelectedRows& input1, +// const framework::SelectedRows& input2) { +// framework::SelectedRows out; +// out.set_rows(input1.rows()); +// out.set_height(input1.height()); +// out.mutable_value()->mutable_data(input1.value().dims() +// ); +// auto e_out = framework::EigenVector::Flatten(*(out.mutable_value())); +// auto e_in1 = framework::EigenVector::Flatten(input1.value()); +// auto e_in2 = framework::EigenVector::Flatten(input2.value()); +// e_out.device(*context.eigen_device()) = e_in1 * e_in2; +// return out; +// } +// // multiply scalar to SelectedRows +// framework::SelectedRows operator()( +// const framework::SelectedRows& input1, +// const T input2) { +// framework::SelectedRows out; +// out.set_rows(input1.rows()); +// out.set_height(input1.height()); +// out.mutable_value()->mutable_data(input1.value().dims(), +// ); +// auto e_out = framework::EigenVector::Flatten(*(out.mutable_value())); +// auto e_in1 = framework::EigenVector::Flatten(input1.value()); +// e_out.device(*context.eigen_device()) = input2 * e_in1; +// return out; +// } +//}; + +enum class ScatterOps { ASSIGN, ADD, SUB, SUBBY, MUL, DIV, DIVBY }; + +// out = seleted_rows_in / tensor +template +struct UpdateToTensor { + void operator()(const ScatterOps& op, const framework::SelectedRows& input1, + framework::Tensor* input2); +}; + +// namespace scatter +} // namespace math +} // namespace operators +} // namespace paddle_mobile diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 1c707f960d7cfd3cbecb1146f08e6a4291da4a0b..27ab4629f011ba25390961b2679fd8f86d213fc3 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -35,6 +35,7 @@ using framework::AttributeMap; using framework::LoDTensor; using framework::Scope; using framework::Tensor; +using framework::Variable; using std::string; using std::vector; @@ -182,6 +183,11 @@ class OpParam { return GetMultiVarValue("X", inputs, scope); } + static vector InputMultiVarsFrom(const VariableNameMap &inputs, + const Scope &scope) { + return GetMultiVar("X", inputs, scope); + } + template static T *OutputBatchGateFrom(const VariableNameMap &outputs, const Scope &scope) { @@ -216,6 +222,11 @@ class OpParam { return GetVarValue("Output", outputs, scope); } + static Variable *OutVarFrom(const VariableNameMap &outputs, + const Scope &scope) { + return GetVar("Out", outputs, scope); + } + template static T *OutFrom(const VariableNameMap &outputs, const Scope &scope) { return GetVarValue("Out", outputs, scope); @@ -286,6 +297,19 @@ class OpParam { } } + static Variable *GetVar(const string &key, const VariableNameMap &var_map, + const Scope &scope) { + PADDLE_MOBILE_ENFORCE(var_map.count(key) > 0, + "%s is not contained in 
var_map", key.c_str()) + auto var_vec = var_map.at(key); + if (!var_vec.empty()) { + auto var = scope.FindVar(var_vec[0]); + return var; + } else { + return nullptr; + } + } + static std::string getkey(const string &key, const VariableNameMap &var_map, int index) { auto var_vec = var_map.at(key); @@ -319,6 +343,19 @@ class OpParam { } return var_res; } + + static vector GetMultiVar(const string &key, + const VariableNameMap &var_map, + const Scope &scope) { + auto var_vecs = var_map.at(key); + assert(var_vecs.size() > 1); + vector var_res; + for (auto &var_vec : var_vecs) { + auto var = scope.FindVar(var_vec); + var_res.push_back(var); + } + return var_res; + } }; template @@ -405,6 +442,47 @@ class ElementwiseAddParam : OpParam { #endif }; +#ifdef ELEMENTWISEMUL_OP +template +class ElementwiseMulParam : OpParam { + typedef typename DtypeTensorTrait::gtype GType; + typedef typename DtypeTensorTrait::rtype RType; + + public: + ElementwiseMulParam(const VariableNameMap &inputs, + const VariableNameMap &outputs, const AttributeMap &attrs, + const Scope &scope) { + input_x_ = InputXFrom(inputs, scope); + input_y_ = InputYFrom(inputs, scope); + out_ = OutFrom(outputs, scope); + axis_ = GetAttr("axis", attrs); + } + + const GType *InputX() const { return input_x_; } + + const GType *InputY() const { return input_y_; } + + GType *Out() const { return out_; } + + const int &Axis() const { return axis_; } + + private: + GType *input_x_; + GType *input_y_; + GType *out_; + int axis_; +#ifdef PADDLE_MOBILE_FPGA + + private: + fpga::EWMulArgs fpga_EW_mul_args; + + public: + const fpga::EWMulArgs &FpgaArgs() const { return fpga_EW_mul_args; } + void SetFpgaArgs(const fpga::EWMulArgs &args) { fpga_EW_mul_args = args; } +#endif +}; +#endif + #ifdef FUSION_ELEMENTWISEADDRELU_OP template using ElementwiseAddReluParam = ElementwiseAddParam; @@ -490,6 +568,46 @@ class ConcatParam : public OpParam { }; #endif +#ifdef SUM_OP +template +class SumParam : public OpParam { + typedef typename DtypeTensorTrait::gtype GType; + typedef typename DtypeTensorTrait::rtype RType; + + public: + SumParam(const VariableNameMap &inputs, const VariableNameMap &outputs, + const AttributeMap &attrs, const Scope &scope) { + inputs_vars_ = InputMultiVarsFrom(inputs, scope); + out_var_ = OutVarFrom(outputs, scope); + inputs_ = InputMultiFrom(inputs, scope); + out_ = OutFrom(outputs, scope); + } + + vector InputsVars() const { return inputs_vars_; } + + Variable *OutVar() const { return out_var_; } + + vector Inputs() const { return inputs_; } + + GType *Out() const { return out_; } + + private: + vector inputs_vars_; + Variable *out_var_; + vector inputs_; + GType *out_; +#ifdef PADDLE_MOBILE_FPGA + + private: + fpga::SumArgs fpga_sum_args; + + public: + const fpga::SumArgs &FpgaArgs() const { return fpga_sum_args; } + void SetFpgaArgs(const fpga::SumArgs &args) { fpga_sum_args = args; } +#endif +}; +#endif + #ifdef LRN_OP template class LrnParam : public OpParam { diff --git a/src/operators/sum_op.cpp b/src/operators/sum_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8c0638c63ca7cab01047b757476549cf3832bf8a --- /dev/null +++ b/src/operators/sum_op.cpp @@ -0,0 +1,71 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
 #ifdef LRN_OP
 template <typename Dtype>
 class LrnParam : public OpParam {
diff --git a/src/operators/sum_op.cpp b/src/operators/sum_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8c0638c63ca7cab01047b757476549cf3832bf8a
--- /dev/null
+++ b/src/operators/sum_op.cpp
@@ -0,0 +1,71 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef SUM_OP
+
+#include <vector>
+
+#include "operators/sum_op.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <typename Dtype, typename T>
+void SumOp<Dtype, T>::InferShape() const {
+  auto inputs = this->param_.Inputs();
+  const size_t n = inputs.size();
+
+  std::vector<framework::DDim> inputs_dims;
+  inputs_dims.reserve(n);
+  for (size_t i = 0; i < n; i++) {
+    inputs_dims.push_back(inputs[i]->dims());
+  }
+
+  if (n == 1) {
+    DLOG << "Warning: sum op has only one input, "
+            "may waste memory";
+  }
+
+  framework::DDim in_dim({0});
+
+  for (auto& x_dim : inputs_dims) {
+    if (framework::product(x_dim) == 0) {
+      continue;
+    }
+    if (framework::product(in_dim) == 0) {
+      in_dim = x_dim;
+    } else {
+      PADDLE_MOBILE_ENFORCE(in_dim == x_dim,
+                            "input tensors must have same shape");
+    }
+  }
+
+  this->param_.Out()->Resize(in_dim);
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(sum, ops::SumOp);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+REGISTER_OPERATOR_MALI_GPU(sum, ops::SumOp);
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(sum, ops::SumOp);
+#endif
+
+#endif
diff --git a/src/operators/sum_op.h b/src/operators/sum_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..aad8e8322b60d0e931215c9d48d97862f9b14107
--- /dev/null
+++ b/src/operators/sum_op.h
@@ -0,0 +1,49 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/
+
+#ifdef SUM_OP
+
+#pragma once
+
+#include <string>
+#include "framework/operator.h"
+#include "operators/kernel/sum_kernel.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+using std::string;
+template <typename DeviceType, typename T>
+class SumOp : public framework::OperatorWithKernel<
+                  DeviceType, SumParam<DeviceType>,
+                  operators::SumKernel<DeviceType, T>> {
+ public:
+  SumOp(const string &type, const VariableNameMap &inputs,
+        const VariableNameMap &outputs, const framework::AttributeMap &attrs,
+        std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType, SumParam<DeviceType>,
+                                      operators::SumKernel<DeviceType, T>>(
+            type, inputs, outputs, attrs, scope) {}
+
+  using framework::OperatorWithKernel<
+      DeviceType, SumParam<DeviceType>,
+      operators::SumKernel<DeviceType, T>>::OperatorWithKernel;
+
+  void InferShape() const override;
+};
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 00bea4296305cd4cc3b2aca094f227fda5bd16fc..d258d20dcc037abc2754316a1d337288d55aa067 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -212,6 +212,10 @@ if (NOT FOUND_MATCH)
     ADD_EXECUTABLE(test-fc-op operators/test_fusion_fc_op.cpp test_helper.h test_include.h)
     target_link_libraries(test-fc-op paddle-mobile)
 
+    # gen test
+    ADD_EXECUTABLE(test-sum-op operators/test_sum_op.cpp test_helper.h test_include.h)
+    target_link_libraries(test-sum-op paddle-mobile)
+
     # test quantize op
     ADD_EXECUTABLE(test-quantize-op operators/test_quantize_op.cpp test_helper.h test_include.h)
     target_link_libraries(test-quantize-op paddle-mobile)
diff --git a/test/executor_for_test.h b/test/executor_for_test.h
index 93847af20a6d48a6df33dc50f6c6a1db76facf51..60f1856bb9294c6f9b4bd5cfb7d44f984c6f0794 100644
--- a/test/executor_for_test.h
+++ b/test/executor_for_test.h
@@ -43,7 +43,7 @@ template <typename DeviceType, typename OpType>
 class Executor4Test : public Executor<DeviceType> {
  public:
   Executor4Test(Program<DeviceType> p, string op_type,
-                bool use_optimize = false, int predict_op_count = 1)
+                bool use_optimize = false)
       : Executor<DeviceType>() {
     this->use_optimize_ = use_optimize;
     this->program_ = p;
@@ -64,7 +64,7 @@ class Executor4Test : public Executor<DeviceType> {
     std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
     for (int i = 0; i < ops.size(); ++i) {
       auto op = ops[i];
-      if (op->Type() == op_type && i < predict_op_count) {
+      if (op->Type() == op_type) {
         DLOG << "matched: " << op->Type();
 
         /// test first meeting op in program
@@ -74,6 +74,7 @@ class Executor4Test : public Executor<DeviceType> {
             op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
             this->program_.scope);
         this->ops_of_block_[*block_desc.get()].push_back(op_ptr);
+        break;
       }
     }
   }
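With `predict_op_count` removed, `Executor4Test` now instantiates only the first op whose type matches and stops, which is what the old default of 1 effectively did. The selection logic, reduced to a Python sketch over a hypothetical list of `(type, desc)` pairs:

```python
def first_matching_op(ops, op_type):
    for current_type, desc in ops:
        if current_type == op_type:
            return desc  # take the first match, then stop looking
    return None

assert first_matching_op([("conv2d", 1), ("sum", 2), ("sum", 3)], "sum") == 2
```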
*/
+
+#include "../test_helper.h"
+#include "../test_include.h"
+#include "operators/sum_op.h"
+
+namespace paddle_mobile {
+namespace framework {
+
+template <typename Dtype>
+class TestSumOp {
+ public:
+  explicit TestSumOp(const Program<Dtype> p) : program_(p) {
+    if (use_optimize_) {
+      to_predict_program_ = program_.optimizeProgram;
+    } else {
+      to_predict_program_ = program_.originProgram;
+    }
+
+    const std::vector<std::shared_ptr<BlockDesc>> blocks =
+        to_predict_program_->Blocks();
+    for (int i = 0; i < blocks.size(); ++i) {
+      std::shared_ptr<BlockDesc> block_desc = blocks[i];
+      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
+      for (int j = 0; j < ops.size(); ++j) {
+        std::shared_ptr<OpDesc> op = ops[j];
+        if (op->Type() == "sum" && op->Input("X")[0] == "fc_2.tmp_0") {
+          DLOG << " sum attr size: " << op->GetAttrMap().size();
+          DLOG << " inputs size: " << op->GetInputs().size();
+          DLOG << " outputs size: " << op->GetOutputs().size();
+
+          std::shared_ptr<operators::SumOp<Dtype, float>> sum =
+              std::make_shared<operators::SumOp<Dtype, float>>(
+                  op->Type(), op->GetInputs(), op->GetOutputs(),
+                  op->GetAttrMap(), program_.scope);
+          ops_of_block_[*block_desc.get()].push_back(sum);
+        }
+      }
+    }
+  }
+
+  std::shared_ptr<Tensor> predict_sum(const Tensor &t1, const Tensor &t2) {
+    // feed
+    auto scope = program_.scope;
+    Variable *x1_feed_value = scope->Var("fc_2.tmp_0");
+    auto tensor_x1 = x1_feed_value->GetMutable<LoDTensor>();
+    tensor_x1->ShareDataWith(t1);
+
+    Variable *x2_feed_value = scope->Var("fc_2.tmp_1");
+    auto tensor_x2 = x2_feed_value->GetMutable<LoDTensor>();
+    tensor_x2->ShareDataWith(t2);
+
+    Variable *output = scope->Var("fc_2.tmp_2");
+    auto *output_tensor = output->GetMutable<LoDTensor>();
+    output_tensor->mutable_data<float>({2, 96});
+
+    std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
+    out_tensor.reset(output_tensor);
+
+    predict_sum(t1, t2, 0);
+    return out_tensor;
+  }
+
+ private:
+  const framework::Program<Dtype> program_;
+  std::shared_ptr<ProgramDesc> to_predict_program_;
+  std::map<framework::BlockDesc,
+           std::vector<std::shared_ptr<framework::OperatorBase<Dtype>>>>
+      ops_of_block_;
+  bool use_optimize_ = false;
+
+  void predict_sum(const Tensor &t1, const Tensor &t2, int block_id) {
+    std::shared_ptr<BlockDesc> to_predict_block =
+        to_predict_program_->Block(block_id);
+    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
+      auto op = ops_of_block_[*to_predict_block.get()][j];
+      DLOG << "op -> run()";
+      op->Run();
+    }
+  }
+};
+
+template class TestSumOp<paddle_mobile::CPU>;
+}  // namespace framework
+}  // namespace paddle_mobile
+
+int main() {
+  DLOG << "----------**********----------";
+  DLOG << "begin to run Sum Test";
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string(g_eng) + "/model",
+                             std::string(g_eng) + "/params");
+
+  /// inputs x1, x2: two tensors of shape (2, 96)
+  paddle_mobile::framework::Tensor inputx1;
+  SetupTensor<float>(&inputx1, {2, 96}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx1_ptr = inputx1.data<float>();
+
+  paddle_mobile::framework::Tensor inputx2;
+  SetupTensor<float>(&inputx2, {2, 96}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx2_ptr = inputx2.data<float>();
+
+  paddle_mobile::framework::TestSumOp<paddle_mobile::CPU> testSumOp(program);
+
+  auto output_sum = testSumOp.predict_sum(inputx1, inputx2);
+  auto *output_sum_ptr = output_sum->data<float>();
+
+  DLOG << "input1 44: " << inputx1_ptr[44];
+  DLOG << "input2 44: " << inputx2_ptr[44];
+  DLOG << "out 44 :" << output_sum_ptr[44];
+
+  return 0;
+}
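The test only DLOGs element 44 of each tensor and leaves the comparison to the reader. The invariant being eyeballed, written out in NumPy (a sketch under the same {2, 96} shapes, not part of the test):

```python
import numpy as np

x1 = np.random.uniform(0, 1, (2, 96)).astype(np.float32)
x2 = np.random.uniform(0, 1, (2, 96)).astype(np.float32)
out = x1 + x2  # what the sum op computes for two same-shape inputs

i = 44  # the flattened index the DLOG statements sample
assert np.isclose(out.flat[i], x1.flat[i] + x2.flat[i])
```

diff --git a/test/test_helper.h b/test/test_helper.h
index ecbc251a815e343f75b1247ffc430e9c52d6abfd..03ee27d71d58eb5c727172a8112aeedfde244d0f 100644
--- a/test/test_helper.h
+++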
b/test/test_helper.h @@ -27,6 +27,7 @@ limitations under the License. */ static const char *g_ocr = "../models/ocr"; static const char *g_mobilenet_ssd = "../models/mobilenet+ssd"; static const char *g_genet_combine = "../models/enet"; +static const char *g_eng = "../models/eng_20conv_1_9_fc"; static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture"; static const char *g_mobilenet_combined = "../models/mobilenet_combine"; static const char *g_googlenetv1_combined = "../models/googlenetv1_combine"; @@ -51,6 +52,7 @@ static const char *g_test_image_1x3x224x224_banana = static const char *g_test_image_desktop_1_3_416_416_nchw_float = "../images/in_put_1_3_416_416_2"; static const char *g_hand = "../images/hand_image"; +static const char *g_moto = "../images/moto_300x300_float"; static const char *g_imgfssd_ar = "../images/test_image_ssd_ar"; static const char *g_imgfssd_ar1 = "../images/003_0001.txt"; static const char *g_img = "../images/img.bin"; diff --git a/tools/op.cmake b/tools/op.cmake index 898f66a634d70a5def7c7ce328a7a291d9b55c70..4795568b8e64549d3d21fd5546ff2eec15a05012 100644 --- a/tools/op.cmake +++ b/tools/op.cmake @@ -33,6 +33,7 @@ if (CON GREATER -1) set(POOL_OP ON) set(RESHAPE_OP ON) set(FUSION_CONVADDBNRELU_OP ON) + set(FUSION_CONVADDRELU_OP ON) set(FUSION_CONVADD_OP ON) set(FOUND_MATCH ON) @@ -220,6 +221,8 @@ if(NOT FOUND_MATCH) set(SPLIT_OP ON) set(FLATTEN_OP ON) set(SHAPE_OP ON) + set(ELEMENTWISEMUL_OP ON) + set(SUM_OP ON) endif() # option(BATCHNORM_OP "" ON) @@ -388,3 +391,11 @@ endif() if (SHAPE_OP) add_definitions(-DSHAPE_OP) endif() + +if (ELEMENTWISEMUL_OP) + add_definitions(-DELEMENTWISEMUL_OP) +endif() +if (SUM_OP) + add_definitions(-DSUM_OP) +endif() + diff --git a/python/tools/imagetools/imagetools.py b/tools/python/imagetools/imagetools.py similarity index 100% rename from python/tools/imagetools/imagetools.py rename to tools/python/imagetools/imagetools.py diff --git a/python/tools/imagetools/img2nchw.py b/tools/python/imagetools/img2nchw.py similarity index 86% rename from python/tools/imagetools/img2nchw.py rename to tools/python/imagetools/img2nchw.py index 70ca456a1b1b5d20b92d0aaa51b01abb352c1d54..b38c9808059e08b089303208063184bb956667c1 100644 --- a/python/tools/imagetools/img2nchw.py +++ b/tools/python/imagetools/img2nchw.py @@ -45,13 +45,13 @@ def combine_bgrs_nchw(bgrs, means_b_g_r, scale, channel_type=ChannelType.BGR): print '------------------' print bgrs_float_array[0] - print bgrs_float_array[416 * 416 * 2 + 416 * 2 + 2] + print bgrs_float_array[224 * 224 * 2 + 224 * 2 + 2] # for i in range(0, 9): # print'bs %d' % i # print bs[i] / 255. - print bs[416 * 2 + 2] / 255. + print bs[224 * 2 + 2] / 255. print '--------------combine_bgrs_nchw-----------------end' return bgrs_float_array @@ -64,6 +64,6 @@ def combine_bgrs_nchw(bgrs, means_b_g_r, scale, channel_type=ChannelType.BGR): # cv2.waitKey(0) -bgrs = tools.resize_take_rgbs('datas/newyolo.jpg', (416, 416, 3)) +bgrs = tools.resize_take_rgbs('datas/jpgs/0000_0.9834-148196_82452-0ad4b83ec6bc0f9c5f28101539267054.jpg_p0_0.126571263346.jpg', (224, 224, 3)) array = combine_bgrs_nchw(bgrs, (0, 0, 0), 1. 
/ 255, ChannelType.RGB)
-tools.save_to_file('datas/desktop_1_3_416_416_nchw_float', array)
+tools.save_to_file('datas/desktop_1_3_224_224_nchw_float', array)
diff --git a/python/tools/imagetools/img2nhwc.py b/tools/python/imagetools/img2nhwc.py
similarity index 100%
rename from python/tools/imagetools/img2nhwc.py
rename to tools/python/imagetools/img2nhwc.py
diff --git a/python/tools/imagetools/numpy2binary.py b/tools/python/imagetools/numpy2binary.py
similarity index 58%
rename from python/tools/imagetools/numpy2binary.py
rename to tools/python/imagetools/numpy2binary.py
index dd4bc6e10074183b8dcee4122860c4140ff54229..87f0fda76666225256e7a80ddf3a5b0cda8ad12f 100644
--- a/python/tools/imagetools/numpy2binary.py
+++ b/tools/python/imagetools/numpy2binary.py
@@ -15,11 +15,11 @@ from array import array
 
 # image.resize(shape_h_w)
 
-data = np.fromfile('datas/img.res')
+data = np.fromfile('/Users/xiebaiyuan/PaddleProject/paddle-mobile/tools/python/imagetools/datas/jpgs2/0000_0.9834-148196_82452-0ad4b83ec6bc0f9c5f28101539267054.jpg_p0_0.126571263346.jpg.input.npfile', 'f')
 print data.size
-print data[0]
+print data
 
-data.reshape(1, 3, 416, 416)
+data = data.reshape(1, 3, 224, 224)
 out_array = array('f')
 print '--------------------'
 print data.size
@@ -27,12 +27,12 @@ print data[0]
 
 print 'if nhwc --------'
 # rgb rgb rgb rgb rgb
-print data[416 * 3 * 2 + 3 * 2 + 2]
+print data[224 * 3 * 2 + 3 * 2 + 2]
 # print data[2]
 
 print 'if nchw --------'
 # rgb rgb rgb rgb rgb
-print data[416 * 416 * 2 + 416 * 2 + 2]
+print data[224 * 224 * 2 + 224 * 2 + 2]
 # print data[2]
 
 # it is actually nchw
@@ -42,6 +42,8 @@ for i in range(0, data.size):
 
 print len(out_array)
 
-print out_array[416 * 416 * 2 + 416 * 2 + 2]
+print out_array[224 * 224 * 2 + 224 * 2 + 2]
+
+# print out_array
 
-tools.save_to_file('datas/in_put_1_3_416_416_2', out_array)
+tools.save_to_file('datas/in_put_1_3_224_224_nchw', out_array)
diff --git a/tools/python/modeltools/.gitignore b/tools/python/modeltools/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..4108f5244bc039cb95b06e391d51250bb9d0ce42
--- /dev/null
+++ b/tools/python/modeltools/.gitignore
@@ -0,0 +1,109 @@
+# Created by .ignore support plugin (hsz.mobi)
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +/yolo/datas/ +/mobilenet/datas/ diff --git a/tools/python/modeltools/core/__init__.py b/tools/python/modeltools/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/python/tools/mdl2fluid/framework.proto b/tools/python/modeltools/core/framework.proto similarity index 100% rename from python/tools/mdl2fluid/framework.proto rename to tools/python/modeltools/core/framework.proto diff --git a/python/tools/mdl2fluid/framework_pb2.py b/tools/python/modeltools/core/framework_pb2.py similarity index 100% rename from python/tools/mdl2fluid/framework_pb2.py rename to tools/python/modeltools/core/framework_pb2.py diff --git a/python/tools/mdl2fluid/op_types.py b/tools/python/modeltools/core/op_types.py similarity index 59% rename from python/tools/mdl2fluid/op_types.py rename to tools/python/modeltools/core/op_types.py index ff7d78d20835c605dc581ef14ad2d7d5171fea1d..550f87339c9a048a3732daa7707dd6427965029a 100644 --- a/python/tools/mdl2fluid/op_types.py +++ b/tools/python/modeltools/core/op_types.py @@ -5,22 +5,28 @@ layer_mdl_conv = 'ConvolutionLayer' layer_mdl_deepwise_conv = 'DepthwiseConvolutionLayer' layer_mdl_relu = 'ReluLayer' layer_mdl_pointwise_add = 'PointwiseConvolutionLayer' +layer_mdl_pooling = 'PoolingLayer' +layer_mdl_softmax = 'SoftmaxLayer' # fluid ops op_fluid_fusion_conv_add = 'fusion_conv_add' op_fluid_relu = 'relu' +op_fluid_pooling = 'pool2d' +op_fluid_softmax = 'softmax' # dict mdk layer --- fluid op mdl2fluid_op_layer_dict = { layer_mdl_conv: op_fluid_fusion_conv_add, layer_mdl_deepwise_conv: op_fluid_fusion_conv_add, layer_mdl_relu: op_fluid_relu, - layer_mdl_pointwise_add: op_fluid_fusion_conv_add + layer_mdl_pointwise_add: op_fluid_fusion_conv_add, + layer_mdl_pooling: op_fluid_pooling, + layer_mdl_softmax: op_fluid_softmax } mdl_outputs_key = "outputs" mdl_inputs_key = "inputs" -mdl_weight_key = "weights" +mdl_weight_key = "weight" mdl_attrs_key = "params" # dict of mdl-input _out param to fluid input out attrs @@ -39,13 +45,30 @@ fusion_conv_add_dict = { relu_dict = { mdl_inputs_key: 'X', mdl_outputs_key: 'Out', - mdl_weight_key: () + # mdl_weight_key: () } + +pool2d_dict = { + mdl_inputs_key: 'X', + mdl_outputs_key: 'Out', + # mdl_weight_key: (), + mdl_attrs_key: ('pooling_type', 'global_pooling') + +} + +softmax_dict = { + mdl_inputs_key: 'X', + mdl_outputs_key: 'Out', + mdl_weight_key: (), + mdl_attrs_key: () +} # mdl layers --- fluid ops op_io_dict = { 'fusion_conv_add': fusion_conv_add_dict, - 'relu': relu_dict + 'relu': relu_dict, + 'pool2d': pool2d_dict, + 'softmax': softmax_dict } # fluid attr key --- mdl params key @@ -54,70 +77,17 @@ 
fusion_conv_add_attrs_dict = { 'strides': 'stride', 'groups': 'group' } + +# fluid attr key --- mdl params key +pool2d_attrs_dict = { + 'global_pooling': 'global_pooling', + 'pooling_type': 'type' +} + + # fluid attr key --- mdl params key fluid_attrs_type_dict = { 'paddings': 0, 'strides': 6, 'groups': 6 } - -# '': "bias_term", 是不是要add 目前 yolo的模型都是 bias_term = 1 - - -# attrs { -# name: "axis" -# type: INT -# i: 1 -# } - - -# attrs_name = { -# 'name': "workspace_size_MB", -# 'type': 'INT', -# 'i': '4096' -# } -# attrs -# { -# name: "data_format" -# type: STRING -# s: "AnyLayout" -# } -# attrs -# { -# name: "use_mkldnn" -# type: BOOLEAN -# b: false -# } -# attrs -# { -# name: "use_cudnn" -# type: BOOLEAN -# b: true -# } -# attrs -# { -# name: "dilations" -# type: INTS -# ints: 1 -# ints: 1 -# } -# attrs -# { -# name: "groups" -# type: INT -# i: 1 -# } -# attrs -# { -# name: "paddings" -# type: INTS -# ints: 0 -# ints: 0 -# } -# attrs -# { -# name: "strides" -# type: INTS -# ints: 1 -# ints: 1 -# } diff --git a/tools/python/modeltools/mobilenet/__init__.py b/tools/python/modeltools/mobilenet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tools/python/modeltools/mobilenet/converter_mobilenet.py b/tools/python/modeltools/mobilenet/converter_mobilenet.py new file mode 100644 index 0000000000000000000000000000000000000000..ca1e1f7f4d83cf219e1e74603bb23a15c34cfb36 --- /dev/null +++ b/tools/python/modeltools/mobilenet/converter_mobilenet.py @@ -0,0 +1,509 @@ +# coding=utf-8 +import json +import os + +from core import framework_pb2 as framework_pb2, op_types as types +from mobilenet.swicher import Swichter +import shutil + + +def load_mdl(mdl_json_path): + # print('mdl json path : ' + mdl_json_path) + with open(mdl_json_path, 'r') as f: + return json.load(f) + + +def create_if_not_exit(target_dir): + if os.path.exists(target_dir): + shutil.rmtree(target_dir) + os.makedirs(target_dir, 0777) + + +class Converter: + 'convert mdlmodel to fluidmodel' + + def __init__(self, base_dir, mdl_json_path): + print 'base_dir: ' + base_dir + self.mdl_json_path = base_dir + mdl_json_path + self.base_dir = base_dir + print mdl_json_path + self.source_weights_dir = self.base_dir + 'datas/sourcemodels/source_weights/' + self.target_weight_dir = self.base_dir + 'datas/target/target_weights/' + + create_if_not_exit(self.target_weight_dir) + + self.mdl_json = load_mdl(self.mdl_json_path) + self.program_desc = framework_pb2.ProgramDesc() + self.weight_list_ = [] + self.deepwise_weight_list_ = [] + # print(json_dick) + # layers = (json_dick['layer']) + # for layer in layers: + # print(layer) + + def convert(self): + print 'convert begin.....' + # add block_desc + block_desc = self.program_desc.blocks.add() + block_desc.idx = 0 + block_desc.parent_idx = -1 + self.package_ops(block_desc) + self.package_vars(block_desc) + print 'blocks: ' + print self.program_desc.blocks + print 'convert end.....' 
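+        # serialize the assembled ProgramDesc; the bytes are written out
+        # below as the __model__ file next to the copied weights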
+ desc_serialize_to_string = self.program_desc.SerializeToString() + + outputmodel_dir = self.base_dir + 'datas/target/mobilenet_classfication/' + if os.path.exists(outputmodel_dir): + shutil.rmtree(outputmodel_dir) + os.makedirs(outputmodel_dir, 0777) + + if os.path.exists(outputmodel_dir): + shutil.rmtree(outputmodel_dir) + # create_if_not_exit(outputmodel_dir) + + shutil.copytree(self.target_weight_dir, outputmodel_dir) + + f = open(outputmodel_dir + "__model__", "wb") + f.write(desc_serialize_to_string) + f.close() + + def package_ops(self, block_desc): + + self.add_op_feed(block_desc) + + # add ops with layer + if 'layer' in self.mdl_json: + + layers_ = self.mdl_json['layer'] + for layer in layers_: + + if layer['type'] == 'SoftmaxLayer': + pass + else: + desc_ops_add = block_desc.ops.add() + + # print layer + # for i in layer: + # print i + if 'name' in layer: + l_name = layer['name'] + if 'type' in layer: + self.package_ops_type(desc_ops_add, layer) + + if 'weight' in layer: + self.package_ops_weight2inputs(desc_ops_add, layer) + + if 'output' in layer: + self.package_ops_outputs(desc_ops_add, layer) + + if 'input' in layer: + self.package_ops_inputs(desc_ops_add, layer) + + self.package_ops_attrs(desc_ops_add, layer) + + self.add_op_fetch(block_desc) + + def add_op_feed(self, block_desc): + desc_ops_add = block_desc.ops.add() + inputs_add = desc_ops_add.inputs.add() + inputs_add.parameter = 'X' + inputs_add.arguments.append('feed') + desc_ops_add.type = 'feed' + outputs_add = desc_ops_add.outputs.add() + outputs_add.parameter = 'Out' + outputs_add.arguments.append('data') + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'col' + # boolean + attrs_add.type = 0 + attrs_add.i = 0 + + def add_op_fetch(self, block_desc): + desc_ops_add = block_desc.ops.add() + inputs_add = desc_ops_add.inputs.add() + inputs_add.parameter = 'X' + # todo pick last layer --> op output + inputs_add.arguments.append('fc7') + desc_ops_add.type = 'fetch' + outputs_add = desc_ops_add.outputs.add() + outputs_add.parameter = 'Out' + outputs_add.arguments.append('fetch') + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'col' + # boolean + attrs_add.type = 0 + attrs_add.i = 0 + + @staticmethod + def package_ops_attrs(desc_ops_add, layer): + # print l_params + # print desc_ops_add.type + if desc_ops_add.type == types.op_fluid_fusion_conv_add: + Converter.pack_fusion_conv_add_attr(desc_ops_add, layer) + elif desc_ops_add.type == types.op_fluid_relu: + # fusion_conv_add : attrs + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'use_mkldnn' + # boolean + attrs_add.type = 6 + attrs_add.b = 0 + elif desc_ops_add.type == types.op_fluid_pooling: + Converter.pack_pooling_attr(desc_ops_add, layer) + pass + elif desc_ops_add.type == types.op_fluid_softmax: + pass + + @staticmethod + def pack_pooling_attr(desc_ops_add, layer): + print layer + l_params = layer['param'] + + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'use_mkldnn' + # boolean + attrs_add.type = 6 + attrs_add.b = 0 + + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'use_cudnn' + # boolean + attrs_add.type = 6 + attrs_add.b = 1 + + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'paddings' + # ints + attrs_add.type = 3 + attrs_add.ints.append(0) + attrs_add.ints.append(0) + + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'strides' + # ints + attrs_add.type = 3 + attrs_add.ints.append(1) + attrs_add.ints.append(1) + + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'global_pooling' + # boolean + 
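        # AttrType tags from framework.proto: 0 = INT, 2 = STRING,
+        # 3 = INTS, 6 = BOOLEAN
+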
attrs_add.type = 6 + attrs_add.b = (l_params[types.pool2d_attrs_dict.get('global_pooling')]) + + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'pooling_type' + # 2-->STRING + attrs_add.type = 2 + # 注意这里 avg but mdl is ave + attrs_add.s = l_params[types.pool2d_attrs_dict.get('pooling_type')] + + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'ceil_mode' + # boolean + attrs_add.type = 6 + attrs_add.b = 1 + + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'ksize' + # ints + attrs_add.type = 3 + attrs_add.ints.append(7) + attrs_add.ints.append(7) + + # type: "pool2d" + # attrs + # { + # name: "use_mkldnn" + # type: BOOLEAN + # b: false + # } + # attrs + # { + # name: "ceil_mode" + # type: BOOLEAN + # b: true + # } + # attrs + # { + # name: "use_cudnn" + # type: BOOLEAN + # b: true + # } + # attrs + # { + # name: "paddings" + # type: INTS + # ints: 0 + # ints: 0 + # } + # attrs + # { + # name: "strides" + # type: INTS + # ints: 1 + # ints: 1 + # } + # attrs + # { + # name: "global_pooling" + # type: BOOLEAN + # b: false + # } + # attrs + # { + # name: "data_format" + # type: STRING + # s: "AnyLayout" + # } + # attrs + # { + # name: "ksize" + # type: INTS + # ints: 7 + # ints: 7 + # } + # attrs + # { + # name: "pooling_type" + # type: STRING + # s: "avg" + # } + # is_target: false + + @staticmethod + def pack_fusion_conv_add_attr(desc_ops_add, layer): + + # fusion_conv_add : attrs + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'workspace_size_MB' + # 0-->INT + attrs_add.type = 0 + attrs_add.i = 4096 + + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'data_format' + # 2-->STRING + attrs_add.type = 2 + attrs_add.s = 'AnyLayout' + + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'use_mkldnn' + # boolean + attrs_add.type = 6 + attrs_add.b = 0 + + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'use_cudnn' + # boolean + attrs_add.type = 6 + attrs_add.b = 1 + + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'dilations' + # ints + attrs_add.type = 3 + attrs_add.ints.append(1) + attrs_add.ints.append(1) + + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'axis' + # int + attrs_add.type = 0 + attrs_add.i = 1 + + if 'param' in layer: + l_params = layer['param'] + + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'paddings' + # ints + attrs_add.type = 3 + attrs_add.ints.append(l_params[types.fusion_conv_add_attrs_dict.get('paddings')]) + attrs_add.ints.append(l_params[types.fusion_conv_add_attrs_dict.get('paddings')]) + + # attrs_add = desc_ops_add.attrs.add() + # attrs_add.name = 'paddings' + # # ints + # attrs_add.type = 3 + # attrs_add.ints.append(0) + # attrs_add.ints.append(0) + + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'strides' + # ints + attrs_add.type = 3 + attrs_add.ints.append(l_params[types.fusion_conv_add_attrs_dict.get('strides')]) + attrs_add.ints.append(l_params[types.fusion_conv_add_attrs_dict.get('strides')]) + + # attrs_add = desc_ops_add.attrs.add() + # attrs_add.name = 'strides' + # # ints + # attrs_add.type = 3 + # attrs_add.ints.append(6) + # attrs_add.ints.append(6) + + attrs_add = desc_ops_add.attrs.add() + attrs_add.name = 'groups' + # int + attrs_add.type = 0 + attrs_add.i = l_params[types.fusion_conv_add_attrs_dict.get('groups')] + # attrs_add.i = 1 + + # + # op_attrs_tupl = types.op_io_dict.get(desc_ops_add.type) \ + # .get(types.mdl_attrs_key) + # + # + # + # + # # group stride padding + # print '----------------------' + # for i, val in enumerate(op_attrs_tupl): + # 
attrs_add = desc_ops_add.attrs.add() + # attr_name = op_attrs_tupl[i] + # print attr_name + # attrs_add.name = attr_name + # attrs_add.type = types.fluid_attrs_type_dict.get(attr_name) + # attrs_add. + # print l_params[types.fusion_conv_add_attrs_dict.get(attr_name)] + + # for p in l_params: + # attrs_add = desc_ops_add.attrs.add() + + @staticmethod + def package_ops_inputs(desc_ops_add, layer): + l_inputs = layer['input'] + for i in l_inputs: + inputs_add = desc_ops_add.inputs.add() + # print i + inputs_add.parameter = types.op_io_dict.get(desc_ops_add.type).get(types.mdl_inputs_key) + inputs_add.arguments.append(i) + + @staticmethod + def package_ops_outputs(desc_ops_add, layer): + l_outputs = layer['output'] + for o in l_outputs: + # print o + outputs_add = desc_ops_add.outputs.add() + dict = types.op_io_dict.get(desc_ops_add.type) + # print 'desc_ops_add.type: ' + desc_ops_add.type + # print dict + outputs_add.parameter = dict.get(types.mdl_outputs_key) + outputs_add.arguments.append(o) + + def package_ops_weight2inputs(self, desc_ops_add, layer): + l_weights = layer['weight'] + for w in l_weights: + self.weight_list_.append(w) + + if layer['type'] == types.layer_mdl_deepwise_conv: + # print l_weights[0] + self.deepwise_weight_list_.append(l_weights[0]) + + op_weight_tup = types.op_io_dict.get(desc_ops_add.type).get(types.mdl_weight_key) + if op_weight_tup is not None: + # print len(op_weight_tup) + for i, val in enumerate(op_weight_tup): + # print i + # print val + inputs_add = desc_ops_add.inputs.add() + inputs_add.parameter = op_weight_tup[i] + inputs_add.arguments.append(l_weights[i]) + + # for w in l_weights: + # inputs_add = desc_ops_add.inputs.add() + # # print w + # inputs_add.parameter = op_weight_tup[0] + # inputs_add.arguments.append(w) + + @staticmethod + def package_ops_type(desc_ops_add, layer): + l_type = layer['type'] + # print l_type + # print mdl2fluid_op_layer_dict.get(l_type) + desc_ops_add.type = types.mdl2fluid_op_layer_dict.get(l_type) + + def package_vars(self, block_desc): + vars_add = block_desc.vars.add() + vars_add.name = 'feed' + vars_add.type.type = 9 # 9 is FEED_MINIBATCH + vars_add.persistable = 1 + # fetch + vars_add = block_desc.vars.add() + vars_add.name = 'fetch' + vars_add.type.type = 10 # 10 is fetch list + vars_add.persistable = 1 + + json_matrix_ = self.mdl_json['matrix'] + # print json_matrix_ + for j in json_matrix_: + vars_add = block_desc.vars.add() + vars_add.name = j + vars_add.type.type = 7 # 7 is lodtensor + # print j + tensor = vars_add.type.lod_tensor.tensor + tensor.data_type = 5 # 5 is FP32 + + # print json_matrix_ + + dims_of_matrix = json_matrix_.get(j) + # dims_size = len(dims_of_matrix) + # print dims_size + + # if dims_size == 4: + # tensor.dims.append(dims_of_matrix[0]) # N + # tensor.dims.append(dims_of_matrix[3]) # C + # tensor.dims.append(dims_of_matrix[1]) # H + # tensor.dims.append(dims_of_matrix[2]) # W + # else: + + # issues in mdl model filter swich n and c + if j in self.deepwise_weight_list_ and len(dims_of_matrix) == 4: + print "deep wise issue fit: " + j + tensor.dims.append(dims_of_matrix[1]) + tensor.dims.append(dims_of_matrix[0]) + tensor.dims.append(dims_of_matrix[2]) + tensor.dims.append(dims_of_matrix[3]) + print tensor.dims + else: + for dims in dims_of_matrix: + # print dims + tensor.dims.append(dims) + + if j in self.weight_list_: + vars_add.persistable = 1 + dims_size = len(dims_of_matrix) + # print dims_size + # print 'weight name : ' + j + Swichter().copy_add_head( + self.source_weights_dir + j + '.bin', 
+ self.target_weight_dir + j + ) + + # if dims_size == 4: + # # convert weight from nhwc to nchw + # Swichter().nhwc2nchw_one_slice_add_head( + # 'yolo/datas/multiobjects/float32s_nhwc/' + j + '.bin', + # 'yolo/datas/multiobjects/float32s_nchw_with_head/' + j, + # 'yolo/datas/multiobjects/float32s_nchw/' + j + '.tmp', + # dims_of_matrix[0], + # dims_of_matrix[1], + # dims_of_matrix[2], + # dims_of_matrix[3] + # ) + # else: + # Swichter().copy_add_head( + # 'yolo/datas/multiobjects/float32s_nhwc/' + j + '.bin', + # 'yolo/datas/multiobjects/float32s_nchw_with_head/' + j, + # 'yolo/datas/multiobjects/float32s_nchw/' + j + '.tmp' + # ) + else: + vars_add.persistable = 0 + + +mdl_path = "datas/sourcemodels/source_profile/mobileNetModel.json" +base_dir = "/Users/xiebaiyuan/PaddleProject/paddle-mobile/tools/python/modeltools/mobilenet/" +converter = Converter(base_dir, mdl_path) +converter.convert() diff --git a/tools/python/modeltools/mobilenet/swicher.py b/tools/python/modeltools/mobilenet/swicher.py new file mode 100644 index 0000000000000000000000000000000000000000..90bc6d26f600624b14c5912cddfe6e156865d196 --- /dev/null +++ b/tools/python/modeltools/mobilenet/swicher.py @@ -0,0 +1,119 @@ +import os +import shutil +from array import array + + +class Swichter: + def __init__(self): + pass + + def nhwc2nchw_one_slice(self, from_file_name, to_file_name, batch, channel, height, width): + from_file = open(from_file_name, "rb") + to_file = open(to_file_name, "wb") + + float_array = array("f") + float_array.fromfile(from_file, width * height * batch * channel) + float_write_array = array("f") + + for b in range(batch): + for c in range(channel): + for h in range(height): + for w in range(width): + float_value = float_array[b * channel * width * height + + channel * (h * width + w) + c] + + float_write_array.append(float_value) + + float_write_array.tofile(to_file) + from_file.close() + to_file.close() + + def copy(self, from_file_name, to_file_name): + from_file = open(from_file_name, "rb") + to_file = open(to_file_name, "wb") + + to_file.write(from_file.read()) + from_file.close() + to_file.close() + + def nhwc2nchw_one_slice_add_head(self, from_file_name, to_file_name, tmp_file_name, batch, channel, height, width): + from_file = open(from_file_name, "rb") + tmp_file = open(tmp_file_name, "wb+") + float_array = array("f") + float_array.fromfile(from_file, width * height * batch * channel) + float_write_array = array("f") + + for b in range(batch): + for c in range(channel): + for h in range(height): + for w in range(width): + float_value = float_array[b * channel * width * height + + channel * (h * width + w) + c] + + float_write_array.append(float_value) + + float_write_array.tofile(tmp_file) + tmp_file.close() + from_file.close() + + tmp_file = open(tmp_file_name, "rb") + to_file = open(to_file_name, "wb") + + tmp = tmp_file.read() + head = self.read_head('yolo/datas/yolo/head') + to_file.write(head) + to_file.write(tmp) + tmp_file.close() + to_file.close() + + def read_head(self, head_file): + from_file = open(head_file, "rb") + read = from_file.read(24) + # print read + from_file.close() + # print read + return read + + def copy_add_head(self, from_file_name, to_file_name): + + from_file = open(from_file_name, "rb") + to_file = open(to_file_name, "wb") + # tmp_file = open(tmp_file_name, "wb") + + head = self.read_head( + '/Users/xiebaiyuan/PaddleProject/paddle-mobile/tools/python/modeltools/mobilenet/datas/sourcemodels/head/head') + to_file.write(head) + to_file.write(from_file.read()) + 
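        # the target file now holds the 24-byte header (see read_head)
+        # followed by the raw float weight bytes
+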
from_file.close() + to_file.close() + pass + + def copy_padding_add_head(self, from_file_name, to_file_name, tmp_file_name, padding): + print'padding = %d' % padding + from_file = open(from_file_name, "rb") + # print len(from_file.read()) + from_file.seek(padding, 0) + + read = from_file.read() + print len(read) + + to_file = open(to_file_name, "wb") + # tmp_file = open(tmp_file_name, "wb") + + head = self.read_head('yolo/datas/yolo/head') + to_file.write(head) + to_file.write(read) + from_file.close() + to_file.close() + pass + +# Swichter().nhwc2nchw_one_slice_add_head( +# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/modeltools/multiobjects/float32s_nhwc/conv1_0.bin', +# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/modeltools/multiobjects/float32s_nchw_with_head/conv1_0', +# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/modeltools/multiobjects/float32s_nchw/.tmp', +# 32, +# 3, 3, 3) + +# Swichter().read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/modeltools/yolo/head') + +# Swichter().copy_add_head('datas/model.0.0.weight', 'datas/conv1_0', '') diff --git a/tools/python/modeltools/tools/__init__.py b/tools/python/modeltools/tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/python/tools/mdl2fluid/float2halffloat.py b/tools/python/modeltools/tools/float2halffloat.py similarity index 100% rename from python/tools/mdl2fluid/float2halffloat.py rename to tools/python/modeltools/tools/float2halffloat.py diff --git a/python/tools/mdl2fluid/loader.py b/tools/python/modeltools/tools/loader.py similarity index 73% rename from python/tools/mdl2fluid/loader.py rename to tools/python/modeltools/tools/loader.py index ef2258e365a84003b7b90ac480abbd9798f48f59..cb996c8bedd78004e667f1433bfdb20785e7792f 100644 --- a/python/tools/mdl2fluid/loader.py +++ b/tools/python/modeltools/tools/loader.py @@ -1,9 +1,4 @@ -import datetime import json -import os - -import google.protobuf as pbg -import framework_pb2 as framework_pb2 def loadmdl(json_path): diff --git a/python/tools/mdl2fluid/model_combine.py b/tools/python/modeltools/tools/model_combine.py similarity index 100% rename from python/tools/mdl2fluid/model_combine.py rename to tools/python/modeltools/tools/model_combine.py diff --git a/python/tools/mdl2fluid/model_reader.py b/tools/python/modeltools/tools/model_reader.py similarity index 71% rename from python/tools/mdl2fluid/model_reader.py rename to tools/python/modeltools/tools/model_reader.py index 8d53350db20739526b77663f791942299d4bc149..5f6e5f0cb9da8fb349e35211ed56f77bb9cf95da 100644 --- a/python/tools/mdl2fluid/model_reader.py +++ b/tools/python/modeltools/tools/model_reader.py @@ -1,6 +1,6 @@ import os -import framework_pb2 as framework_pb2 +from core import framework_pb2 as framework_pb2 def read_model(model_path): @@ -16,7 +16,7 @@ def read_model(model_path): # print desc.blocks except IOError: - print ": File not found. Creating a new file." + print ": File not found." 
def get_file_size(file_path): @@ -26,5 +26,5 @@ def get_file_size(file_path): return round(fsize, 2) -path = "newyolo/__model__" +path = '/Users/xiebaiyuan/PaddleProject/paddle-mobile/tools/python/modeltools/mobilenet/datas/sourcemodels/mobilenet_example/mobilenet/__model__' read_model(path) diff --git a/tools/python/modeltools/yolo/__init__.py b/tools/python/modeltools/yolo/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/python/tools/mdl2fluid/mdl2fluid.py b/tools/python/modeltools/yolo/mdl2fluid.py similarity index 89% rename from python/tools/mdl2fluid/mdl2fluid.py rename to tools/python/modeltools/yolo/mdl2fluid.py index a57a01d09eaf236fd9f890dcb9e8eead19aa7868..2c2d0f3e9498254f26da6ff1b88b8a33e1b31d27 100644 --- a/python/tools/mdl2fluid/mdl2fluid.py +++ b/tools/python/modeltools/yolo/mdl2fluid.py @@ -1,9 +1,7 @@ import json -import os -import framework_pb2 as framework_pb2 -import op_types as types -from swicher import Swichter +from core import framework_pb2 as framework_pb2, op_types as types +from yolo.swicher import Swichter import shutil @@ -40,10 +38,10 @@ class Converter: print self.program_desc.blocks print 'convert end.....' desc_serialize_to_string = self.program_desc.SerializeToString() - shutil.rmtree('newyolo/') - shutil.copytree('multiobjects/float32s_nchw_with_head', 'newyolo/') + shutil.rmtree('yolo/datas/newyolo/') + shutil.copytree('yolo/datas/multiobjects/float32s_nchw_with_head/', 'yolo/datas/newyolo/') - f = open("newyolo/__model__", "wb") + f = open("yolo/datas/newyolo/__model__", "wb") f.write(desc_serialize_to_string) f.close() @@ -312,9 +310,9 @@ class Converter: if dims_size == 4: # convert weight from nhwc to nchw Swichter().nhwc2nchw_one_slice_add_head( - '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nhwc/' + j + '.bin', - '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw_with_head/' + j, - '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw/' + j + '.tmp', + 'yolo/datas/multiobjects/float32s_nhwc/' + j + '.bin', + 'yolo/datas/multiobjects/float32s_nchw_with_head/' + j, + 'yolo/datas/multiobjects/float32s_nchw/' + j + '.tmp', dims_of_matrix[0], dims_of_matrix[1], dims_of_matrix[2], @@ -322,14 +320,14 @@ class Converter: ) else: Swichter().copy_add_head( - '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nhwc/' + j + '.bin', - '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw_with_head/' + j, - '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw/' + j + '.tmp' + 'yolo/datas/multiobjects/float32s_nhwc/' + j + '.bin', + 'yolo/datas/multiobjects/float32s_nchw_with_head/' + j, + 'yolo/datas/multiobjects/float32s_nchw/' + j + '.tmp' ) else: vars_add.persistable = 0 -mdl_path = "/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/YOLO_Universal.json" +mdl_path = "yolo/datas/multiobjects/YOLO_Universal.json" converter = Converter(mdl_path) converter.convert() diff --git a/python/tools/mdl2fluid/swicher.py b/tools/python/modeltools/yolo/swicher.py similarity index 86% rename from python/tools/mdl2fluid/swicher.py rename to tools/python/modeltools/yolo/swicher.py index bfe0360fd5b32f5e6fa61f6f05a0a384fb3a1e9b..713ce93985957fe7f3c99d6bc6a9c436faea59a4 100644 --- 
a/python/tools/mdl2fluid/swicher.py +++ b/tools/python/modeltools/yolo/swicher.py @@ -58,7 +58,7 @@ class Swichter: to_file = open(to_file_name, "wb") tmp = tmp_file.read() - head = self.read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases') + head = self.read_head('yolo/datas/yolo/head') to_file.write(head) to_file.write(tmp) tmp_file.close() @@ -77,7 +77,7 @@ class Swichter: to_file = open(to_file_name, "wb") # tmp_file = open(tmp_file_name, "wb") - head = self.read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases') + head = self.read_head('yolo/datas/yolo/head') to_file.write(head) to_file.write(from_file.read()) from_file.close() @@ -96,7 +96,7 @@ class Swichter: to_file = open(to_file_name, "wb") # tmp_file = open(tmp_file_name, "wb") - head = self.read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases') + head = self.read_head('yolo/datas/yolo/head') to_file.write(head) to_file.write(read) from_file.close() @@ -104,12 +104,12 @@ class Swichter: pass # Swichter().nhwc2nchw_one_slice_add_head( -# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nhwc/conv1_0.bin', -# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw_with_head/conv1_0', -# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw/.tmp', +# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/modeltools/multiobjects/float32s_nhwc/conv1_0.bin', +# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/modeltools/multiobjects/float32s_nchw_with_head/conv1_0', +# '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/modeltools/multiobjects/float32s_nchw/.tmp', # 32, # 3, 3, 3) -# Swichter().read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases') +# Swichter().read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/modeltools/yolo/head') # Swichter().copy_add_head('datas/model.0.0.weight', 'datas/conv1_0', '')
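For reference, the quadruple loop in the `nhwc2nchw_one_slice` routines of both swicher.py variants is an NHWC to NCHW relayout, which NumPy expresses as a single transpose. A sketch assuming the input file holds raw float32 values in NHWC order (hypothetical helper, not part of these tools):

```python
import numpy as np

def nhwc2nchw(path_in, path_out, batch, channel, height, width):
    # read raw float32s laid out as NHWC, reorder to NCHW, write back
    data = np.fromfile(path_in, dtype=np.float32)
    data = data.reshape(batch, height, width, channel)
    data.transpose(0, 3, 1, 2).tofile(path_out)
```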