diff --git a/README.md b/README.md index 05a109a81791ac85d975138dcd76f7f71716624a..de8fe0bb7f4613bd9d6dfebd82db1d407ee682f4 100644 --- a/README.md +++ b/README.md @@ -28,19 +28,19 @@ Paddle-Moible是PaddlePaddle组织下的项目,是一个致力于嵌入式平 |mobilenet arm v7|1线程|2线程|4线程| |------------|----|-----|-----| -|麒麟960(ms)|110.586|70.897|47.474| +|麒麟960(ms)|110.586|63.285|38.215| ||||| |mobilenetssd arm v7|1线程|2线程|4线程| -|麒麟960(ms)|222.124|138.952|90.856| +|麒麟960(ms)|220.248|128.473|79.334| ||||| |googlenet(v1) arm v7|1线程|2线程|4线程| -|麒麟960(ms)|348.018|240.304|169.998| +|麒麟960(ms)|341.965|228.724|161.531| ||||| |squeezenet arm v7|1线程|2线程|4线程| -|麒麟960(ms)|84.685|56.544|38.833| +|麒麟960(ms)|84.080|55.641|37.182| ||||| |yolo arm v7|1线程|2线程|4线程| -|麒麟960(ms)|131.831|88.990|60.905| +|麒麟960(ms)|129.445|80.627|50.936| arm cpu是paddle-mobile的主要支持方向,cpu的通用性一直是其优势。嵌入式深度学习,需要大量的cpu汇编实现。我们正在紧锣密鼓的编码,为的是能充分硬件的每一点加速能力。 arm cpu的优化工作还在进行中,现在使用了常规的cpu优化。在arm a73上paddle-mobile arm-v7现在单核运行一次mobilenet1.0是110+ms,显然这不是我们的最终目标,我们正在用大量的汇编改写,后续性能仍会有巨大提升空间, 目前只支持armv7, 未来我们也会支持armv8。 diff --git a/src/io/executor.cpp b/src/io/executor.cpp index 73e6c9d6f170fc4eebb6af2f8b7a67c847961950..91005287055b7af859d738ea20c40abbf5f7db96 100644 --- a/src/io/executor.cpp +++ b/src/io/executor.cpp @@ -192,8 +192,14 @@ void Executor::LoadMemory(const framework::VarDesc var_desc, } *data += (memory_size * sizeof(uint8_t)); } else { - for (int n = 0; n < memory_size * type_size; ++n) { - static_cast(memory)[n] = (*data)[n]; + for (int n = 0; n < memory_size; n++) { + float value; + memcpy(&value, *data + n * type_size, type_size); + if (value < 1e-30 && value > -1e-30) { + static_cast(memory)[n] = 0.0; + } else { + static_cast(memory)[n] = value; + } } (*data) += (sizeof(char) * memory_size * type_size); } diff --git a/test/net/test_squeezenet.cpp b/test/net/test_squeezenet.cpp index 4c14f63bde40675a7e0016e28d900788431ff2ae..5d89618859d47fd7d61d61871583e1ebbca3db33 100644 --- a/test/net/test_squeezenet.cpp +++ b/test/net/test_squeezenet.cpp @@ -18,7 +18,7 @@ limitations under the License. */ int main() { paddle_mobile::PaddleMobile paddle_mobile; - paddle_mobile.SetThreadNum(2); + paddle_mobile.SetThreadNum(4); // ../../../test/models/googlenet // ../../../test/models/mobilenet auto time1 = time(); diff --git a/test/net/test_yolo.cpp b/test/net/test_yolo.cpp index 83508cff335c55f5cc416c6652d83706a4626c1a..ffe3cdc22c4f847da2503192660a99f7f6d62e37 100644 --- a/test/net/test_yolo.cpp +++ b/test/net/test_yolo.cpp @@ -18,7 +18,7 @@ limitations under the License. */ int main() { paddle_mobile::PaddleMobile paddle_mobile; - paddle_mobile.SetThreadNum(2); + paddle_mobile.SetThreadNum(4); // ../../../test/models/googlenet // ../../../test/models/mobilenet auto time1 = time(); diff --git a/test/operators/test_fusion_conv_add_bn_relu_op.cpp b/test/operators/test_fusion_conv_add_bn_relu_op.cpp index 81400d987195364c06b4b93d0859469b43f90e7b..7764d95ed72da613459233bd55ddcffdc444318f 100644 --- a/test/operators/test_fusion_conv_add_bn_relu_op.cpp +++ b/test/operators/test_fusion_conv_add_bn_relu_op.cpp @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include "../test_include.h" #include "operators/fusion_conv_add_bn_relu_op.h"