提交 f9ea0707 编写于 作者: Z zhangyang

Merge remote-tracking branch 'upstream/develop' into develop

......@@ -35,6 +35,12 @@ Paddle-Moible是PaddlePaddle组织下的项目,是一个致力于嵌入式平
|||||
|googlenet(v1) arm v7|1线程|2线程|4线程|
|麒麟960(ms)|348.018|240.304|169.998|
|||||
|squeezenet arm v7|1线程|2线程|4线程|
|麒麟960(ms)|84.685|56.544|38.833|
|||||
|yolo arm v7|1线程|2线程|4线程|
|麒麟960(ms)|131.831|88.990|60.905|
arm cpu是paddle-mobile的主要支持方向,cpu的通用性一直是其优势。嵌入式深度学习,需要大量的cpu汇编实现。我们正在紧锣密鼓的编码,为的是能充分硬件的每一点加速能力。
arm cpu的优化工作还在进行中,现在使用了常规的cpu优化。在arm a73上paddle-mobile arm-v7现在单核运行一次mobilenet1.0是110+ms,显然这不是我们的最终目标,我们正在用大量的汇编改写,后续性能仍会有巨大提升空间, 目前只支持armv7, 未来我们也会支持armv8。
......
......@@ -16,6 +16,8 @@ limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/conv_add_relu_kernel.h"
......@@ -65,11 +67,11 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel<
#ifdef PADDLE_MOBILE_CPU
//#ifndef CONV_ADD_RELU_REGISTER
//#define CONV_ADD_RELU_REGISTER
// static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(new
// FusionConvAddReluOpMatcher());
//#endif
#ifndef CONV_ADD_RELU_REGISTER
#define CONV_ADD_RELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(
new FusionConvAddReluOpMatcher());
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/math/gemm.h"
#include "operators/math/math_function.h"
......@@ -26,6 +27,8 @@ limitations under the License. */
#define k 1024
int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(4);
Tensor aa, bb, cc, scale, bias;
auto aaptr = aa.mutable_data<float>({m, k});
auto bbptr = bb.mutable_data<float>({k, n});
......
......@@ -12,16 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <fstream>
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(4);
auto time1 = time();
if (paddle_mobile.Load(g_resnet, false)) {
if (paddle_mobile.Load(g_resnet, true)) {
auto time2 = time();
DLOG << "load cost :" << time_diff(time1, time1) << "ms";
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
std::vector<int64_t> dims{1, 3, 32, 32};
Tensor input_tensor;
SetupTensor<float>(&input_tensor, {1, 3, 32, 32}, static_cast<float>(0),
......@@ -29,10 +30,15 @@ int main() {
std::vector<float> input(input_tensor.data<float>(),
input_tensor.data<float>() + input_tensor.numel());
// 预热一次
paddle_mobile.Predict(input, dims);
auto time3 = time();
for (int i = 0; i < 10; ++i) {
paddle_mobile.Predict(input, dims);
}
auto time4 = time();
DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
<< std::endl;
}
return 0;
......
......@@ -12,18 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <fstream>
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(2);
// ../../../test/models/googlenet
// ../../../test/models/mobilenet
auto time1 = time();
if (paddle_mobile.Load(g_squeezenet, false)) {
if (paddle_mobile.Load(g_squeezenet, true)) {
auto time2 = time();
DLOG << "load cost :" << time_diff(time1, time1) << "ms";
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
std::vector<int64_t> dims{1, 3, 227, 227};
Tensor input_tensor;
SetupTensor<float>(&input_tensor, {1, 3, 227, 227}, static_cast<float>(0),
......@@ -31,10 +32,15 @@ int main() {
std::vector<float> input(input_tensor.data<float>(),
input_tensor.data<float>() + input_tensor.numel());
// 预热一次
paddle_mobile.Predict(input, dims);
auto time3 = time();
for (int i = 0; i < 10; ++i) {
paddle_mobile.Predict(input, dims);
}
auto time4 = time();
DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
<< std::endl;
}
return 0;
......
......@@ -12,18 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <fstream>
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(2);
// ../../../test/models/googlenet
// ../../../test/models/mobilenet
auto time1 = time();
if (paddle_mobile.Load(g_yolo, false)) {
if (paddle_mobile.Load(g_yolo, true)) {
auto time2 = time();
DLOG << "load cost :" << time_diff(time1, time1) << "ms";
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
std::vector<int64_t> dims{1, 3, 227, 227};
Tensor input_tensor;
......@@ -32,10 +33,15 @@ int main() {
std::vector<float> input(input_tensor.data<float>(),
input_tensor.data<float>() + input_tensor.numel());
// 预热一次
paddle_mobile.Predict(input, dims);
auto time3 = time();
for (int i = 0; i < 10; ++i) {
paddle_mobile.Predict(input, dims);
}
auto time4 = time();
DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
<< std::endl;
}
return 0;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册