Unverified · Commit 8f2e11bb authored by qnqinan, committed by GitHub

Merge pull request #790 from zhangyang0701/develop

Correct FPGA kernels. Close #789
@@ -37,11 +37,11 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
   auto bn_scale_ptr = param->InputScale()->data<float>();
   auto bn_bias_ptr = param->InputBias()->data<float>();
   const float epsilon = param->Epsilon();
-  PADDLE_MOBILE_ENFORCE(input->dims()[1] == bias->dims()[0] &&
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0] &&
                             bias->dims()[0] == param->InputBias()->dims()[0],
-                        "Image channel should be equal to bias number");
-  const int channel = input->dims()[1];
+                        "Output channel should be equal to bias number");
+  const int channel = out->dims()[1];
   float *bs_ptr =
       reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));
   Tensor *new_scale = new Tensor();
...
@@ -35,11 +35,11 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(FusionConvAddBNReluParam *param) {
   auto bn_scale_ptr = param->InputScale()->data<float>();
   auto bn_bias_ptr = param->InputBias()->data<float>();
   const float epsilon = param->Epsilon();
-  PADDLE_MOBILE_ENFORCE(input->dims()[1] == bias->dims()[0] &&
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0] &&
                             bias->dims()[0] == param->InputBias()->dims()[0],
-                        "Image channel should be equal to bias number");
-  const int channel = input->dims()[1];
+                        "Output channel should be equal to bias number");
+  const int channel = out->dims()[1];
   float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   Tensor *new_scale = new Tensor();
   Tensor *new_bias = new Tensor();
...
@@ -31,9 +31,9 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam *param) {
   Tensor *out = param->Output();
   auto out_ptr = out->mutable_data<half>();
-  PADDLE_MOBILE_ENFORCE(input->dims()[1] == bias->dims()[0],
-                        "Image channel should be equal to bias number");
-  int channel = input->dims()[1];
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
+                        "Output channel should be equal to bias number");
+  int channel = out->dims()[1];
   float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
     bs_ptr[i * 2] = 1;
...
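A minimal sketch (not the kernel code itself) of the interleaved buffer that the fpga::fpga_malloc(2 * channel * sizeof(float)) call above appears to back: one (scale, bias) pair per output channel, which is why channel must come from out->dims()[1] rather than the input tensor. std::vector stands in for the FPGA allocator; the bs_ptr[i * 2 + 1] = bias layout is an assumption inferred from the visible bs_ptr[i * 2] = 1; loop.

#include <cstdio>
#include <vector>

// Sketch: build the per-output-channel [scale, bias] buffer that the FPGA
// conv kernels allocate with fpga_malloc(2 * channel * sizeof(float)).
// std::vector stands in for fpga::fpga_malloc; bias values are dummies.
int main() {
  const int out_channels = 4;                   // out->dims()[1] in the kernels
  std::vector<float> bias(out_channels, 0.5f);  // one bias entry per filter

  // Interleaved layout: bs[2*i] = scale, bs[2*i + 1] = bias (assumed).
  std::vector<float> bs(2 * out_channels);
  for (int i = 0; i < out_channels; i++) {
    bs[i * 2] = 1.0f;         // matches the bs_ptr[i * 2] = 1; loop in the diff
    bs[i * 2 + 1] = bias[i];  // assumed to carry the per-channel bias
  }

  for (int i = 0; i < out_channels; i++) {
    std::printf("channel %d: scale=%.1f bias=%.1f\n", i, bs[i * 2], bs[i * 2 + 1]);
  }
  return 0;
}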
@@ -35,10 +35,10 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam *param) {
   auto bn_scale_ptr = param->InputScale()->data<float>();
   auto bn_bias_ptr = param->InputBias()->data<float>();
   const float epsilon = param->Epsilon();
-  PADDLE_MOBILE_ENFORCE(input->dims()[1] == param->InputBias()->dims()[0],
-                        "Image channel should be equal to bias number");
-  const int channel = input->dims()[1];
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == param->InputBias()->dims()[0],
+                        "Output channel should be equal to bias number");
+  const int channel = out->dims()[1];
   float *bs_ptr =
       reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));
   Tensor *new_scale = new Tensor();
...
@@ -33,10 +33,10 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam *param) {
   auto bn_scale_ptr = param->InputScale()->data<float>();
   auto bn_bias_ptr = param->InputBias()->data<float>();
   const float epsilon = param->Epsilon();
-  PADDLE_MOBILE_ENFORCE(input->dims()[1] == param->InputBias()->dims()[0],
-                        "Image channel should be equal to bias number");
-  const int channel = input->dims()[1];
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == param->InputBias()->dims()[0],
+                        "Output channel should be equal to bias number");
+  const int channel = out->dims()[1];
   float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   Tensor *new_scale = new Tensor();
   Tensor *new_bias = new Tensor();
...
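The BN kernels above read bn_scale_ptr, bn_bias_ptr, and epsilon and fill new_scale / new_bias tensors sized by the output channel count. A hedged sketch of the standard batch-norm folding such an Init typically performs; the exact formula in paddle-mobile is not shown in this diff, so treat it as an assumption.

#include <cmath>
#include <cstdio>
#include <vector>

// Standard batch-norm folding into per-output-channel scale/bias:
//   new_scale[c] = gamma[c] / sqrt(var[c] + epsilon)
//   new_bias[c]  = beta[c] - mean[c] * new_scale[c]
// Array sizes follow the output channel count, matching the fix in the diff.
int main() {
  const int channel = 3;  // out->dims()[1]
  const float epsilon = 1e-5f;
  std::vector<float> gamma = {1.0f, 0.8f, 1.2f};   // bn scale
  std::vector<float> beta  = {0.1f, 0.0f, -0.2f};  // bn bias
  std::vector<float> mean  = {0.5f, 0.4f, 0.6f};
  std::vector<float> var   = {1.0f, 0.9f, 1.1f};

  std::vector<float> new_scale(channel), new_bias(channel);
  for (int c = 0; c < channel; c++) {
    new_scale[c] = gamma[c] / std::sqrt(var[c] + epsilon);
    new_bias[c] = beta[c] - mean[c] * new_scale[c];
    std::printf("c=%d new_scale=%.4f new_bias=%.4f\n", c, new_scale[c], new_bias[c]);
  }
  return 0;
}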
@@ -33,7 +33,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam *param) {
   PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
                         "Image channel should be equal to weight number");
-  int channel = input_x->dims()[1];
+  int channel = out->dims()[1];
   float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
     bs_ptr[i * 2] = 1;
...
@@ -32,7 +32,7 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam *param) {
   PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
                         "Image channel should be equal to weight number");
-  int channel = input_x->dims()[1];
+  int channel = out->dims()[1];
   float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
     bs_ptr[i * 2] = 1;
...
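For the fully connected kernels, if we assume input_x is [batch, in_dim], input_y (the weight) is [in_dim, out_dim], and out is [batch, out_dim], then the scale/bias buffer needs one pair per output neuron, so channel must be out->dims()[1] rather than input_x->dims()[1]. A small standalone sketch of that shape relation; the dimension values are made up.

#include <cassert>
#include <cstdio>

// Sketch of the FC shape relation behind changing input_x->dims()[1]
// to out->dims()[1] (example sizes are invented):
//   input_x: [batch, in_dim], input_y (weights): [in_dim, out_dim],
//   out: [batch, out_dim]  ->  channel == out_dim, not in_dim.
int main() {
  const int batch = 1, in_dim = 1024, out_dim = 1000;

  int input_x_dims[2] = {batch, in_dim};
  int input_y_dims[2] = {in_dim, out_dim};
  int out_dims[2] = {batch, out_dim};

  // The enforce kept by the diff: input width must match weight rows.
  assert(input_x_dims[1] == input_y_dims[0]);

  // The per-channel scale/bias buffer is sized by the output width.
  int channel = out_dims[1];
  std::printf("channel = %d (out_dim), not %d (in_dim)\n", channel, input_x_dims[1]);
  return 0;
}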
@@ -17,7 +17,13 @@ limitations under the License. */
 #include "../test_include.h"
 int main() {
+#ifdef PADDLE_MOBILE_FPGA
+  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
+#endif
+#ifdef PADDLE_MOBILE_CPU
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+#endif
   paddle_mobile.SetThreadNum(4);
   auto time1 = time();
   if (paddle_mobile.Load(g_resnet, true)) {
...