diff --git a/src/common/enforce.h b/src/common/enforce.h
index 51d2110e32433686d1b3353bc63b92a564a13e9d..aebe2a58031cb1341596f07dbf653be4a5e01900 100644
--- a/src/common/enforce.h
+++ b/src/common/enforce.h
@@ -61,7 +61,14 @@ struct PaddleMobileException : public std::exception {
   }
 #else
 #define PADDLE_MOBILE_THROW_EXCEPTION(...)
-#define PADDLE_MOBILE_ENFORCE(stat, ...)
+
+#define PADDLE_MOBILE_ENFORCE(stat, ...) \
+  {                                      \
+    if (stat) {                          \
+    } else {                             \
+    }                                    \
+  }
+
 #endif
 
 }  // namespace paddle_mobile
diff --git a/src/io/api.cc b/src/io/api.cc
index 2103c5317b8d15988b19d1c1bf07e1042a6453a0..0e254aa15ac06083038773d89c23d40242847782 100644
--- a/src/io/api.cc
+++ b/src/io/api.cc
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#include "cstring"
 #include "io/paddle_inference_api.h"
 
 namespace paddle_mobile {
diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h
index b34c7cf78b0b808e512e68e5429671bf8e9d8c4a..4766d56d9ae0b86cc28c476a17547acfd53ab02b 100644
--- a/src/operators/feed_op.h
+++ b/src/operators/feed_op.h
@@ -74,4 +74,5 @@ USE_OP_CPU(feed);
 USE_OP_MALI_GPU(feed);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(feed);
 #endif
diff --git a/src/operators/fetch_op.cpp b/src/operators/fetch_op.cpp
index adbd61d5ec364a40b565059ceb5d5d49999c8436..30cddceaa45da91be5ea91d70f78503c404552c3 100644
--- a/src/operators/fetch_op.cpp
+++ b/src/operators/fetch_op.cpp
@@ -25,4 +25,5 @@ REGISTER_OPERATOR_CPU(fetch, ops::FetchOp);
 REGISTER_OPERATOR_MALI_GPU(fetch, ops::FetchOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp);
 #endif
diff --git a/src/operators/fetch_op.h b/src/operators/fetch_op.h
index 5614fef8fe1a5b2e234b29e6d7b52cc4c2719008..417637c80086b099395e93227991491309f656fe 100644
--- a/src/operators/fetch_op.h
+++ b/src/operators/fetch_op.h
@@ -54,4 +54,5 @@ USE_OP_CPU(fetch);
 USE_OP_MALI_GPU(fetch);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(fetch);
 #endif
diff --git a/src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h b/src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
index 7b019b60db98d87e4de9315e96fedca7929d4add..6c619dd2a29ce140c783af0637f51153a1866791 100644
--- a/src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
+++ b/src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
@@ -32,12 +32,7 @@ void ConvAddReluCompute(const FusionConvAddReluParam &param) {
   Tensor bias = *param.Bias();
   int axis = param.Axis();
   Tensor *output = param.Output();
-  // math::expand_bias(bias, axis, output->dims());
-  float *output_data = output->data<float>();
   float *biase_data = bias.data<float>();
-  // for (int k = 0; k < output->numel(); ++k) {
-  //   output_data[k] = biase_data[k];
-  // }
 
   int groups = param.Groups();
   std::vector<int> strides = param.Strides();
diff --git a/src/operators/kernel/central-arm-func/conv_arm_func.h b/src/operators/kernel/central-arm-func/conv_arm_func.h
index 41acb973409d9655ae47a8655c1cb527e9563775..33caded3afaaf125bac9108f2fafeda3d3c2049f 100644
--- a/src/operators/kernel/central-arm-func/conv_arm_func.h
+++ b/src/operators/kernel/central-arm-func/conv_arm_func.h
@@ -30,7 +30,6 @@ inline void ConvBasic(const ConvParam &param) {
   Tensor filter = *param.Filter();
   Tensor *output = param.Output();
   output->mutable_data<float>();
-  float *bias_data = output->mutable_data<float>();
   int groups = param.Groups();
   std::vector<int> strides = param.Strides();
   std::vector<int> paddings = param.Paddings();
@@ -107,7 +106,7 @@ inline void ConvBasic(const ConvParam &param) {
       Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
       math::matmul<float>(filter_slice, false, col_matrix, false,
                           static_cast<float>(1), &out_slice,
-                          static_cast<float>(0), false, bias_data);
+                          static_cast<float>(0));
     }
   }
 }
diff --git a/src/operators/kernel/central-arm-func/mul_arm_func.h b/src/operators/kernel/central-arm-func/mul_arm_func.h
index 341759a96e1e7216fb9550596d3d3533dd0ab80a..d2da67afe1d2eb746971a2443bdb449eb2b66ec4 100644
--- a/src/operators/kernel/central-arm-func/mul_arm_func.h
+++ b/src/operators/kernel/central-arm-func/mul_arm_func.h
@@ -59,7 +59,6 @@ void MulCompute(const MulParam &param) {
   const Tensor *input_y = param.InputY();
   Tensor *out = param.Out();
   out->mutable_data<float>();
-  float *bias_data = out->mutable_data<float>();
   const Tensor x_matrix =
       input_x->dims().size() > 2
           ? framework::ReshapeToMatrix(*input_x, param.XNumColDims())
@@ -73,7 +72,7 @@ void MulCompute(const MulParam &param) {
     out->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
   }
   math::matmul<float>(x_matrix, false, y_matrix, false, static_cast<float>(1),
-                      out, static_cast<float>(0), false, bias_data);
+                      out, static_cast<float>(0));
   if (out_dim.size() != 2) {
     out->Resize(out_dim);
   }
diff --git a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
index 5dd8991e2a23540e81f043cd6199443d98098ff8..88a19beb41f67e5fc9336c8883c8ea75aaa939e0 100644
--- a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
@@ -25,9 +25,9 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   const Tensor *input_x = param->InputX();
   const Tensor *input_y = param->InputY();
   Tensor *out = param->Out();
-  auto input_x_ptr = input_x->data<float>();
-  auto input_y_ptr = input_y->data<float>();
-  auto out_ptr = out->mutable_data<float>();
+  auto input_x_ptr = input_x->data<half>();
+  auto input_y_ptr = input_y->data<half>();
+  auto out_ptr = out->mutable_data<half>();
 
   fpga::EWAddArgs ewaddArgs;
   ewaddArgs.relu_enabled = relu_enabled;
diff --git a/src/operators/kernel/fpga/fc_relu_kernel.cpp b/src/operators/kernel/fpga/fc_relu_kernel.cpp
index 6b828f102412fb5aa8ef125c4ccb9b96598fc458..21e334b12b70be1980d9417ed11161143106d1c6 100644
--- a/src/operators/kernel/fpga/fc_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/fc_relu_kernel.cpp
@@ -22,13 +22,13 @@ template <>
 bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam *param) {
   bool relu_enabled = true;
   const Tensor *input_x = param->InputX();
-  auto input_x_ptr = input_x->data<float>();
+  auto input_x_ptr = input_x->data<half>();
   const Tensor *input_y = param->InputY();
   auto input_y_ptr = input_y->data<float>();
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
   Tensor *out = param->Out();
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
 
   PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
                         "Image channel should be equal to weight number");
diff --git a/src/operators/kernel/fpga/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/fusion_fc_kernel.cpp
index 340561a9aa97ceda0bd37c40d721a0c5e3e535b4..505b8768565dc4003152c3493b558448f9d73d04 100644
--- a/src/operators/kernel/fpga/fusion_fc_kernel.cpp
+++ b/src/operators/kernel/fpga/fusion_fc_kernel.cpp
@@ -22,13 +22,13 @@ template <>
 bool FusionFcKernel<FPGA, float>::Init(FusionFcParam *param) {
   bool relu_enabled = false;
   const Tensor *input_x = param->InputX();
-  auto input_x_ptr = input_x->data<float>();
+  auto input_x_ptr = input_x->data<half>();
   const Tensor *input_y = param->InputY();
   auto input_y_ptr = input_y->data<float>();
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
   Tensor *out = param->Out();
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
 
   PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
                         "Image channel should be equal to weight number");
diff --git a/src/operators/kernel/fpga/pool_kernel.cpp b/src/operators/kernel/fpga/pool_kernel.cpp
index 3e7dc5fd591fc85b98c7850102248c2264c62ba3..a7ff022c3b8616847c48a71bf94e4018cedcad2e 100644
--- a/src/operators/kernel/fpga/pool_kernel.cpp
+++ b/src/operators/kernel/fpga/pool_kernel.cpp
@@ -22,9 +22,9 @@ namespace operators {
 template <>
 bool PoolKernel<FPGA, float>::Init(PoolParam *param) {
   const Tensor *input = param->Input();
-  auto input_ptr = input->data<float>();
+  auto input_ptr = input->data<half>();
   Tensor *output = param->Output();
-  auto output_ptr = output->mutable_data<float>();
+  auto output_ptr = output->mutable_data<half>();
   vector<int> ksize = param->Ksize();
   vector<int> strides = param->Strides();
   vector<int> paddings = param->Paddings();
diff --git a/src/operators/math/gemm.cpp b/src/operators/math/gemm.cpp
index ef1625b72c54b168eb3b58a4126d2500fbfe561f..815db53d037f25285871f603de248970ac4cb4e8 100644
--- a/src/operators/math/gemm.cpp
+++ b/src/operators/math/gemm.cpp
@@ -373,9 +373,9 @@ void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b,
 #endif
     }
   }
+
   if (alpha != 1) {
     WriteWithAlphaBeta(mc, nc, c, C, ldc);
-    return;
   }
 
   if (beta == 0) {
@@ -2244,6 +2244,27 @@ void AddDot4x4(int k, const float *a, const float *b, float *c, int ldc) {
   }
 }
 
+void AddDot4x8(int k, const float *a, const float *b, float *c, int ldc) {}
+
+void WriteBasic(int mc, int nc, float *c, float *C, int ldc) {}
+
+void WriteWithAlphaBeta(int mc, int nc, float *c, float *C, int ldc) {}
+
+void WriteWithAdd(int mc, int nc, float *c, float *C, int ldc) {}
+
+void WriteWithAddV1(int mc, int nc, float *c, float *C, int ldc, float *bias) {}
+
+void WriteWithAddRelu(int mc, int nc, float *c, float *C, int ldc) {}
+
+void WriteWithAddReluV1(int mc, int nc, float *c, float *C, int ldc,
+                        float *bias) {}
+
+void WriteWithBn(int mc, int nc, float *c, float *C, int ldc, float *new_scale,
+                 float *new_bias) {}
+
+void WriteWithBnRelu(int mc, int nc, float *c, float *C, int ldc,
+                     float *new_scale, float *new_bias) {}
+
 #endif  // __ARM_NEON
 
 // 32位 float 矩阵乘法
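Note on the src/common/enforce.h hunk: when exceptions are compiled out, `PADDLE_MOBILE_ENFORCE` still has to consume its condition so that call sites compile unchanged and any side effects in `stat` are preserved; the empty `if (stat) {} else {}` body does exactly that while letting the compiler optimize the branch away. A minimal standalone sketch of the same pattern (the `CHECK` name and the `main` harness are illustrative, not part of the patch):

```cpp
#include <cstdio>

// No-op check in the style of the patched PADDLE_MOBILE_ENFORCE: the message
// arguments are swallowed, but the condition is still evaluated, so argument
// expressions with side effects behave the same as in the throwing build and
// no unused-variable warnings appear at call sites.
#define CHECK(stat, ...) \
  {                      \
    if (stat) {          \
    } else {             \
    }                    \
  }

static int counter = 0;
static bool bump() { return ++counter > 0; }  // condition with a side effect

int main() {
  CHECK(bump(), "message ignored in this build");
  std::printf("counter = %d\n", counter);  // prints "counter = 1"
  return 0;
}
```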
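The conv_arm_func.h and mul_arm_func.h hunks drop the trailing `false, bias_data` arguments from `math::matmul`, leaving a conventional alpha/beta GEMM call with alpha = 1 and beta = 0, which is why the scratch `bias_data` buffers could be deleted. A reference sketch of those semantics (plain C++, not paddle-mobile's actual `math::matmul` implementation):

```cpp
#include <cstdio>

// Naive single-precision GEMM with the alpha/beta convention the trimmed
// call sites rely on: C = alpha * A * B + beta * C. With alpha = 1 and
// beta = 0, C is simply overwritten with the product.
void gemm(int m, int n, int k, float alpha, const float *A, const float *B,
          float beta, float *C) {
  for (int i = 0; i < m; ++i) {
    for (int j = 0; j < n; ++j) {
      float acc = 0.0f;
      for (int p = 0; p < k; ++p) acc += A[i * k + p] * B[p * n + j];
      C[i * n + j] = alpha * acc + beta * C[i * n + j];
    }
  }
}

int main() {
  const float A[4] = {1, 2, 3, 4};  // 2x2, row-major
  const float B[4] = {5, 6, 7, 8};  // 2x2, row-major
  float C[4] = {0};
  gemm(2, 2, 2, 1.0f, A, B, 0.0f, C);  // alpha = 1, beta = 0
  std::printf("%g %g %g %g\n", C[0], C[1], C[2], C[3]);  // 19 22 43 50
  return 0;
}
```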