Commit f5cc72dc authored by S smilejames, committed by GitHub

Merge branch 'develop' into develop

@@ -61,7 +61,14 @@ struct PaddleMobileException : public std::exception {
   }
 #else
 #define PADDLE_MOBILE_THROW_EXCEPTION(...)
-#define PADDLE_MOBILE_ENFORCE(stat, ...)
+#define PADDLE_MOBILE_ENFORCE(stat, ...) \
+  {                                      \
+    if (stat) {                          \
+    } else {                             \
+    }                                    \
+  }
 #endif
 }  // namespace paddle_mobile
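
Note on the new branch: in builds where exceptions are compiled out, PADDLE_MOBILE_ENFORCE now expands to a block that still evaluates the condition but throws nothing, so existing call sites keep compiling unchanged. A minimal sketch of that expansion follows; only the macro body comes from this diff, the call site and condition are hypothetical.

```cpp
// Sketch of the release-mode expansion of PADDLE_MOBILE_ENFORCE from the hunk
// above; the surrounding main() and the pointer check are illustrative only.
#define PADDLE_MOBILE_ENFORCE(stat, ...) \
  {                                      \
    if (stat) {                          \
    } else {                             \
    }                                    \
  }

int main() {
  const int *ptr = nullptr;
  // The condition is still evaluated, but the message arguments are dropped
  // and no exception is thrown in this configuration.
  PADDLE_MOBILE_ENFORCE(ptr == nullptr, "ptr should be null in this sketch");
  return 0;
}
```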
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include "cstring"
 #include "io/paddle_inference_api.h"
 namespace paddle_mobile {
...
@@ -74,4 +74,5 @@ USE_OP_CPU(feed);
 USE_OP_MALI_GPU(feed);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(feed);
 #endif
@@ -25,4 +25,5 @@ REGISTER_OPERATOR_CPU(fetch, ops::FetchOp);
 REGISTER_OPERATOR_MALI_GPU(fetch, ops::FetchOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp);
 #endif
@@ -54,4 +54,5 @@ USE_OP_CPU(fetch);
 USE_OP_MALI_GPU(fetch);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(fetch);
 #endif
@@ -32,12 +32,7 @@ void ConvAddReluCompute(const FusionConvAddReluParam &param) {
   Tensor bias = *param.Bias();
   int axis = param.Axis();
   Tensor *output = param.Output();
-  // math::expand_bias(bias, axis, output->dims());
-  float *output_data = output->data<float>();
   float *biase_data = bias.data<float>();
-  // for (int k = 0; k < output->numel(); ++k) {
-  //   output_data[k] = biase_data[k];
-  // }
   int groups = param.Groups();
   std::vector<int> strides = param.Strides();
...
@@ -30,7 +30,6 @@ inline void ConvBasic(const ConvParam &param) {
   Tensor filter = *param.Filter();
   Tensor *output = param.Output();
   output->mutable_data<float>();
-  float *bias_data = output->mutable_data<float>();
   int groups = param.Groups();
   std::vector<int> strides = param.Strides();
   std::vector<int> paddings = param.Paddings();
@@ -107,7 +106,7 @@ inline void ConvBasic(const ConvParam &param) {
       Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
       math::matmul<float>(filter_slice, false, col_matrix, false,
                           static_cast<float>(1), &out_slice,
-                          static_cast<float>(0), false, bias_data);
+                          static_cast<float>(0));
     }
   }
 }
...
@@ -59,7 +59,6 @@ void MulCompute(const MulParam &param) {
   const Tensor *input_y = param.InputY();
   Tensor *out = param.Out();
   out->mutable_data<float>();
-  float *bias_data = out->mutable_data<float>();
   const Tensor x_matrix =
       input_x->dims().size() > 2
           ? framework::ReshapeToMatrix(*input_x, param.XNumColDims())
@@ -73,7 +72,7 @@ void MulCompute(const MulParam &param) {
     out->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
   }
   math::matmul<float>(x_matrix, false, y_matrix, false, static_cast<float>(1),
-                      out, static_cast<float>(0), false, bias_data);
+                      out, static_cast<float>(0));
   if (out_dim.size() != 2) {
     out->Resize(out_dim);
   }
...
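
With the trailing `false, bias_data` arguments gone, both matmul call sites above compute a plain C = alpha·A·B + beta·C with alpha = 1, beta = 0 and no fused bias. For reference, a standalone scalar sketch of that computation; the helper below is hypothetical and not the library's NEON-backed math::matmul.

```cpp
// Naive reference for what the simplified calls compute:
// C = alpha * A(m x k) * B(k x n) + beta * C, used here with alpha = 1, beta = 0.
// Hypothetical helper for illustration only.
void naive_matmul(int m, int n, int k, float alpha, const float *A,
                  const float *B, float beta, float *C) {
  for (int i = 0; i < m; ++i) {
    for (int j = 0; j < n; ++j) {
      float acc = 0.f;
      for (int p = 0; p < k; ++p) {
        acc += A[i * k + p] * B[p * n + j];
      }
      // With beta == 0 the previous contents of C are ignored entirely.
      C[i * n + j] =
          (beta == 0.f) ? alpha * acc : alpha * acc + beta * C[i * n + j];
    }
  }
}
```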
@@ -25,9 +25,9 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   const Tensor *input_x = param->InputX();
   const Tensor *input_y = param->InputY();
   Tensor *out = param->Out();
-  auto input_x_ptr = input_x->data<float>();
-  auto input_y_ptr = input_y->data<float>();
-  auto out_ptr = out->mutable_data<float>();
+  auto input_x_ptr = input_x->data<half>();
+  auto input_y_ptr = input_y->data<half>();
+  auto out_ptr = out->mutable_data<half>();
   fpga::EWAddArgs ewaddArgs;
   ewaddArgs.relu_enabled = relu_enabled;
...
@@ -22,13 +22,13 @@ template <>
 bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam *param) {
   bool relu_enabled = true;
   const Tensor *input_x = param->InputX();
-  auto input_x_ptr = input_x->data<float>();
+  auto input_x_ptr = input_x->data<half>();
   const Tensor *input_y = param->InputY();
   auto input_y_ptr = input_y->data<float>();
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
   Tensor *out = param->Out();
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
   PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
                         "Image channel should be equal to weight number");
...
@@ -22,13 +22,13 @@ template <>
 bool FusionFcKernel<FPGA, float>::Init(FusionFcParam *param) {
   bool relu_enabled = false;
   const Tensor *input_x = param->InputX();
-  auto input_x_ptr = input_x->data<float>();
+  auto input_x_ptr = input_x->data<half>();
   const Tensor *input_y = param->InputY();
   auto input_y_ptr = input_y->data<float>();
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
   Tensor *out = param->Out();
-  auto out_ptr = out->mutable_data<float>();
+  auto out_ptr = out->mutable_data<half>();
   PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
                         "Image channel should be equal to weight number");
...
@@ -22,9 +22,9 @@ namespace operators {
 template <>
 bool PoolKernel<FPGA, float>::Init(PoolParam *param) {
   const Tensor *input = param->Input();
-  auto input_ptr = input->data<float>();
+  auto input_ptr = input->data<half>();
   Tensor *output = param->Output();
-  auto output_ptr = output->mutable_data<float>();
+  auto output_ptr = output->mutable_data<half>();
   vector<int> ksize = param->Ksize();
   vector<int> strides = param->Strides();
   vector<int> paddings = param->Paddings();
...
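
The four FPGA kernel hunks above make the same move: activation tensors (inputs and outputs) are now accessed as half, while weight and bias tensors keep float storage. Condensed from those hunks for reference; these are the accessors already shown in the diff, not a new interface.

```cpp
// Access pattern shared by the FPGA Init() hunks above: feature maps in fp16,
// parameters in fp32. Lines condensed from the diff.
auto input_x_ptr = input_x->data<half>();    // input feature map read as fp16
auto input_y_ptr = input_y->data<float>();   // weights stay fp32 (FC kernels)
auto input_z_ptr = input_z->data<float>();   // bias stays fp32 (FC kernels)
auto out_ptr = out->mutable_data<half>();    // output buffer allocated as fp16
```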
@@ -373,9 +373,9 @@ void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b,
 #endif
     }
   }
   if (alpha != 1) {
     WriteWithAlphaBeta(mc, nc, c, C, ldc);
     return;
   }
   if (beta == 0) {
@@ -2244,6 +2244,27 @@ void AddDot4x4(int k, const float *a, const float *b, float *c, int ldc) {
   }
 }
+
+void AddDot4x8(int k, const float *a, const float *b, float *c, int ldc) {}
+
+void WriteBasic(int mc, int nc, float *c, float *C, int ldc) {}
+
+void WriteWithAlphaBeta(int mc, int nc, float *c, float *C, int ldc) {}
+
+void WriteWithAdd(int mc, int nc, float *c, float *C, int ldc) {}
+
+void WriteWithAddV1(int mc, int nc, float *c, float *C, int ldc, float *bias) {}
+
+void WriteWithAddRelu(int mc, int nc, float *c, float *C, int ldc) {}
+
+void WriteWithAddReluV1(int mc, int nc, float *c, float *C, int ldc,
+                        float *bias) {}
+
+void WriteWithBn(int mc, int nc, float *c, float *C, int ldc, float *new_scale,
+                 float *new_bias) {}
+
+void WriteWithBnRelu(int mc, int nc, float *c, float *C, int ldc,
+                     float *new_scale, float *new_bias) {}
 #endif  // __ARM_NEON
 // 32-bit float matrix multiplication
...
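
The Write* stubs added above are empty placeholders on the non-NEON path. For orientation only, here is a plain scalar sketch of what a WriteBasic-style helper is conventionally expected to do, assuming the packed result buffer `c` stores the mc x nc block contiguously; this is an assumption drawn from the parameter names, not the project's actual kernel.

```cpp
#include <cstring>

// Assumed semantics (sketch only): copy the mc x nc result block from the
// packed buffer c (row stride nc) into the output matrix C (row stride ldc).
// The empty WriteBasic stub above leaves this unimplemented off the NEON path.
void WriteBasicSketch(int mc, int nc, const float *c, float *C, int ldc) {
  for (int i = 0; i < mc; ++i) {
    std::memcpy(C + i * ldc, c + i * nc, sizeof(float) * nc);
  }
}
```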