提交 b2897733 编写于 作者: R Ruilong Liu 提交者: GitHub

Merge branch 'develop' into develop

......@@ -61,7 +61,14 @@ struct PaddleMobileException : public std::exception {
}
#else
#define PADDLE_MOBILE_THROW_EXCEPTION(...)
#define PADDLE_MOBILE_ENFORCE(stat, ...)
#define PADDLE_MOBILE_ENFORCE(stat, ...) \
{ \
if (stat) { \
} else { \
} \
}
#endif
} // namespace paddle_mobile
......@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "cstring"
#include "io/paddle_inference_api.h"
namespace paddle_mobile {
......
......@@ -30,9 +30,6 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
: framework::OperatorBase<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, scope.get()) {}
void RunImpl() const { param_.Out()->ShareDataWith(*param_.InputX()); }
void Init() {}
void InferShape() const {
auto out_dims = param_.Out()->dims();
......@@ -40,6 +37,29 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
param_.Out()->Resize(out_dims);
}
#ifdef PADDLE_MOBILE_FPGA
void RunImpl() const { fpga::PerformBypass(param_.FpgaArgs()); }
void Init() {
const Tensor *input = param_.InputX();
auto input_ptr = input->data<float>();
Tensor *output = param_.Out();
auto output_ptr = output->mutable_data<half>();
fpga::BypassArgs args;
args.convert_type = fpga::DATA_FP32_TO_FP16;
args.layout_type = fpga::LAYOUT_CHW_TO_HWC;
args.image.address = (void *)input_ptr;
args.image.channels = input->dims()[1];
args.image.height = input->dims()[2];
args.image.width = input->dims()[3];
args.output.address = output_ptr;
param_.SetFpgaArgs(args);
}
#else
void RunImpl() const { param_.Out()->ShareDataWith(*param_.InputX()); }
void Init() {}
#endif
protected:
FeedParam param_;
};
......@@ -54,4 +74,5 @@ USE_OP_CPU(feed);
USE_OP_MALI_GPU(feed);
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(feed);
#endif
......@@ -25,4 +25,5 @@ REGISTER_OPERATOR_CPU(fetch, ops::FetchOp);
REGISTER_OPERATOR_MALI_GPU(fetch, ops::FetchOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp);
#endif
......@@ -54,4 +54,5 @@ USE_OP_CPU(fetch);
USE_OP_MALI_GPU(fetch);
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(fetch);
#endif
......@@ -16,6 +16,8 @@ limitations under the License. */
#pragma once
#include <cmath>
#include "framework/tensor.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......@@ -122,7 +124,7 @@ void BoxCoderCompute(const BoxCoderParam& param) {
auto col = input_priorbox->dims()[0];
auto len = input_priorbox->dims()[1];
Tensor* output_box = param.OutputBox();
framework::Tensor* output_box = param.OutputBox();
auto* output_box_dataptr = output_box->mutable_data<float>({row, col, len});
if (code_type == "encode_center_size") {
......
......@@ -31,12 +31,7 @@ void ConvAddBasic(const FusionConvAddParam &param) {
Tensor bias = *param.Bias();
int axis = param.Axis();
Tensor *output = param.Output();
math::expand_bias(bias, axis, output->dims());
float *output_data = output->data<float>();
float *biase_data = bias.data<float>();
for (int k = 0; k < output->numel(); ++k) {
output_data[k] = biase_data[k];
}
int groups = param.Groups();
std::vector<int> strides = param.Strides();
......@@ -113,7 +108,7 @@ void ConvAddBasic(const FusionConvAddParam &param) {
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
math::matmul<float>(filter_slice, false, col_matrix, false,
static_cast<float>(1), &out_slice,
static_cast<float>(1));
static_cast<float>(1), false, biase_data);
}
}
}
......
......@@ -18,6 +18,9 @@ limitations under the License. */
#include <vector>
#include "operators/math/depthwise_conv_3x3.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile {
......
......@@ -16,6 +16,10 @@ limitations under the License. */
#pragma once
#include <vector>
#include "operators/math/conv_func.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile {
......@@ -28,12 +32,7 @@ void ConvAddReluCompute(const FusionConvAddReluParam &param) {
Tensor bias = *param.Bias();
int axis = param.Axis();
Tensor *output = param.Output();
math::expand_bias(bias, axis, output->dims());
float *output_data = output->data<float>();
float *biase_data = bias.data<float>();
for (int k = 0; k < output->numel(); ++k) {
output_data[k] = biase_data[k];
}
int groups = param.Groups();
std::vector<int> strides = param.Strides();
......@@ -111,7 +110,7 @@ void ConvAddReluCompute(const FusionConvAddReluParam &param) {
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
math::matmul<float>(filter_slice, false, col_matrix, false,
static_cast<float>(1), &out_slice,
static_cast<float>(1), true);
static_cast<float>(1), true, biase_data);
}
}
}
......
......@@ -17,6 +17,9 @@ limitations under the License. */
#pragma once
#include <vector>
#include "operators/math/depthwise_conv_3x3.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......
......@@ -17,6 +17,9 @@ limitations under the License. */
#pragma once
#include <vector>
#include "operators/math/depthwise_conv_3x3.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......
......@@ -15,6 +15,8 @@ limitations under the License. */
#ifdef ELEMENTWISEADD_OP
#pragma once
#include "operators/math/elementwise_op_function.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......
......@@ -15,6 +15,8 @@ limitations under the License. */
#ifdef FUSION_FC_OP
#pragma once
#include "operators/math/math_function.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......@@ -28,6 +30,7 @@ void FusionFcCompute(const FusionFcParam &param) {
int axis = param.Axis();
Tensor *out = param.Out();
auto *out_data = out->mutable_data<float>();
float *bias_data = out->mutable_data<float>();
const Tensor x_matrix =
input_x->dims().size() > 2
? framework::ReshapeToMatrix(*input_x, param.XNumColDims())
......@@ -45,18 +48,18 @@ void FusionFcCompute(const FusionFcParam &param) {
PADDLE_MOBILE_ENFORCE(out_dim[1] == input_z->dims()[0],
" out_dim.size must be 2.");
axis = (axis == -1 ? out_dim.size() - input_z->dims().size() : axis);
PADDLE_MOBILE_ENFORCE(axis == 1, " to fit broadcast, axis = 1. ")
PADDLE_MOBILE_ENFORCE(axis == 1, " to fit broadcast, axis = 1. ");
int64_t classes = input_z->numel();
for (int i = 0; i < out_dim[0]; i++) {
memory::Copy(out_data + i * classes, input_z_data, sizeof(float) * classes);
}
for (int i = 0; i < out->numel(); i++) {
DLOG << out_data[i];
}
// for (int i = 0; i < out->numel(); i++) {
// DLOG << out_data[i];
// }
math::matmul<float>(x_matrix, false, y_matrix, false, static_cast<float>(1),
out, static_cast<float>(1));
out, static_cast<float>(1), false, bias_data);
PADDLE_MOBILE_ENFORCE(out_dim.size() == 2, " out_dim.size must be 2.");
// if (out_dim.size() != 2) {
// out->Resize(out_dim);
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef LRN_OP
#pragma once
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......
......@@ -19,6 +19,8 @@ limitations under the License. */
#include <map>
#include <utility>
#include <vector>
#include "framework/tensor.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......@@ -89,7 +91,8 @@ static inline T JaccardOverlap(const T* box1, const T* box2,
}
template <typename T>
static inline void NMSFast(const Tensor& bbox, const Tensor& scores,
static inline void NMSFast(const framework::Tensor& bbox,
const framework::Tensor& scores,
const T score_threshold, const T nms_threshold,
const T eta, const int64_t top_k,
std::vector<int>* selected_indices) {
......@@ -131,7 +134,8 @@ static inline void NMSFast(const Tensor& bbox, const Tensor& scores,
}
template <typename T>
void MultiClassNMS(const Tensor& scores, const Tensor& bboxes,
void MultiClassNMS(const framework::Tensor& scores,
const framework::Tensor& bboxes,
std::map<int, std::vector<int>>* indices, int* num_nmsed_out,
const int& background_label, const int& nms_top_k,
const int& keep_top_k, const T& nms_threshold,
......@@ -141,7 +145,7 @@ void MultiClassNMS(const Tensor& scores, const Tensor& bboxes,
int num_det = 0;
for (int64_t c = 0; c < class_num; ++c) {
if (c == background_label) continue;
Tensor score = scores.Slice(c, c + 1);
framework::Tensor score = scores.Slice(c, c + 1);
/// [c] is key
NMSFast<float>(bboxes, score, score_threshold, nms_threshold, nms_eta,
nms_top_k, &((*indices)[c]));
......@@ -181,9 +185,10 @@ void MultiClassNMS(const Tensor& scores, const Tensor& bboxes,
}
template <typename T>
void MultiClassOutput(const Tensor& scores, const Tensor& bboxes,
void MultiClassOutput(const framework::Tensor& scores,
const framework::Tensor& bboxes,
const std::map<int, std::vector<int>>& selected_indices,
Tensor* outs) {
framework::Tensor* outs) {
int predict_dim = scores.dims()[1];
auto* scores_data = scores.data<T>();
auto* bboxes_data = bboxes.data<T>();
......@@ -231,10 +236,10 @@ void MultiClassNMSCompute(const MultiClassNMSParam& param) {
std::vector<std::map<int, std::vector<int>>> all_indices;
std::vector<size_t> batch_starts = {0};
for (int64_t i = 0; i < batch_size; ++i) {
Tensor ins_score = input_scores->Slice(i, i + 1);
framework::Tensor ins_score = input_scores->Slice(i, i + 1);
ins_score.Resize({class_num, predict_dim});
Tensor ins_boxes = input_bboxes->Slice(i, i + 1);
framework::Tensor ins_boxes = input_bboxes->Slice(i, i + 1);
ins_boxes.Resize({predict_dim, box_dim});
std::map<int, std::vector<int>> indices;
......@@ -253,16 +258,16 @@ void MultiClassNMSCompute(const MultiClassNMSParam& param) {
} else {
outs->mutable_data<float>({num_kept, kOutputDim});
for (int64_t i = 0; i < batch_size; ++i) {
Tensor ins_score = input_scores->Slice(i, i + 1);
framework::Tensor ins_score = input_scores->Slice(i, i + 1);
ins_score.Resize({class_num, predict_dim});
Tensor ins_boxes = input_bboxes->Slice(i, i + 1);
framework::Tensor ins_boxes = input_bboxes->Slice(i, i + 1);
ins_boxes.Resize({predict_dim, box_dim});
int64_t s = batch_starts[i];
int64_t e = batch_starts[i + 1];
if (e > s) {
Tensor out = outs->Slice(s, e);
framework::Tensor out = outs->Slice(s, e);
MultiClassOutput<float>(ins_score, ins_boxes, all_indices[i], &out);
}
}
......
......@@ -16,6 +16,7 @@ limitations under the License. */
#pragma once
#include <operators/math/transform.h>
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......
......@@ -16,6 +16,8 @@ limitations under the License. */
#pragma once
#include <vector>
#include "operators/kernel/reshape_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef SOFTMAX_OP
#pragma once
#include "../../math/softmax.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename P>
......
......@@ -16,6 +16,7 @@ limitations under the License. */
#pragma once
#include <vector>
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......
......@@ -24,13 +24,13 @@ template <>
bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
bool relu_enabled = false;
const Tensor *input = param->Input();
auto input_ptr = input->data<float>();
auto input_ptr = input->data<half>();
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
const Tensor *filter = param->Filter();
auto filter_ptr = filter->data<float>();
Tensor *out = param->Output();
auto out_ptr = out->mutable_data<float>();
auto out_ptr = out->mutable_data<half>();
auto bn_mean_ptr = param->InputMean()->data<float>();
auto bn_var_ptr = param->InputVariance()->data<float>();
auto bn_scale_ptr = param->InputScale()->data<float>();
......
......@@ -24,13 +24,13 @@ template <>
bool ConvAddBNReluKernel<FPGA, float>::Init(FusionConvAddBNReluParam *param) {
bool relu_enabled = true;
const Tensor *input = param->Input();
auto input_ptr = input->data<float>();
auto input_ptr = input->data<half>();
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
const Tensor *filter = param->Filter();
auto filter_ptr = filter->data<float>();
Tensor *out = param->Output();
auto out_ptr = out->mutable_data<float>();
auto out_ptr = out->mutable_data<half>();
auto bn_mean_ptr = param->InputMean()->data<float>();
auto bn_var_ptr = param->InputVariance()->data<float>();
auto bn_scale_ptr = param->InputScale()->data<float>();
......
......@@ -24,13 +24,13 @@ template <>
bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam *param) {
bool relu_enabled = true;
const Tensor *input = param->Input();
auto input_ptr = input->data<float>();
auto input_ptr = input->data<half>();
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
const Tensor *filter = param->Filter();
auto filter_ptr = filter->data<float>();
Tensor *out = param->Output();
auto out_ptr = out->mutable_data<float>();
auto out_ptr = out->mutable_data<half>();
PADDLE_MOBILE_ENFORCE(input->dims()[1] == bias->dims()[0],
"Image channel should be equal to bias number");
......
......@@ -25,9 +25,9 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
const Tensor *input_x = param->InputX();
const Tensor *input_y = param->InputY();
Tensor *out = param->Out();
auto input_x_ptr = input_x->data<float>();
auto input_y_ptr = input_y->data<float>();
auto out_ptr = out->mutable_data<float>();
auto input_x_ptr = input_x->data<half>();
auto input_y_ptr = input_y->data<half>();
auto out_ptr = out->mutable_data<half>();
fpga::EWAddArgs ewaddArgs;
ewaddArgs.relu_enabled = relu_enabled;
......
......@@ -22,13 +22,13 @@ template <>
bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam *param) {
bool relu_enabled = true;
const Tensor *input_x = param->InputX();
auto input_x_ptr = input_x->data<float>();
auto input_x_ptr = input_x->data<half>();
const Tensor *input_y = param->InputY();
auto input_y_ptr = input_y->data<float>();
const Tensor *input_z = param->InputZ();
auto input_z_ptr = input_z->data<float>();
Tensor *out = param->Out();
auto out_ptr = out->mutable_data<float>();
auto out_ptr = out->mutable_data<half>();
PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
"Image channel should be equal to weight number");
......
......@@ -22,13 +22,13 @@ template <>
bool FusionFcKernel<FPGA, float>::Init(FusionFcParam *param) {
bool relu_enabled = false;
const Tensor *input_x = param->InputX();
auto input_x_ptr = input_x->data<float>();
auto input_x_ptr = input_x->data<half>();
const Tensor *input_y = param->InputY();
auto input_y_ptr = input_y->data<float>();
const Tensor *input_z = param->InputZ();
auto input_z_ptr = input_z->data<float>();
Tensor *out = param->Out();
auto out_ptr = out->mutable_data<float>();
auto out_ptr = out->mutable_data<half>();
PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
"Image channel should be equal to weight number");
......
......@@ -22,9 +22,9 @@ namespace operators {
template <>
bool PoolKernel<FPGA, float>::Init(PoolParam *param) {
const Tensor *input = param->Input();
auto input_ptr = input->data<float>();
auto input_ptr = input->data<half>();
Tensor *output = param->Output();
auto output_ptr = output->mutable_data<float>();
auto output_ptr = output->mutable_data<half>();
vector<int> ksize = param->Ksize();
vector<int> strides = param->Strides();
vector<int> paddings = param->Paddings();
......
此差异已折叠。
......@@ -50,6 +50,10 @@ void PackMatrixA_6r(int m, int k, int m_tail, const float *A, int lda,
float *buffer);
void PackMatrixA_8r(int m, int k, int m_tail, const float *A, int lda,
float *buffer);
void PackMatrixA_omp_6r(int m, int k, int m_tail, const float *A, int lda,
float *buffer);
void PackMatrixA_omp_8r(int m, int k, int m_tail, const float *A, int lda,
float *buffer);
// 将 B 矩阵分块复制到连续内存(RowMajor)
void PackMatrixB_8c(int k, int n, int n_tail, const float *B, int ldb,
......@@ -58,10 +62,19 @@ void PackMatrixB_12c(int k, int n, int n_tail, const float *B, int ldb,
float *buffer);
void PackMatrixB_16c(int k, int n, int n_tail, const float *B, int ldb,
float *buffer);
void PackMatrixB_omp_8c(int k, int n, int n_tail, const float *B, int ldb,
float *buffer);
void PackMatrixB_omp_12c(int k, int n, int n_tail, const float *B, int ldb,
float *buffer);
void PackMatrixB_omp_16c(int k, int n, int n_tail, const float *B, int ldb,
float *buffer);
// 分块矩阵乘法
void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b,
float beta, float *c, float *C, int ldc, bool relu);
void InnerKernelWithBias(int mc, int nc, float alpha, const float *a,
const float *b, float beta, float *c, float *C,
int ldc, bool relu, float *bias);
void InnerKernelWithBn(int mc, int nc, float alpha, const float *a,
const float *b, float beta, float *c, float *C, int ldc,
......@@ -91,8 +104,13 @@ void WriteBasic(int mc, int nc, float *c, float *C, int ldc);
void WriteWithAlphaBeta(int mc, int nc, float *c, float *C, int ldc);
// C = A * B + C
void WriteWithAdd(int mc, int nc, float *c, float *C, int ldc);
// C = A * B + bias
void WriteWithAddV1(int mc, int nc, float *c, float *C, int ldc, float *bias);
// C = A * B + C, relu(C)
void WriteWithAddRelu(int mc, int nc, float *c, float *C, int ldc);
// C = A * B + bias ,relu(C)
void WriteWithAddReluV1(int mc, int nc, float *c, float *C, int ldc,
float *bias);
// C = A * B, batchnorm(C)
void WriteWithBn(int mc, int nc, float *c, float *C, int ldc, float *new_scale,
float *new_bias);
......@@ -120,13 +138,24 @@ void VecWriteWithBnRelu(int n, float *c, float *C, int ldc, float *new_scale,
// 32位 float 矩阵乘法
void Sgemm(int m, int n, int k, float alpha, const float *A, int lda,
const float *B, int ldb, float beta, float *C, int ldc, bool relu);
const float *B, int ldb, float beta, float *C, int ldc, bool relu,
float *bias);
// 32位 float 矩阵乘法, 并对结果进行 batchnrom
void SgemmWithBn(int m, int n, int k, float alpha, const float *A, int lda,
const float *B, int ldb, float beta, float *C, int ldc,
bool relu, float *new_scale, float *new_bias);
// 32位 float 矩阵乘法(openmp 多线程版本)
void Sgemm_omp(int m, int n, int k, float alpha, const float *A, int lda,
const float *B, int ldb, float beta, float *C, int ldc,
bool relu, float *bias);
// 32位 float 矩阵乘法, 并对结果进行 batchnrom(openmp 多线程版本)
void SgemmWithBn_omp(int m, int n, int k, float alpha, const float *A, int lda,
const float *B, int ldb, float beta, float *C, int ldc,
bool relu, float *new_scale, float *new_bias);
} // namespace math
} // namespace operators
} // namespace paddle_mobile
......@@ -22,7 +22,8 @@ namespace math {
template <>
void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
const framework::Tensor &matrix_b, bool trans_b, float alpha,
framework::Tensor *matrix_out, float beta, bool relu) {
framework::Tensor *matrix_out, float beta, bool relu,
float *bias) {
auto dim_a = matrix_a.dims();
auto dim_b = matrix_b.dims();
auto dim_out = matrix_out->dims();
......@@ -41,8 +42,13 @@ void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
int N = dim_out[1];
int K = (!trans_a) ? dim_a[1] : dim_a[0];
#ifdef _OPENMP
Sgemm_omp(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(),
N, beta, matrix_out->data<float>(), N, relu, bias);
#else
Sgemm(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(), N,
beta, matrix_out->data<float>(), N, relu);
beta, matrix_out->data<float>(), N, relu, bias);
#endif
}
template <>
......@@ -69,10 +75,17 @@ void matmulWithBn<float>(const framework::Tensor &matrix_a, bool trans_a,
int N = dim_out[1];
int K = (!trans_a) ? dim_a[1] : dim_a[0];
#ifdef _OPENMP
SgemmWithBn_omp(M, N, K, alpha, matrix_a.data<float>(), K,
matrix_b.data<float>(), N, beta, matrix_out->data<float>(), N,
relu, new_scale->data<float>() + group,
new_bias->data<float>() + group);
#else
SgemmWithBn(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(),
N, beta, matrix_out->data<float>(), N, relu,
new_scale->data<float>() + group,
new_bias->data<float>() + group);
#endif
}
} // namespace math
......
......@@ -21,11 +21,11 @@ namespace paddle_mobile {
namespace operators {
namespace math {
// matrix multiply with continuous memory
template <typename T>
void matmul(const framework::Tensor &matrix_a, bool trans_a,
const framework::Tensor &matrix_b, bool trans_b, T alpha,
framework::Tensor *matrix_out, T beta, bool relu = false);
framework::Tensor *matrix_out, T beta, bool relu = false,
float *bias = nullptr);
template <typename T>
void matmulWithBn(const framework::Tensor &matrix_a, bool trans_a,
......
......@@ -665,6 +665,16 @@ class FeedParam : public OpParam {
Tensor *input_x_;
Tensor *out_;
int batch_size;
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::BypassArgs fpga_bypass_args;
public:
const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
#endif
};
class FetchParam : public OpParam {
......
......@@ -49,9 +49,9 @@ int main() {
auto time1 = time();
for (int j = 0; j < 10; ++j) {
paddle_mobile::operators::math::matmul<float>(aa, false, bb, false,
static_cast<float>(1), &cc,
static_cast<float>(0), false);
paddle_mobile::operators::math::matmul<float>(
aa, false, bb, false, static_cast<float>(1), &cc, static_cast<float>(0),
false, biasptr);
// paddle_mobile::operators::math::matmulWithBn<float>(
// aa, false, bb, false, static_cast<float>(1), &cc,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册