Commit baa59299 authored by Z zhangyang

Merge remote-tracking branch 'upstream/develop' into develop

......
@@ -25,21 +25,22 @@ namespace operators {
template <>
bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
const Tensor *input = param->InputX();
if (input->type() == typeid(half)) {
auto input_ptr = input->data<half>();
auto output_ptr = param->Out();
fpga::BypassArgs args;
args.input_layout_type = fpga::LAYOUT_HWC;
args.output_layout_type = fpga::LAYOUT_CHW;
args.input_data_type = fpga::DATA_TYPE_FP16;
args.output_data_type = fpga::DATA_TYPE_FP32;
args.image.address = (void *)(input_ptr);
args.image.height = (uint32_t)input->dims()[0];
args.image.width = (uint32_t)input->dims()[1];
args.image.channels = 1;
args.output.address = output_ptr;
param->SetFpgaArgs(args);
}
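// Stage the input through an FPGA bypass (FP16 -> FP32) into a separate float
// tensor; Compute() then runs the CPU softmax on that staged copy.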
auto input_ptr = input->data<float>();
auto output_ptr = param->Out();
Tensor *floatInput = new Tensor(*input);
fpga::BypassArgs args;
args.input_layout_type = fpga::LAYOUT_HWC;
args.output_layout_type = fpga::LAYOUT_CHW;
args.input_data_type = fpga::DATA_TYPE_FP16;
args.output_data_type = fpga::DATA_TYPE_FP32;
args.image.address = (void *)(input_ptr);
args.image.height = (uint32_t)input->dims()[0];
args.image.width = (uint32_t)input->dims()[1];
args.image.channels = 1;
args.output.address = (void *)floatInput->mutable_data<float>();
param->SetFloatInput(floatInput);
param->SetFpgaArgs(args);
return true;
}
......
@@ -48,8 +49,12 @@ void SoftmaxKernel<FPGA, float>::Compute(
const SoftmaxParam<FPGA> &param) const {
DLOG << "======================================= FPGA SoftMAX "
"===============================================";
const Tensor *in_x = param.InputX();
const Tensor *in_x = param.FloatInput();
Tensor *out = param.Out();
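// Keep the CPU cache coherent around the FPGA transfer: flush before the
// bypass so the device sees up-to-date data, invalidate afterwards so the CPU
// does not read stale cache lines.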
fpga::fpga_flush((void *)in_x->data<float>(), in_x->memory_size());
fpga::PerformBypass(param.FpgaArgs());
fpga::fpga_invalidate(out->data<float>(), out->memory_size());
auto x_dims = in_x->dims();
out->Resize(x_dims);
math::SoftmaxFuntor<CPU, float>()(in_x, out);
......
......
@@ -36,13 +36,35 @@ void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
int N = dim_out[1];
int K = (!trans_a) ? dim_a[1] : dim_a[0];
if (trans_a) {
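// Sgemm takes no transpose flag, so when trans_a is set, copy A (m x n,
// row-major) into a temporary buffer holding its transpose before the multiply.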
int numel = matrix_a.numel();
int m = matrix_a.dims()[0];
int n = matrix_a.dims()[1];
float *tmp = (float *)(matrix_a.data<float>());
float *a = static_cast<float *>(
paddle_mobile::memory::Alloc(sizeof(float) * numel));
int index = 0;
for (int j = 0; j < n; j++) {
for (int i = 0; i < m; i++) {
a[index++] = tmp[i * n + j];
}
}
#ifdef _OPENMP
Sgemm_omp(M, N, K, alpha, a, K, matrix_b.data<float>(), N, beta,
matrix_out->data<float>(), N, relu, bias);
#else
Sgemm(M, N, K, alpha, a, K, matrix_b.data<float>(), N, beta,
matrix_out->data<float>(), N, relu, bias);
#endif
paddle_mobile::memory::Free(a);  // release the temporary transposed copy of A
} else {
#ifdef _OPENMP
Sgemm_omp(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(),
N, beta, matrix_out->data<float>(), N, relu, bias);
#else
Sgemm(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(), N,
beta, matrix_out->data<float>(), N, relu, bias);
#endif
}
}
template <>
......
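As a sanity check on the transpose index math in the hunk above, here is a tiny standalone sketch (sizes and values invented for illustration, not part of the commit) showing that the j-outer / i-inner copy produces the row-major transpose:

#include <cstdio>

int main() {
  const int m = 2, n = 3;
  // 2 x 3 matrix in row-major order: [[1 2 3], [4 5 6]]
  float tmp[m * n] = {1, 2, 3, 4, 5, 6};
  float a[m * n];
  int index = 0;
  for (int j = 0; j < n; j++) {
    for (int i = 0; i < m; i++) {
      a[index++] = tmp[i * n + j];
    }
  }
  // Prints "1 4 2 5 3 6", i.e. the 3 x 2 transpose [[1 4], [2 5], [3 6]].
  for (int k = 0; k < m * n; k++) printf("%.0f ", a[k]);
  printf("\n");
  return 0;
}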
......
@@ -785,7 +785,7 @@ class SoftmaxParam : public OpParam {
fpga::BypassArgs fpga_bypass_args;
public:
RType *FloatInput() {
RType *FloatInput() const {
return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
}
void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }
......