提交 f1233bbf 编写于 作者: H hjchen2

update

上级 acdc21cf
...@@ -31,7 +31,8 @@ namespace paddle_mobile { ...@@ -31,7 +31,8 @@ namespace paddle_mobile {
#ifdef ANDROID #ifdef ANDROID
extern const char *ANDROID_LOG_TAG; static const char *ANDROID_LOG_TAG =
"paddle_mobile LOG built on " __DATE__ " " __TIME__;
#define ANDROIDLOGI(...) \ #define ANDROIDLOGI(...) \
__android_log_print(ANDROID_LOG_INFO, ANDROID_LOG_TAG, __VA_ARGS__); \ __android_log_print(ANDROID_LOG_INFO, ANDROID_LOG_TAG, __VA_ARGS__); \
......
...@@ -531,20 +531,6 @@ void Executor<Device, T>::FeedData(const std::vector<void *> &v) { ...@@ -531,20 +531,6 @@ void Executor<Device, T>::FeedData(const std::vector<void *> &v) {
} }
} }
template <typename Device, typename T>
void Executor<Device, T>::FeedTensorData(const vector<framework::Tensor> &v) {
auto input_size = v.size();
int index = 0;
auto vars = program_.scope->VarContain("feed", &index);
PADDLE_MOBILE_ENFORCE(input_size == vars.size(),
"input data number not correct");
for (int i = 0; i < input_size; i++) {
auto var = program_.scope->Var("feed", i + index);
auto feed_tensor = var->template GetMutable<LoDTensor>();
feed_tensor->ShareDataWith(v[i]);
}
}
template <typename Device, typename T> template <typename Device, typename T>
void Executor<Device, T>::GetResults(std::vector<void *> *v) { void Executor<Device, T>::GetResults(std::vector<void *> *v) {
auto output_size = v->size(); auto output_size = v->size();
......
...@@ -53,7 +53,6 @@ class Executor { ...@@ -53,7 +53,6 @@ class Executor {
void InjectVariable(const Tensor &t, std::string var_name); void InjectVariable(const Tensor &t, std::string var_name);
void FeedData(const Tensor &t); void FeedData(const Tensor &t);
void FeedData(const std::vector<void *> &v); void FeedData(const std::vector<void *> &v);
void FeedTensorData(const std::vector<framework::Tensor> &v);
void GetResults(std::vector<void *> *v); void GetResults(std::vector<void *> *v);
void GetTensorResults(std::vector<framework::Tensor *> *v); void GetTensorResults(std::vector<framework::Tensor *> *v);
......
...@@ -146,7 +146,7 @@ void PaddleMobilePredictor<Device, T>::FeedPaddleTensors( ...@@ -146,7 +146,7 @@ void PaddleMobilePredictor<Device, T>::FeedPaddleTensors(
tensors[i].init(typeid(float)); tensors[i].init(typeid(float));
ConvertPaddleTensors(inputs[i], &tensors[i]); ConvertPaddleTensors(inputs[i], &tensors[i]);
} }
paddle_mobile_->FeedTensorData(tensors); // paddle_mobile_->FeedTensorData(tensors);
} }
template <typename Device, typename T> template <typename Device, typename T>
......
...@@ -39,8 +39,6 @@ using framework::Tensor; ...@@ -39,8 +39,6 @@ using framework::Tensor;
using paddle_mobile::CPU; using paddle_mobile::CPU;
using std::string; using std::string;
const char *ANDROID_LOG_TAG =
"paddle_mobile LOG built on " __DATE__ " " __TIME__;
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile; paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
static std::mutex shared_mutex; static std::mutex shared_mutex;
......
...@@ -91,7 +91,6 @@ class PaddleMobile { ...@@ -91,7 +91,6 @@ class PaddleMobile {
void InjectVariable(const framework::Tensor &t, std::string var_name); void InjectVariable(const framework::Tensor &t, std::string var_name);
void FeedData(const framework::Tensor &t); void FeedData(const framework::Tensor &t);
void FeedData(const std::vector<void *> &v); void FeedData(const std::vector<void *> &v);
void FeedTensorData(const std::vector<framework::Tensor> &v);
void GetResults(std::vector<void *> *v); void GetResults(std::vector<void *> *v);
void GetTensorResults(std::vector<framework::Tensor *> *v); void GetTensorResults(std::vector<framework::Tensor *> *v);
......
...@@ -57,7 +57,7 @@ class FusionDeconvAddBNOp : public framework::OperatorWithKernel< ...@@ -57,7 +57,7 @@ class FusionDeconvAddBNOp : public framework::OperatorWithKernel<
FusionDeconvAddBNOp(const string &type, const VariableNameMap &inputs, FusionDeconvAddBNOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDeconvAddBNParam<DeviceType>, DeviceType, FusionDeconvAddBNParam<DeviceType>,
operators::DeconvAddBNKernel<DeviceType, T>>(type, inputs, outputs, operators::DeconvAddBNKernel<DeviceType, T>>(type, inputs, outputs,
......
...@@ -59,7 +59,7 @@ class FusionDeconvAddBNReluOp ...@@ -59,7 +59,7 @@ class FusionDeconvAddBNReluOp
FusionDeconvAddBNReluOp(const string &type, const VariableNameMap &inputs, FusionDeconvAddBNReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDeconvAddBNReluParam<DeviceType>, DeviceType, FusionDeconvAddBNReluParam<DeviceType>,
operators::DeconvAddBNReluKernel<DeviceType, T>>( operators::DeconvAddBNReluKernel<DeviceType, T>>(
......
...@@ -56,7 +56,7 @@ class FusionDeconvBNReluOp ...@@ -56,7 +56,7 @@ class FusionDeconvBNReluOp
FusionDeconvBNReluOp(const string &type, const VariableNameMap &inputs, FusionDeconvBNReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, FusionDeconvBNReluParam<DeviceType>, DeviceType, FusionDeconvBNReluParam<DeviceType>,
operators::DeconvBNReluKernel<DeviceType, T>>(type, inputs, outputs, operators::DeconvBNReluKernel<DeviceType, T>>(type, inputs, outputs,
......
...@@ -47,6 +47,7 @@ bool IsExpand(const std::vector<int64_t> &filter_dim, ...@@ -47,6 +47,7 @@ bool IsExpand(const std::vector<int64_t> &filter_dim,
return !(filter_1 && strides_1 && padding_0 && dilation_1); return !(filter_1 && strides_1 && padding_0 && dilation_1);
} }
#ifdef PADDLE_MOBILE_CPU
template <typename Itype, typename Otype> template <typename Itype, typename Otype>
void GemmConv(const ConvParam<CPU> &param) { void GemmConv(const ConvParam<CPU> &param) {
const Tensor *input = param.Input(); const Tensor *input = param.Input();
...@@ -241,6 +242,7 @@ template void GemmConv<int8_t, int32_t>(const ConvParam<CPU> &param); ...@@ -241,6 +242,7 @@ template void GemmConv<int8_t, int32_t>(const ConvParam<CPU> &param);
template void DepthwiseConv3x3<int8_t, int32_t>(const ConvParam<CPU> &param); template void DepthwiseConv3x3<int8_t, int32_t>(const ConvParam<CPU> &param);
template void DepthwiseConv5x5<int8_t, int32_t>(const ConvParam<CPU> &param); template void DepthwiseConv5x5<int8_t, int32_t>(const ConvParam<CPU> &param);
#endif #endif
#endif
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -24,8 +24,8 @@ bool ConvKernel<FPGA, float>::Init(ConvParam<FPGA> *param) { ...@@ -24,8 +24,8 @@ bool ConvKernel<FPGA, float>::Init(ConvParam<FPGA> *param) {
paddle_mobile::fpga::ActivationType activation_enable = paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE; paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0; int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input()); auto input = const_cast<LoDTensor *>(param->Input());
auto filter = const_cast<Tensor *>(param->Filter()); auto filter = const_cast<LoDTensor *>(param->Filter());
auto out = param->Output(); auto out = param->Output();
int channel = out->dims()[1]; int channel = out->dims()[1];
auto bs_ptr = auto bs_ptr =
......
...@@ -27,10 +27,10 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) { ...@@ -27,10 +27,10 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
paddle_mobile::fpga::ActivationType activation_enable = paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE; paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0; int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input()); auto input = const_cast<LoDTensor *>(param->Input());
// const Tensor *bias = param->Bias(); // const Tensor *bias = param->Bias();
// auto bias_ptr = bias->data<float>(); // auto bias_ptr = bias->data<float>();
auto filter = const_cast<Tensor *>(param->Filter()); auto filter = const_cast<LoDTensor *>(param->Filter());
auto out = param->Output(); auto out = param->Output();
// PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], // PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
......
...@@ -27,10 +27,10 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) { ...@@ -27,10 +27,10 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
paddle_mobile::fpga::ActivationType activation_enable = paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE; paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0; int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input()); auto input = const_cast<LoDTensor *>(param->Input());
const Tensor *bias = param->InputBias(); const Tensor *bias = param->InputBias();
auto bias_ptr = bias->data<float>(); auto bias_ptr = bias->data<float>();
auto filter = const_cast<Tensor *>(param->Filter()); auto filter = const_cast<LoDTensor *>(param->Filter());
auto out = param->Output(); auto out = param->Output();
PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
......
...@@ -28,10 +28,10 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init( ...@@ -28,10 +28,10 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init(
paddle_mobile::fpga::ActivationType activation_enable = paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU; paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0; int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input()); auto input = const_cast<LoDTensor *>(param->Input());
const Tensor *bias = param->InputBias(); const Tensor *bias = param->InputBias();
auto bias_ptr = bias->data<float>(); auto bias_ptr = bias->data<float>();
auto filter = const_cast<Tensor *>(param->Filter()); auto filter = const_cast<LoDTensor *>(param->Filter());
auto out = param->Output(); auto out = param->Output();
PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
......
...@@ -29,10 +29,10 @@ bool DeconvBNReluKernel<FPGA, float>::Init( ...@@ -29,10 +29,10 @@ bool DeconvBNReluKernel<FPGA, float>::Init(
paddle_mobile::fpga::ActivationType activation_enable = paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU; paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0; int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input()); auto input = const_cast<LoDTensor *>(param->Input());
const Tensor *bias = param->InputBias(); const Tensor *bias = param->InputBias();
auto bias_ptr = bias->data<float>(); auto bias_ptr = bias->data<float>();
auto filter = const_cast<Tensor *>(param->Filter()); auto filter = const_cast<LoDTensor *>(param->Filter());
auto out = param->Output(); auto out = param->Output();
auto bn_mean_ptr = param->InputMean()->data<float>(); auto bn_mean_ptr = param->InputMean()->data<float>();
auto bn_var_ptr = param->InputVariance()->data<float>(); auto bn_var_ptr = param->InputVariance()->data<float>();
......
...@@ -57,13 +57,9 @@ void dealign(float *src, float *dst, int input_c, int input_h, int input_w) { ...@@ -57,13 +57,9 @@ void dealign(float *src, float *dst, int input_c, int input_h, int input_w) {
} }
template <> template <>
void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) { void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
auto input = const_cast<Tensor *>(param.InputX()); auto input = const_cast<LoDTensor *>(param.InputX());
if (input->type() == typeid(float)) {
int col = param.Col(); int col = param.Col();
auto output = &(param.Out()->at(col)); LoDTensor *out = &param.Out()->at(col);
output->ShareDataWith(*input);
return;
}
fpga::BypassArgs args = param.fpga_bypass_args; fpga::BypassArgs args = param.fpga_bypass_args;
auto input_address = (input->data<half>()); auto input_address = (input->data<half>());
...@@ -71,7 +67,7 @@ void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) { ...@@ -71,7 +67,7 @@ void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
float *outdata_ptr = float *outdata_ptr =
reinterpret_cast<float *>(param.fpga_bypass_args.output.address); reinterpret_cast<float *>(param.fpga_bypass_args.output.address);
const int num_th = 32; const int num_th = 32;
if ((param.Out()->fpga_data_num) < num_th) { if ((out->fpga_data_num) < num_th) {
fpga::fpga_invalidate(input_address, (input->fpga_data_num) * sizeof(half)); fpga::fpga_invalidate(input_address, (input->fpga_data_num) * sizeof(half));
for (int idx = 0; idx < product(input->dims()); ++idx) { for (int idx = 0; idx < product(input->dims()); ++idx) {
...@@ -81,14 +77,14 @@ void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) { ...@@ -81,14 +77,14 @@ void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
} }
fpga::PerformBypass(args); fpga::PerformBypass(args);
auto outC = param.Out()->dims()[1]; auto outC = out->dims()[1];
auto outH = param.Out()->dims()[2]; auto outH = out->dims()[2];
auto outW = param.Out()->dims()[3]; auto outW = out->dims()[3];
fpga::fpga_invalidate(param.fpga_bypass_args.output.address, fpga::fpga_invalidate(param.fpga_bypass_args.output.address,
param.Out()->fpga_data_num * sizeof(float)); out->fpga_data_num * sizeof(float));
if (param.Out()->fpga_data_num != product(input->dims())) { if (out->fpga_data_num != product(input->dims())) {
float *data_tmp = float *data_tmp =
reinterpret_cast<float *>(malloc(outC * outH * outW * sizeof(float))); reinterpret_cast<float *>(malloc(outC * outH * outW * sizeof(float)));
dealign(outdata_ptr, data_tmp, outC, outH, outW); dealign(outdata_ptr, data_tmp, outC, outH, outW);
......
...@@ -25,7 +25,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) { ...@@ -25,7 +25,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
paddle_mobile::fpga::LEAKYRELU; paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0; int16_t leaky_relu_negative_slope = 0;
auto input_x = const_cast<LoDTensor *>(param->InputX()); auto input_x = const_cast<LoDTensor *>(param->InputX());
auto filter = const_cast<Tensor *>(param->InputY()); auto filter = const_cast<LoDTensor *>(param->InputY());
const Tensor *input_z = param->InputZ(); const Tensor *input_z = param->InputZ();
auto input_z_ptr = input_z->data<float>(); auto input_z_ptr = input_z->data<float>();
auto out = param->Out(); auto out = param->Out();
......
...@@ -16,8 +16,8 @@ limitations under the License. */ ...@@ -16,8 +16,8 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
template <> template <>
bool Pad2dKernel<FPGA, float>::Init(Pad2dParam<FPGA> *param) { bool Pad2DKernel<FPGA, float>::Init(Pad2DParam<FPGA> *param) {
Tensor *output = param->Out(); Tensor *output = param->output_;
fpga::format_fp16_ofm(output); fpga::format_fp16_ofm(output);
return true; return true;
} }
...@@ -39,9 +39,9 @@ void pad2dFunc(const framework::Tensor *input, framework::Tensor *output) { ...@@ -39,9 +39,9 @@ void pad2dFunc(const framework::Tensor *input, framework::Tensor *output) {
} }
} }
template <> template <>
void Pad2dKernel<FPGA, float>::Compute(const Pad2dParam<FPGA> &param) { void Pad2DKernel<FPGA, float>::Compute(const Pad2DParam<FPGA> &param) {
auto in_x = param.InputX(); auto in_x = param.input_;
auto out = param.Out(); auto out = param.output_;
fpga::fpga_invalidate((void *)in_x->data<half>(), // NOLINT fpga::fpga_invalidate((void *)in_x->data<half>(), // NOLINT
in_x->numel() * sizeof(half)); in_x->numel() * sizeof(half));
pad2dFunc(in_x, out); pad2dFunc(in_x, out);
......
...@@ -68,7 +68,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) { ...@@ -68,7 +68,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
template <> template <>
void PoolKernel<FPGA, float>::Compute(const PoolParam<FPGA> &param) { void PoolKernel<FPGA, float>::Compute(const PoolParam<FPGA> &param) {
auto *input = const_cast<Tensor *>(param.Input()); auto *input = const_cast<LoDTensor *>(param.Input());
if (input->type() == typeid(float)) { if (input->type() == typeid(float)) {
auto *output = param.Output(); auto *output = param.Output();
......
...@@ -24,7 +24,7 @@ bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) { ...@@ -24,7 +24,7 @@ bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) {
paddle_mobile::fpga::ActivationType activation_enable = paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::SIGMOID; paddle_mobile::fpga::SIGMOID;
int16_t leaky_relu_negative_slope = 0; int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->InputX()); auto input = const_cast<LoDTensor *>(param->InputX());
auto input_ptr = input->data<half>(); auto input_ptr = input->data<half>();
auto out = param->Out(); auto out = param->Out();
fpga::format_fp16_ofm(out); fpga::format_fp16_ofm(out);
......
...@@ -33,7 +33,7 @@ void AddChannelWise(const framework::Tensor *input, ...@@ -33,7 +33,7 @@ void AddChannelWise(const framework::Tensor *input,
// maybe check shape // maybe check shape
int batch_size = input->dims()[0]; int batch_size = input->dims()[0];
int channels = input->dims()[1]; int channels = input->dims()[1];
size_t spatial_size = input->dims()[2] * input->dims()[3]; int spatial_size = input->dims()[2] * input->dims()[3];
for (int batch = 0; batch < batch_size; ++batch) { for (int batch = 0; batch < batch_size; ++batch) {
for (int channel = 0; channel < channels; ++channel) { for (int channel = 0; channel < channels; ++channel) {
...@@ -88,7 +88,7 @@ void ScaleAddChannelWise(const framework::Tensor *input, ...@@ -88,7 +88,7 @@ void ScaleAddChannelWise(const framework::Tensor *input,
// maybe check shape // maybe check shape
int batch_size = input->dims()[0]; int batch_size = input->dims()[0];
int channels = input->dims()[1]; int channels = input->dims()[1];
size_t spatial_size = input->dims()[2] * input->dims()[3]; int spatial_size = input->dims()[2] * input->dims()[3];
for (int batch = 0; batch < batch_size; ++batch) { for (int batch = 0; batch < batch_size; ++batch) {
for (int channel = 0; channel < channels; ++channel) { for (int channel = 0; channel < channels; ++channel) {
......
...@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#pragma once #pragma once
#include "operators/math/gemm/cblas.h" #include "operators/math/gemm/cblas.h"
...@@ -47,3 +49,5 @@ void cblas_sgemv(const bool trans, const int M, const int N, const float alpha, ...@@ -47,3 +49,5 @@ void cblas_sgemv(const bool trans, const int M, const int N, const float alpha,
} // namespace math } // namespace math
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
#endif
...@@ -37,5 +37,8 @@ namespace ops = paddle_mobile::operators; ...@@ -37,5 +37,8 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(pad2d, ops::Pad2DOp); REGISTER_OPERATOR_CPU(pad2d, ops::Pad2DOp);
#endif #endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(pad2d, ops::Pad2DOp);
#endif
#endif // PAD2D_OP #endif // PAD2D_OP
...@@ -12,16 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,16 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef PADDLE_MOBILE_FPGA
#define PADDLE_MOBILE_FPGA
#endif
#include <fstream>
#include <iostream> #include <iostream>
#include "io/paddle_inference_api.h" #include "../test_helper.h"
#include "../test_include.h"
#ifdef PADDLE_MOBILE_FPGA_V1
#include "fpga/V1/api.h"
#endif
#ifdef PADDLE_MOBILE_FPGA_V2
#include "fpga/V2/api.h"
#endif
static const char *g_image = "../models/rfcn/data.bin"; #include <string>
static const char *g_model = "../models/rfcn/model";
static const char *g_param = "../models/rfcn/params";
void readStream(std::string filename, char *buf) { void readStream(std::string filename, char *buf) {
std::ifstream in; std::ifstream in;
...@@ -35,137 +37,116 @@ void readStream(std::string filename, char *buf) { ...@@ -35,137 +37,116 @@ void readStream(std::string filename, char *buf) {
auto length = in.tellg(); // report location (this is the length) auto length = in.tellg(); // report location (this is the length)
in.seekg(0, std::ios::beg); // go back to the beginning in.seekg(0, std::ios::beg); // go back to the beginning
in.read(buf, length); in.read(buf, length);
DLOG << length;
in.close(); in.close();
} }
PaddleMobileConfig GetConfig() { void convert_to_chw(int16_t **data_in, int channel, int height, int width,
PaddleMobileConfig config; int num, int16_t *data_tmp) {
config.precision = PaddleMobileConfig::FP32; int64_t amount_per_side = width * height;
config.device = PaddleMobileConfig::kFPGA; for (int n = 0; n < num; n++) {
config.prog_file = g_model; for (int h = 0; h < height; h++) {
config.param_file = g_param; for (int w = 0; w < width; w++) {
config.thread_num = 1; for (int c = 0; c < channel; c++) {
config.batch_size = 1; *(data_tmp + n * amount_per_side * channel + c * amount_per_side +
config.optimize = true; width * h + w) = *((*data_in)++);
config.lod_mode = true;
config.quantification = false;
return config;
}
PaddleMobileConfig GetConfig1() {
PaddleMobileConfig config;
config.precision = PaddleMobileConfig::FP32;
config.device = PaddleMobileConfig::kFPGA;
config.model_dir = "../models/resnet50";
config.thread_num = 1;
config.batch_size = 1;
config.optimize = true;
config.quantification = false;
return config;
}
int main() {
open_device();
PaddleMobileConfig config = GetConfig();
auto predictor =
CreatePaddlePredictor<PaddleMobileConfig,
PaddleEngineKind::kPaddleMobile>(config);
std::cout << "Finishing loading model" << std::endl;
float img_info[3] = {432, 1280, 1.0f};
int img_length = 432 * 1280 * 3;
auto img = reinterpret_cast<float *>(fpga_malloc(img_length * sizeof(float)));
readStream(g_image, reinterpret_cast<char *>(img));
std::cout << "Finishing initializing data" << std::endl;
struct PaddleTensor t_img_info, t_img;
t_img.dtypeid = typeid(float);
t_img_info.layout = LAYOUT_HWC;
t_img_info.shape = std::vector<int>({1, 3});
t_img_info.name = "Image information";
t_img_info.data.Reset(img_info, 3 * sizeof(float));
t_img.dtypeid = typeid(float);
t_img.layout = LAYOUT_HWC;
t_img.shape = std::vector<int>({1, 432, 1280, 3});
t_img.name = "Image information";
t_img.data.Reset(img, img_length * sizeof(float));
predictor->FeedPaddleTensors({t_img_info, t_img});
std::cout << "Finishing feeding data " << std::endl;
predictor->Predict_From_To(0, -1);
std::cout << "Finishing predicting " << std::endl;
std::vector<PaddleTensor> v; // No need to initialize v
predictor->FetchPaddleTensors(&v); // Old data in v will be cleared
std::cout << "Output number is " << v.size() << std::endl;
std::cout << "out[0] length " << v[0].data.length() << std::endl;
std::cout << "out[1] length " << v[1].data.length() << std::endl;
std::cout << "out[2] length " << v[2].data.length() << std::endl;
auto post_nms = v[0].data.length() / sizeof(float) / 8;
for (int num = 0; num < post_nms; num++) {
for (int i = 0; i < 8; i++) {
auto p = reinterpret_cast<float *>(v[0].data.data());
std::cout << p[num * 8 + i] << std::endl;
} }
} }
for (int num = 0; num < post_nms; num++) {
for (int i = 0; i < 8; i++) {
auto p = reinterpret_cast<float *>(v[1].data.data());
std::cout << p[num * 8 + i] << std::endl;
} }
} }
for (int num = 0; num < post_nms; num++) { }
for (int i = 0; i < 4; i++) {
auto p = reinterpret_cast<float *>(v[2].data.data()); void dump_stride_half(std::string filename, Tensor input_tensor,
std::cout << p[num * 4 + i] << std::endl; const int dumpnum, bool use_chw) {
// bool use_chw = true;
if (input_tensor.dims().size() != 4) return;
int c = (input_tensor.dims())[1];
int h = (input_tensor.dims())[2];
int w = (input_tensor.dims())[3];
int n = (input_tensor.dims())[0];
auto data_ptr = input_tensor.get_data();
auto *data_ptr_16 = reinterpret_cast<half *>(data_ptr);
auto data_tmp = data_ptr_16;
if (use_chw) {
data_tmp =
reinterpret_cast<half *>(malloc(n * c * h * w * sizeof(int16_t)));
convert_to_chw(&data_ptr_16, c, h, w, n, data_tmp);
} }
std::ofstream out(filename.c_str());
float result = 0;
int stride = input_tensor.numel() / dumpnum;
stride = stride > 0 ? stride : 1;
for (int i = 0; i < input_tensor.numel(); i += stride) {
result = paddle_mobile::fpga::fp16_2_fp32(data_tmp[i]);
out << result << std::endl;
} }
std::cout << "Finish getting vector values" << std::endl; out.close();
if (data_tmp != data_ptr_16) {
//////////////////////////////////////////////////// free(data_tmp);
PaddleTensor tensor;
predictor->GetPaddleTensor("fetch2", &tensor);
for (int i = 0; i < post_nms; i++) {
auto p = reinterpret_cast<float *>(tensor.data.data());
std::cout << p[+i] << std::endl;
} }
}
////////////////////////////////////////////////////// void dump_stride_float(std::string filename, Tensor input_tensor,
const int dumpnum) {
PaddleMobileConfig config1 = GetConfig1(); auto data_ptr = reinterpret_cast<float *>(input_tensor.get_data());
auto predictor1 = std::ofstream out(filename.c_str());
CreatePaddlePredictor<PaddleMobileConfig, float result = 0;
PaddleEngineKind::kPaddleMobile>(config1); int stride = input_tensor.numel() / dumpnum;
stride = stride > 0 ? stride : 1;
std::cout << "Finishing loading model" << std::endl; for (int i = 0; i < input_tensor.numel(); i += stride) {
result = data_ptr[i];
int img_length1 = 224 * 224 * 3; out << result << std::endl;
auto img1 = }
reinterpret_cast<float *>(fpga_malloc(img_length1 * sizeof(float))); out.close();
}
std::cout << "Finishing initializing data" << std::endl; void dump_stride(std::string filename, Tensor input_tensor, const int dumpnum,
bool use_chw) {
static int i = 0;
if (input_tensor.numel() == 0) {
return;
}
if (input_tensor.type() == typeid(float)) {
DLOG << "op: " << i++ << ", float data " << input_tensor.numel();
struct PaddleTensor t_img1; dump_stride_float(filename, input_tensor, dumpnum);
} else {
DLOG << "op: " << i++ << ", half data " << input_tensor.numel();
t_img1.dtypeid = typeid(float); dump_stride_half(filename, input_tensor, dumpnum, use_chw);
t_img1.layout = LAYOUT_HWC; }
t_img1.shape = std::vector<int>({1, 224, 224, 3}); DLOG << "dump input address: " << input_tensor.get_data();
t_img1.name = "Image information"; }
t_img1.data.Reset(img1, img_length1 * sizeof(float));
predictor1->FeedPaddleTensors({t_img1});
predictor1->Predict_From_To(0, -1);
std::cout << "Finishing predicting " << std::endl;
std::vector<PaddleTensor> v1; // No need to initialize v static const char *g_rfcn_combine = "../models/rfcn";
predictor1->FetchPaddleTensors(&v1); // Old data in v will be cleared static const char *g_image_src_float = "../models/rfcn/data.bin";
std::cout << "Output number is " << v1.size() << std::endl; int main() {
std::cout << "out[0] length " << v1[0].data.length() << std::endl; paddle_mobile::fpga::open_device();
paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
if (paddle_mobile.Load(std::string(g_rfcn_combine) + "/model",
std::string(g_rfcn_combine) + "/params", true, false,
1, true)) {
float img_info[3] = {768, 1536, 768.0f / 960.0f};
auto img = reinterpret_cast<float *>(
fpga::fpga_malloc(768 * 1536 * 3 * sizeof(float)));
readStream(g_image_src_float, reinterpret_cast<char *>(img));
std::vector<void *> v(3, nullptr);
paddle_mobile.FeedData(std::vector<void *>({img_info, img}));
paddle_mobile.Predict_To(-1);
for (int i = 65; i < 69; i++) {
auto tensor_ptr = paddle_mobile.FetchResult(i);
std::string saveName = "rfcn_" + std::to_string(i);
paddle_mobile::fpga::fpga_invalidate((*tensor_ptr).get_data(),
tensor_ptr->numel() * sizeof(float));
dump_stride(saveName, (*tensor_ptr), tensor_ptr->numel(), true);
}
// paddle_mobile.GetResults(&v);
DLOG << "Computation done";
fpga::fpga_free(img);
}
return 0; return 0;
} }
...@@ -36,7 +36,10 @@ int main(int argc, char* argv[]) { ...@@ -36,7 +36,10 @@ int main(int argc, char* argv[]) {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile; paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(thread_num); paddle_mobile.SetThreadNum(thread_num);
auto time1 = time(); auto time1 = time();
if (paddle_mobile.Load(fluid_model, optimize)) { // if (paddle_mobile.Load(fluid_model, optimize, false, 1, true)) {
if (paddle_mobile.Load(std::string(fluid_model) + "/model",
std::string(fluid_model) + "/params", optimize, false,
1, true)) {
auto time2 = time(); auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time2) << "ms\n"; std::cout << "load cost :" << time_diff(time1, time2) << "ms\n";
paddle_mobile::framework::Tensor input; paddle_mobile::framework::Tensor input;
...@@ -51,14 +54,15 @@ int main(int argc, char* argv[]) { ...@@ -51,14 +54,15 @@ int main(int argc, char* argv[]) {
paddle_mobile::framework::DDim in_shape = paddle_mobile::framework::DDim in_shape =
paddle_mobile::framework::make_ddim(dims); paddle_mobile::framework::make_ddim(dims);
SetupTensor<float>(&input, in_shape, 0.f, 255.f); SetupTensor<float>(&input, in_shape, 0.f, 255.f);
// warmup // // warmup
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 2; ++i) {
paddle_mobile.Predict(input); paddle_mobile.Predict(input);
} }
auto time3 = time(); auto time3 = time();
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
paddle_mobile.Predict(input); paddle_mobile.Predict(input);
} }
auto time4 = time(); auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms\n"; std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms\n";
std::ostringstream os("output tensor size: "); std::ostringstream os("output tensor size: ");
...@@ -68,7 +72,7 @@ int main(int argc, char* argv[]) { ...@@ -68,7 +72,7 @@ int main(int argc, char* argv[]) {
os << ", " << output->data<float>()[i]; os << ", " << output->data<float>()[i];
} }
std::string output_str = os.str(); std::string output_str = os.str();
std::cout << output_str << std::endl; // std::cout << output_str << std::endl;
} }
return 0; return 0;
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册