提交 ca6eec78 编写于 作者: qnqinan's avatar qnqinan

Fix bugs in the concat, reshape, and slice ops and add usleep in fpga_regpoll, test=develop

上级 b816754c
...@@ -110,6 +110,10 @@ void concat_images(int8_t **images_in, float **scales_in, void *image_out, ...@@ -110,6 +110,10 @@ void concat_images(int8_t **images_in, float **scales_in, void *image_out,
} }
} }
fpga_flush(image_out, height * align_each_out_area_cw * sizeof(int8_t)); fpga_flush(image_out, height * align_each_out_area_cw * sizeof(int8_t));
for (i = 0; i < image_num; i++) {
fpga_free(images_in_tmp[i]);
}
fpga_free(images_in_tmp);
} }
void split_image(int8_t *image_in, void **images_out, int image_num, void split_image(int8_t *image_in, void **images_out, int image_num,
......
...@@ -134,9 +134,9 @@ int fpga_regpoll(uint64_t reg, uint64_t val, int time) { ...@@ -134,9 +134,9 @@ int fpga_regpoll(uint64_t reg, uint64_t val, int time) {
uint64_t i = 0; uint64_t i = 0;
/*timeout精确性待确认*/ /*timeout精确性待确认*/
int64_t timeout = time * 6; int64_t timeout = time * 6;
usleep(1);
for (i = 0; i < timeout; i++) { for (i = 0; i < timeout; i++) {
usleep(1);
if (val == reg_readq(reg)) { if (val == reg_readq(reg)) {
break; break;
} }
......
...@@ -211,6 +211,7 @@ struct ConcatArgs { ...@@ -211,6 +211,7 @@ struct ConcatArgs {
uint32_t out_channel; uint32_t out_channel;
uint32_t height; uint32_t height;
uint32_t width; uint32_t width;
std::vector<std::shared_ptr<char>> vector_concat_space;
}; };
struct SplitConvArgs { struct SplitConvArgs {
......
...@@ -53,6 +53,15 @@ bool ConcatKernel<FPGA, float>::Init(ConcatParam<FPGA> *param) { ...@@ -53,6 +53,15 @@ bool ConcatKernel<FPGA, float>::Init(ConcatParam<FPGA> *param) {
concatArgs.channel_num = channel_num; concatArgs.channel_num = channel_num;
concatArgs.height = height; concatArgs.height = height;
concatArgs.width = width; concatArgs.width = width;
auto deleter = [](void *p) { fpga::fpga_free(p); };
concatArgs.vector_concat_space.push_back(std::shared_ptr<char>(
reinterpret_cast<char *>(concatArgs.images_in), deleter));
concatArgs.vector_concat_space.push_back(std::shared_ptr<char>(
reinterpret_cast<char *>(concatArgs.scales_in), deleter));
concatArgs.vector_concat_space.push_back(std::shared_ptr<char>(
reinterpret_cast<char *>(concatArgs.channel_num), deleter));
param->SetFpgaArgs(concatArgs); param->SetFpgaArgs(concatArgs);
return true; return true;
} }
......
...@@ -110,7 +110,27 @@ void Reshape2Kernel<FPGA, float>::Compute(const Reshape2Param<FPGA> &param) { ...@@ -110,7 +110,27 @@ void Reshape2Kernel<FPGA, float>::Compute(const Reshape2Param<FPGA> &param) {
} }
} }
output->Resize(framework::make_ddim(shape)); output->Resize(framework::make_ddim(shape));
bool reshapeNeedFlg = 1;
if (output->dims() == input->dims()) { if (output->dims() == input->dims()) {
reshapeNeedFlg = 0;
} else if (output->dims().size() != input->dims().size()) {
auto inputdimsize = input->dims().size();
auto outputdimsize = output->dims().size();
int smallersize =
inputdimsize > outputdimsize ? outputdimsize : inputdimsize;
int i = 0;
for (i = 0; i < smallersize; i++) {
if ((input->dims())[i] != (output->dims())[i])
break;
}
if (i == smallersize) {
reshapeNeedFlg = 0;
}
}
if (reshapeNeedFlg) {
reshape(input, output);
} else {
DLOG << "No need to reshape"; DLOG << "No need to reshape";
output->ShareDataWith(*input); output->ShareDataWith(*input);
framework::LoD lod = input->lod(); framework::LoD lod = input->lod();
...@@ -118,9 +138,6 @@ void Reshape2Kernel<FPGA, float>::Compute(const Reshape2Param<FPGA> &param) { ...@@ -118,9 +138,6 @@ void Reshape2Kernel<FPGA, float>::Compute(const Reshape2Param<FPGA> &param) {
output->scale[0] = input->scale[0]; output->scale[0] = input->scale[0];
return; return;
} }
reshape(input, output);
//
} }
} // namespace operators } // namespace operators
......
...@@ -30,6 +30,7 @@ bool SliceKernel<FPGA, float>::Init(SliceParam<FPGA>* param) { ...@@ -30,6 +30,7 @@ bool SliceKernel<FPGA, float>::Init(SliceParam<FPGA>* param) {
} }
return true; return true;
} }
template <> template <>
void SliceKernel<FPGA, float>::Compute(const SliceParam<FPGA>& param) { void SliceKernel<FPGA, float>::Compute(const SliceParam<FPGA>& param) {
// Only support slicing in channel dimension // Only support slicing in channel dimension
...@@ -38,6 +39,8 @@ void SliceKernel<FPGA, float>::Compute(const SliceParam<FPGA>& param) { ...@@ -38,6 +39,8 @@ void SliceKernel<FPGA, float>::Compute(const SliceParam<FPGA>& param) {
auto input = param.input_; auto input = param.input_;
auto output = param.output_; auto output = param.output_;
int H = input->dims()[2];
int W = input->dims()[3];
int HW = input->dims()[2] * input->dims()[3]; int HW = input->dims()[2] * input->dims()[3];
int channel = input->dims()[1]; int channel = input->dims()[1];
auto input_ptr = input->data<int8_t>(); auto input_ptr = input->data<int8_t>();
...@@ -53,10 +56,32 @@ void SliceKernel<FPGA, float>::Compute(const SliceParam<FPGA>& param) { ...@@ -53,10 +56,32 @@ void SliceKernel<FPGA, float>::Compute(const SliceParam<FPGA>& param) {
end = end > channel ? channel : end; end = end > channel ? channel : end;
int len = end - start; int len = end - start;
size_t size = len * sizeof(int8_t); size_t size = len * sizeof(int8_t);
DLOG << input->fpga_data_num;
fpga::fpga_invalidate(input_ptr, input->fpga_data_num*sizeof(int8_t));
DLOG << output->fpga_data_num;
fpga::fpga_invalidate(output_ptr, output->fpga_data_num*sizeof(int8_t));
int unalignedWC = len * W;
int alignedWC = fpga::align_to_x(W * len, IMAGE_ALIGNMENT);
for (int i = 0; i < HW; i++) { if (unalignedWC != alignedWC) {
memcpy(output_ptr + len * i, input_ptr + i * channel + start, size); auto tmpOutput = reinterpret_cast<int8_t*>
(fpga::fpga_malloc(len*HW * sizeof(int8_t)));
for (int i = 0; i < HW; i++) {
memcpy(tmpOutput + len * i, input_ptr + i * channel + start, size);
}
for (int i = 0; i < H; i++) {
for (int j = 0; j < unalignedWC; j++) {
*(output_ptr + alignedWC * i + j) =
*(tmpOutput + unalignedWC * i + j);
}
}
fpga::fpga_free(tmpOutput);
} else {
for (int i = 0; i < HW; i++) {
memcpy(output_ptr + len * i, input_ptr + i * channel + start, size);
}
} }
fpga::fpga_flush(output_ptr, output->fpga_data_num*sizeof(int8_t));
} }
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册