Commit ca6eec78 authored by qnqinan

fix bugs of concat, reshape and slice op and add usleep in fpga regpoll, test=develop

Parent b816754c
......@@ -110,6 +110,10 @@ void concat_images(int8_t **images_in, float **scales_in, void *image_out,
}
}
fpga_flush(image_out, height * align_each_out_area_cw * sizeof(int8_t));
for (i = 0; i < image_num; i++) {
fpga_free(images_in_tmp[i]);
}
fpga_free(images_in_tmp);
}
void split_image(int8_t *image_in, void **images_out, int image_num,
......
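This hunk fixes a memory leak: `concat_images` allocated temporary per-image buffers (`images_in_tmp`) but never released them, so every concat leaked `image_num` buffers plus the pointer array. Below is a minimal sketch of the corrected allocate/use/free pattern; the function name is hypothetical and `malloc`/`free` stand in for `fpga_malloc`/`fpga_free`:

```cpp
#include <cstdint>
#include <cstdlib>
#include <cstring>

// Sketch of the ownership pattern: a heap array of per-image staging
// buffers that must be released buffer-by-buffer, then the array itself.
void pack_images(const int8_t* const* images_in, int image_num,
                 size_t each_size, int8_t* image_out) {
  auto tmp = static_cast<int8_t**>(std::malloc(image_num * sizeof(int8_t*)));
  for (int i = 0; i < image_num; i++) {
    tmp[i] = static_cast<int8_t*>(std::malloc(each_size));
    std::memcpy(tmp[i], images_in[i], each_size);               // stage input
    std::memcpy(image_out + i * each_size, tmp[i], each_size);  // pack it
  }
  for (int i = 0; i < image_num; i++) {
    std::free(tmp[i]);  // the fix: free each staged buffer...
  }
  std::free(tmp);  // ...and then the pointer array itself
}
```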
......@@ -134,9 +134,9 @@ int fpga_regpoll(uint64_t reg, uint64_t val, int time) {
uint64_t i = 0;
/* timeout accuracy to be confirmed */
int64_t timeout = time * 6;
usleep(1);
for (i = 0; i < timeout; i++) {
usleep(1);
if (val == reg_readq(reg)) {
break;
}
......
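Moving `usleep(1)` inside the loop makes the poll yield the CPU between register reads instead of busy-spinning on the bus for the whole timeout window. A minimal sketch of that bounded poll, with a fake register standing in for the driver's `reg_readq`:

```cpp
#include <unistd.h>  // usleep
#include <cstdint>

// Hypothetical stand-in for reg_readq(): returns the value currently
// latched in a fake register so the sketch is self-contained.
static uint64_t fake_reg = 0;
static uint64_t reg_readq(uint64_t /*reg*/) { return fake_reg; }

// Poll until the register equals `val`, sleeping ~1us per iteration so
// the loop yields between reads. Returns 0 on match, -1 on timeout.
int regpoll_sketch(uint64_t reg, uint64_t val, int64_t timeout) {
  for (int64_t i = 0; i < timeout; i++) {
    usleep(1);  // the change in this commit: sleep inside the loop
    if (reg_readq(reg) == val) return 0;
  }
  return -1;
}
```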
......@@ -211,6 +211,7 @@ struct ConcatArgs {
uint32_t out_channel;
uint32_t height;
uint32_t width;
std::vector<std::shared_ptr<char>> vector_concat_space;
};
struct SplitConvArgs {
......
......@@ -53,6 +53,15 @@ bool ConcatKernel<FPGA, float>::Init(ConcatParam<FPGA> *param) {
concatArgs.channel_num = channel_num;
concatArgs.height = height;
concatArgs.width = width;
auto deleter = [](void *p) { fpga::fpga_free(p); };
concatArgs.vector_concat_space.push_back(std::shared_ptr<char>(
reinterpret_cast<char *>(concatArgs.images_in), deleter));
concatArgs.vector_concat_space.push_back(std::shared_ptr<char>(
reinterpret_cast<char *>(concatArgs.scales_in), deleter));
concatArgs.vector_concat_space.push_back(std::shared_ptr<char>(
reinterpret_cast<char *>(concatArgs.channel_num), deleter));
param->SetFpgaArgs(concatArgs);
return true;
}
......
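The deeper part of the concat fix is ownership: every `fpga_malloc`'d pointer recorded in `ConcatArgs` is also wrapped in a `std::shared_ptr` with a custom deleter and pushed into the new `vector_concat_space` member added to the struct above, so the buffers are freed exactly when the last copy of the args object is destroyed. A minimal self-contained sketch of the pattern, with `malloc`/`free` standing in for `fpga::fpga_malloc`/`fpga::fpga_free`:

```cpp
#include <cstdlib>
#include <memory>
#include <vector>

struct ArgsSketch {
  char* images_in = nullptr;  // raw pointer handed to the device code
  // Owning handles: each shared_ptr's deleter runs once the last copy
  // of this struct is destroyed, since the reference count is shared.
  std::vector<std::shared_ptr<char>> concat_space;
};

int main() {
  ArgsSketch args;
  args.images_in = static_cast<char*>(std::malloc(64));  // fpga_malloc stand-in
  auto deleter = [](void* p) { std::free(p); };          // fpga_free stand-in
  args.concat_space.push_back(std::shared_ptr<char>(args.images_in, deleter));
  // args.images_in stays valid for the struct's lifetime; no manual free.
  return 0;
}
```

Keeping the owners next to the raw pointers means the struct can still be copied and passed around freely; the FPGA memory is released only when the last holder goes away.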
......@@ -110,7 +110,27 @@ void Reshape2Kernel<FPGA, float>::Compute(const Reshape2Param<FPGA> &param) {
}
}
output->Resize(framework::make_ddim(shape));
bool reshapeNeedFlg = true;
if (output->dims() == input->dims()) {
reshapeNeedFlg = false;
} else if (output->dims().size() != input->dims().size()) {
auto inputdimsize = input->dims().size();
auto outputdimsize = output->dims().size();
int smallersize =
inputdimsize > outputdimsize ? outputdimsize : inputdimsize;
int i = 0;
for (i = 0; i < smallersize; i++) {
if ((input->dims())[i] != (output->dims())[i])
break;
}
if (i == smallersize) {
reshapeNeedFlg = false;
}
}
if (reshapeNeedFlg) {
reshape(input, output);
} else {
DLOG << "No need to reshape";
output->ShareDataWith(*input);
framework::LoD lod = input->lod();
......@@ -118,9 +138,6 @@ void Reshape2Kernel<FPGA, float>::Compute(const Reshape2Param<FPGA> &param) {
output->scale[0] = input->scale[0];
return;
}
reshape(input, output);
//
}
} // namespace operators
......
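The Reshape2 fix skips the device-side reshape when the output is effectively the same layout: identical dims, or dims that agree on every leading axis when the ranks differ (the remaining dims then multiply to 1, since the element counts match). In that case the output just shares the input's buffer. A sketch of the check, assuming shapes as plain `int64_t` vectors rather than `framework::DDim`:

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Returns true when a real device-side reshape is required; false when
// the shapes agree on every compared leading dimension, in which case
// the output can simply share the input's data.
bool needs_reshape(const std::vector<int64_t>& in_dims,
                   const std::vector<int64_t>& out_dims) {
  if (in_dims == out_dims) return false;         // identical shapes
  const size_t smaller = std::min(in_dims.size(), out_dims.size());
  for (size_t i = 0; i < smaller; i++) {
    if (in_dims[i] != out_dims[i]) return true;  // real layout change
  }
  return false;  // ranks differ but leading dims match: no data movement
}
```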
......@@ -30,6 +30,7 @@ bool SliceKernel<FPGA, float>::Init(SliceParam<FPGA>* param) {
}
return true;
}
template <>
void SliceKernel<FPGA, float>::Compute(const SliceParam<FPGA>& param) {
// Only support slicing in channel dimension
......@@ -38,6 +39,8 @@ void SliceKernel<FPGA, float>::Compute(const SliceParam<FPGA>& param) {
auto input = param.input_;
auto output = param.output_;
int H = input->dims()[2];
int W = input->dims()[3];
int HW = input->dims()[2] * input->dims()[3];
int channel = input->dims()[1];
auto input_ptr = input->data<int8_t>();
......@@ -53,10 +56,32 @@ void SliceKernel<FPGA, float>::Compute(const SliceParam<FPGA>& param) {
end = end > channel ? channel : end;
int len = end - start;
size_t size = len * sizeof(int8_t);
DLOG << input->fpga_data_num;
fpga::fpga_invalidate(input_ptr, input->fpga_data_num * sizeof(int8_t));
DLOG << output->fpga_data_num;
fpga::fpga_invalidate(output_ptr, output->fpga_data_num * sizeof(int8_t));
int unalignedWC = len * W;
int alignedWC = fpga::align_to_x(W * len, IMAGE_ALIGNMENT);
for (int i = 0; i < HW; i++) {
memcpy(output_ptr + len * i, input_ptr + i * channel + start, size);
if (unalignedWC != alignedWC) {
auto tmpOutput = reinterpret_cast<int8_t*>(
    fpga::fpga_malloc(len * HW * sizeof(int8_t)));
for (int i = 0; i < HW; i++) {
memcpy(tmpOutput + len * i, input_ptr + i * channel + start, size);
}
for (int i = 0; i < H; i++) {
for (int j = 0; j < unalignedWC; j++) {
*(output_ptr + alignedWC * i + j) =
*(tmpOutput + unalignedWC * i + j);
}
}
fpga::fpga_free(tmpOutput);
} else {
for (int i = 0; i < HW; i++) {
memcpy(output_ptr + len * i, input_ptr + i * channel + start, size);
}
}
fpga::fpga_flush(output_ptr, output->fpga_data_num * sizeof(int8_t));
}
} // namespace operators
} // namespace paddle_mobile
......
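The slice fix accounts for the FPGA's row alignment: output rows are padded to `align_to_x(W * len, IMAGE_ALIGNMENT)` bytes, so when the sliced row width is not already aligned, the kernel slices into a tight temporary buffer and then re-strides it row by row into the padded output. A sketch of the re-striding step, with a local `align_to_x` assumed to mirror `fpga::align_to_x` (round up to a multiple):

```cpp
#include <cstdint>
#include <cstring>

// Round n up to the next multiple of x (assumed fpga::align_to_x behavior).
inline int align_to_x(int n, int x) { return (n + x - 1) / x * x; }

// Copy H rows of `row_bytes` payload from the tightly packed `src` into
// `dst`, whose rows are padded out to `stride_bytes`; padding untouched.
void restride_rows(const int8_t* src, int8_t* dst, int H, int row_bytes,
                   int stride_bytes) {
  for (int i = 0; i < H; i++) {
    std::memcpy(dst + i * stride_bytes, src + i * row_bytes, row_bytes);
  }
}
```

A per-row `memcpy` like this is equivalent to the element-by-element inner loop in the hunk, just expressed more compactly.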