提交 0354167d 编写于 作者: C Chon 提交者: GitHub

Merge pull request #958 from zhangyang0701/develop

Implement memory flush & invalidate for FPGA track close #957
...@@ -67,6 +67,20 @@ void fpga_copy(void *dest, const void *src, size_t num) { ...@@ -67,6 +67,20 @@ void fpga_copy(void *dest, const void *src, size_t num) {
memcpy(dest, src, num); memcpy(dest, src, num);
} }
int fpga_flush(void *address, size_t size) {
struct MemoryCacheArgs args;
args.address = address;
args.size = size;
return do_ioctl(IOCTL_MEMCACHE_FLUSH, &args);
}
int fpga_invalidate(void *address, size_t size) {
struct MemoryCacheArgs args;
args.address = address;
args.size = size;
return do_ioctl(IOCTL_MEMCACHE_INVAL, &args);
}
int ComputeFpgaConv(const struct WrapperConvArgs &args) { int ComputeFpgaConv(const struct WrapperConvArgs &args) {
#ifdef FPGA_TEST_MODE #ifdef FPGA_TEST_MODE
/*DLOG << " relu_enabled:" << args.relu_enabled /*DLOG << " relu_enabled:" << args.relu_enabled
......
...@@ -139,6 +139,11 @@ struct FpgaRegReadArgs { ...@@ -139,6 +139,11 @@ struct FpgaRegReadArgs {
uint64_t value; uint64_t value;
}; };
// Argument payload for the IOCTL_MEMCACHE_INVAL and IOCTL_MEMCACHE_FLUSH
// requests: describes one memory range by start address and length.
// NOTE(review): the layout is presumably shared with the kernel driver, so
// field order and types should not be changed without updating it -- confirm.
struct MemoryCacheArgs {
// Start of the memory range the request applies to.
void* address;
// Length of the range; callers in this change pass a byte count.
size_t size;
};
#define IOCTL_FPGA_MAGIC 'FPGA' #define IOCTL_FPGA_MAGIC 'FPGA'
#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs) #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
...@@ -146,6 +151,8 @@ struct FpgaRegReadArgs { ...@@ -146,6 +151,8 @@ struct FpgaRegReadArgs {
#define IOCTL_SEPARATOR_0 10 #define IOCTL_SEPARATOR_0 10
#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs) #define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
// Cache-maintenance request codes (command numbers 12 and 13 under
// IOCTL_FPGA_MAGIC); both take a struct MemoryCacheArgs as their argument.
#define IOCTL_MEMCACHE_INVAL _IOW(IOCTL_FPGA_MAGIC, 12, struct MemoryCacheArgs)
#define IOCTL_MEMCACHE_FLUSH _IOW(IOCTL_FPGA_MAGIC, 13, struct MemoryCacheArgs)
#define IOCTL_SEPARATOR_1 20 #define IOCTL_SEPARATOR_1 20
...@@ -188,6 +195,8 @@ int close_device(); ...@@ -188,6 +195,8 @@ int close_device();
void* fpga_malloc(size_t size); void* fpga_malloc(size_t size);
void fpga_free(void* ptr); void fpga_free(void* ptr);
void fpga_copy(void* dst, const void* src, size_t num); void fpga_copy(void* dst, const void* src, size_t num);
// Sends an IOCTL_MEMCACHE_FLUSH request for `size` bytes starting at
// `address`; returns the result of the underlying ioctl call.
int fpga_flush(void* address, size_t size);
// Sends an IOCTL_MEMCACHE_INVAL request for `size` bytes starting at
// `address`; returns the result of the underlying ioctl call.
int fpga_invalidate(void* address, size_t size);
int PerformBypass(const struct BypassArgs& args); int PerformBypass(const struct BypassArgs& args);
int ComputeFpgaConv(const struct WrapperConvArgs& args); int ComputeFpgaConv(const struct WrapperConvArgs& args);
......
...@@ -79,6 +79,7 @@ void format_bias_scale_array(float **bias_scale_array, ...@@ -79,6 +79,7 @@ void format_bias_scale_array(float **bias_scale_array,
int element_num_after_division = int element_num_after_division =
align_to_x(element_num_per_division, BS_NUM_ALIGNMENT); align_to_x(element_num_per_division, BS_NUM_ALIGNMENT);
interleave(bias_scale_array, div_num * element_num_after_division); interleave(bias_scale_array, div_num * element_num_after_division);
fpga_flush(*bias_scale_array, 2 * element_num_after_division * sizeof(float));
} }
} // namespace bias_scale } // namespace bias_scale
......
...@@ -206,6 +206,8 @@ void format_filter(float **data_in, int num, int channel, int height, int width, ...@@ -206,6 +206,8 @@ void format_filter(float **data_in, int num, int channel, int height, int width,
align_num(quantize_data, num_per_div_before_alignment, num, chw); align_num(quantize_data, num_per_div_before_alignment, num, chw);
reorder(quantize_data, num_after_alignment, chw); reorder(quantize_data, num_after_alignment, chw);
interleave(quantize_data, num_after_alignment, chw); interleave(quantize_data, num_after_alignment, chw);
fpga_flush(*quantize_data, align_to_x(chw, FILTER_ELEMENT_ALIGNMENT) *
num_after_alignment * sizeof(char));
} }
} // namespace filter } // namespace filter
......
...@@ -38,7 +38,6 @@ void convert_to_hwc(float **data_in, int channel, int height, int width) { ...@@ -38,7 +38,6 @@ void convert_to_hwc(float **data_in, int channel, int height, int width) {
} }
void align_element_conv(float **data_in, int height, int cw) { void align_element_conv(float **data_in, int height, int cw) {
int i = 0;
int h = 0; int h = 0;
int align_cw = align_to_x(cw, IMAGE_ALIGNMENT); int align_cw = align_to_x(cw, IMAGE_ALIGNMENT);
if (align_cw != cw) { if (align_cw != cw) {
...@@ -60,6 +59,8 @@ void align_element_conv(float **data_in, int height, int cw) { ...@@ -60,6 +59,8 @@ void align_element_conv(float **data_in, int height, int cw) {
void format_image(float **data_in, int channel, int height, int width) { void format_image(float **data_in, int channel, int height, int width) {
convert_to_hwc(data_in, channel, height, width); convert_to_hwc(data_in, channel, height, width);
align_element_conv(data_in, height, channel * width); align_element_conv(data_in, height, channel * width);
fpga_flush(*data_in, align_to_x(channel * width, IMAGE_ALIGNMENT) * height *
sizeof(float));
} }
void concat_images(int16_t **images_in, float **scales_in, void *image_out, void concat_images(int16_t **images_in, float **scales_in, void *image_out,
...@@ -77,6 +78,10 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out, ...@@ -77,6 +78,10 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out,
for (i = 0; i < image_num; i++) { for (i = 0; i < image_num; i++) {
each_out_line_channel += channel_num[i]; each_out_line_channel += channel_num[i];
*scale_out = std::max(*scale_out, scales_in[i][0]); *scale_out = std::max(*scale_out, scales_in[i][0]);
fpga_invalidate(images_in[i],
height *
align_to_x(channel_num[i] * width, IMAGE_ALIGNMENT) *
sizeof(int16_t));
} }
align_each_out_area_cw = align_each_out_area_cw =
align_to_x(each_out_line_channel * width, IMAGE_ALIGNMENT); align_to_x(each_out_line_channel * width, IMAGE_ALIGNMENT);
...@@ -97,6 +102,8 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out, ...@@ -97,6 +102,8 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out,
} }
} }
} }
fpga_flush(image_out, height * align_each_out_area_cw * sizeof(int16_t));
} }
} // namespace image } // namespace image
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册