diff --git a/src/fpga/api.cpp b/src/fpga/api.cpp index d6994dc7443150ea2c8e4d05499a3b2fac3db579..47acd275fa644f7c6d51c34a547c814531fd88c5 100644 --- a/src/fpga/api.cpp +++ b/src/fpga/api.cpp @@ -67,6 +67,20 @@ void fpga_copy(void *dest, const void *src, size_t num) { memcpy(dest, src, num); } +int fpga_flush(void *address, size_t size) { + struct MemoryCacheArgs args; + args.address = address; + args.size = size; + return do_ioctl(IOCTL_MEMCACHE_FLUSH, &args); +} + +int fpga_invalidate(void *address, size_t size) { + struct MemoryCacheArgs args; + args.address = address; + args.size = size; + return do_ioctl(IOCTL_MEMCACHE_INVAL, &args); +} + int ComputeFpgaConv(const struct WrapperConvArgs &args) { #ifdef FPGA_TEST_MODE /*DLOG << " relu_enabled:" << args.relu_enabled diff --git a/src/fpga/api.h b/src/fpga/api.h index ea50c54b7f6da746aea24f13c66324aea1d7f5b3..9d17e05d6cbfeeb8abac1e06c731510fed2ee65d 100644 --- a/src/fpga/api.h +++ b/src/fpga/api.h @@ -139,6 +139,11 @@ struct FpgaRegReadArgs { uint64_t value; }; +struct MemoryCacheArgs { + void* address; + size_t size; +}; + #define IOCTL_FPGA_MAGIC 'FPGA' #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs) @@ -146,6 +151,8 @@ struct FpgaRegReadArgs { #define IOCTL_SEPARATOR_0 10 #define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs) +#define IOCTL_MEMCACHE_INVAL _IOW(IOCTL_FPGA_MAGIC, 12, struct MemoryCacheArgs) +#define IOCTL_MEMCACHE_FLUSH _IOW(IOCTL_FPGA_MAGIC, 13, struct MemoryCacheArgs) #define IOCTL_SEPARATOR_1 20 @@ -188,6 +195,8 @@ int close_device(); void* fpga_malloc(size_t size); void fpga_free(void* ptr); void fpga_copy(void* dst, const void* src, size_t num); +int fpga_flush(void* address, size_t size); +int fpga_invalidate(void* address, size_t size); int PerformBypass(const struct BypassArgs& args); int ComputeFpgaConv(const struct WrapperConvArgs& args); diff --git a/src/fpga/bias_scale.cpp b/src/fpga/bias_scale.cpp index a1b0c8577b9100f69f823a39e9e136c46b7e09ff..3e5c3419a0c35b5c7c81b0ee1fd89a58838b5a26 100644 --- a/src/fpga/bias_scale.cpp +++ b/src/fpga/bias_scale.cpp @@ -79,6 +79,7 @@ void format_bias_scale_array(float **bias_scale_array, int element_num_after_division = align_to_x(element_num_per_division, BS_NUM_ALIGNMENT); interleave(bias_scale_array, div_num * element_num_after_division); + fpga_flush(*bias_scale_array, 2 * element_num_after_division * sizeof(float)); } } // namespace bias_scale diff --git a/src/fpga/filter.cpp b/src/fpga/filter.cpp index f3672182959ff21bff5c4cb264bc53ab6de53738..3b09ede10d10f605e69d06df2e148dd463e94d5b 100644 --- a/src/fpga/filter.cpp +++ b/src/fpga/filter.cpp @@ -206,6 +206,8 @@ void format_filter(float **data_in, int num, int channel, int height, int width, align_num(quantize_data, num_per_div_before_alignment, num, chw); reorder(quantize_data, num_after_alignment, chw); interleave(quantize_data, num_after_alignment, chw); + fpga_flush(*quantize_data, align_to_x(chw, FILTER_ELEMENT_ALIGNMENT) * + num_after_alignment * sizeof(char)); } } // namespace filter diff --git a/src/fpga/image.cpp b/src/fpga/image.cpp index 872abcd7c2dd6b16ab8ec8077e9afa6ec60c10d4..0603d164dfa88eb5620ebf588c610ea25a78be5f 100644 --- a/src/fpga/image.cpp +++ b/src/fpga/image.cpp @@ -38,7 +38,6 @@ void convert_to_hwc(float **data_in, int channel, int height, int width) { } void align_element_conv(float **data_in, int height, int cw) { - int i = 0; int h = 0; int align_cw = align_to_x(cw, IMAGE_ALIGNMENT); if (align_cw != cw) { @@ -60,6 +59,8 @@ void align_element_conv(float **data_in, int height, int cw) { void format_image(float **data_in, int channel, int height, int width) { convert_to_hwc(data_in, channel, height, width); align_element_conv(data_in, height, channel * width); + fpga_flush(*data_in, align_to_x(channel * width, IMAGE_ALIGNMENT) * height * + sizeof(float)); } void concat_images(int16_t **images_in, float **scales_in, void *image_out, @@ -77,6 +78,10 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out, for (i = 0; i < image_num; i++) { each_out_line_channel += channel_num[i]; *scale_out = std::max(*scale_out, scales_in[i][0]); + fpga_invalidate(images_in[i], + height * + align_to_x(channel_num[i] * width, IMAGE_ALIGNMENT) * + sizeof(int16_t)); } align_each_out_area_cw = align_to_x(each_out_line_channel * width, IMAGE_ALIGNMENT); @@ -97,6 +102,8 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out, } } } + + fpga_flush(image_out, height * align_each_out_area_cw * sizeof(int16_t)); } } // namespace image