diff --git a/src/common/types.cpp b/src/common/types.cpp
index 510313d9fee0940d7162ea2c6b09426f6d9ce17a..8f284b3fe1115bd8cec78430a405289aae98e898 100644
--- a/src/common/types.cpp
+++ b/src/common/types.cpp
@@ -73,6 +73,8 @@ const char *G_OP_TYPE_QUANTIZE = "quantize";
 const char *G_OP_TYPE_DEQUANTIZE = "dequantize";
 extern const char *G_OP_TYPE_TANH = "tanh";
 extern const char *G_OP_TYPE_FUSION_DECONV_RELU = "fusion_deconv_relu";
+extern const char *G_OP_TYPE_FUSION_DECONV_ADD = "fusion_deconv_add";
+extern const char *G_OP_TYPE_FUSION_DECONV_ADD_RELU = "fusion_deconv_add_relu";
 
 std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
@@ -133,5 +135,7 @@ std::unordered_map<
     {G_OP_TYPE_QUANTIZE, {{"X"}, {"Out", "OutScale"}}},
     {G_OP_TYPE_DEQUANTIZE, {{"X", "Scale"}, {"Out"}}},
     {G_OP_TYPE_TANH, {{"X"}, {"Out"}}},
-    {G_OP_TYPE_FUSION_DECONV_RELU, {{"Input"}, {"Out"}}}};
+    {G_OP_TYPE_FUSION_DECONV_RELU, {{"Input"}, {"Out"}}},
+    {G_OP_TYPE_FUSION_DECONV_ADD, {{"Input"}, {"Out"}}},
+    {G_OP_TYPE_FUSION_DECONV_ADD_RELU, {{"Input"}, {"Out"}}}};
 }  // namespace paddle_mobile
diff --git a/src/common/types.h b/src/common/types.h
index 4cd35ac91084f6518858c97cf4c0e8da5b09555b..e9c0f81232dab7583c57fb036b58601aa26ec3c9 100644
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -142,6 +142,9 @@ extern const char *G_OP_TYPE_DEQUANTIZE;
 extern const char *G_OP_TYPE_TANH;
 extern const char *G_OP_TYPE_FUSION_DECONV_RELU;
 
+extern const char *G_OP_TYPE_FUSION_DECONV_ADD;
+extern const char *G_OP_TYPE_FUSION_DECONV_ADD_RELU;
+
 extern std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
     op_input_output_key;
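The two new op-type strings are registered in `op_input_output_key` with the usual `{"Input"}`/`{"Out"}` keys. A minimal sketch (mine, not part of the patch) of how a pass can query that table:

```cpp
// Sketch: querying the I/O keys registered above for a fused op type.
#include <iostream>
#include <string>
#include "common/types.h"

void print_io_keys(const std::string &op_type) {
  auto it = paddle_mobile::op_input_output_key.find(op_type);
  if (it == paddle_mobile::op_input_output_key.end()) return;
  for (const auto &in : it->second.first) std::cout << "input key: " << in << "\n";
  for (const auto &out : it->second.second) std::cout << "output key: " << out << "\n";
}
// print_io_keys(paddle_mobile::G_OP_TYPE_FUSION_DECONV_ADD);  // Input / Out
```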
diff --git a/src/fpga/V2/api.cpp b/src/fpga/V2/api.cpp
index 41adc979264b5c881cdad6a3d4a00ee245ae85d5..2f8a9f119e643b3836ef2c541e098f39ab3cbd17 100644
--- a/src/fpga/V2/api.cpp
+++ b/src/fpga/V2/api.cpp
@@ -16,47 +16,29 @@ limitations under the License.
 */
 #include
 #include "fpga/V2/bias_scale.h"
 #include "fpga/V2/config.h"
-<<<<<<< HEAD
-=======
 #include "fpga/V2/driver/driver.h"
->>>>>>> upstream/develop
 #include "fpga/V2/filter.h"
 #include "fpga/V2/image.h"
 
 namespace paddle_mobile {
 namespace fpga {
-<<<<<<< HEAD
-static std::map<void *, size_t> memory_map;
-
-int open_device() {
-  int ret = open_device_driver();
-=======
 static std::map<void *, size_t> memory_map;
 
 int open_device() {
   int ret = driver::open_device_driver();
->>>>>>> upstream/develop
   return ret;
 }
 
 int close_device() {
-<<<<<<< HEAD
-  int ret = close_device_driver();
-=======
   int ret = driver::close_device_driver();
->>>>>>> upstream/develop
   return ret;
 }
 
 void *fpga_malloc(size_t size) {
   static uint64_t counter = 0;
 #ifdef PADDLE_MOBILE_ZU5
-<<<<<<< HEAD
-  auto ptr = fpga_malloc_driver(size);
-=======
   auto ptr = driver::fpga_malloc_driver(size);
->>>>>>> upstream/develop
 #else
   auto ptr = malloc(size);
 #endif
@@ -75,11 +57,7 @@ void fpga_free(void *ptr) {
     size = iter->second;
     memory_map.erase(iter);
 #ifdef PADDLE_MOBILE_ZU5
-<<<<<<< HEAD
-    fpga_free_driver(ptr);
-=======
     driver::fpga_free_driver(ptr);
->>>>>>> upstream/develop
 #else
     free(ptr);
 #endif
@@ -90,28 +68,6 @@ void fpga_free(void *ptr) {
     DLOG << "Invalid pointer";
   }
 }
-<<<<<<< HEAD
-
-half fp32_2_fp16(float fp32_num) {
-  unsigned long tmp = *(unsigned long *)(&fp32_num);  // NOLINT
-  auto t = (half)(((tmp & 0x007fffff) >> 13) | ((tmp & 0x80000000) >> 16) |
-                  (((tmp & 0x7f800000) >> 13) - (112 << 10)));
-  if (tmp & 0x1000) {
-    t++;  // roundoff
-  }
-  return t;
-}
-
-float fp16_2_fp32(half fp16_num) {
-  int frac = (fp16_num & 0x3ff);
-  int exp = ((fp16_num & 0x7c00) >> 10) + 112;
-  int s = fp16_num & 0x8000;
-  int tmp = 0;
-  float fp32_num;
-  tmp = s << 16 | exp << 23 | frac << 13;
-  fp32_num = *(float *)&tmp;  // NOLINT
-  return fp32_num;
-=======
 void fpga_copy(void *dest, const void *src, size_t num) {
 #ifdef PADDLE_MOBILE_ZU5
   driver::fpga_copy_driver(dest, src, num);
@@ -133,7 +89,6 @@ int fpga_invalidate(void *address, size_t size) {
 #else
   return 0;
 #endif
->>>>>>> upstream/develop
 }
 
 void format_image(framework::Tensor *image_tensor) {
@@ -288,11 +243,7 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
   arg->filter_num = (uint32_t)filter->dims()[0];
   arg->output.address = out_ptr;
   arg->output.scale_address = out->scale;
-<<<<<<< HEAD
-  arg->conv_args =
-=======
   arg->conv_arg =
->>>>>>> upstream/develop
       (ConvArgs *)fpga_malloc(arg->split_num * sizeof(ConvArgs));  // NOLINT
 
   arg->concat_arg.image_num = arg->split_num;
@@ -310,30 +261,6 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
       (uint32_t *)fpga_malloc(n * sizeof(uint32_t));  // NOLINT
 
   for (int i = 0; i < n; i++) {
-<<<<<<< HEAD
-    arg->conv_args[i].relu_enabled = relu_enabled;
-    arg->conv_args[i].sb_address = bs_ptr;
-    arg->conv_args[i].filter_address = (int8_t *)filter_ptr;  // NOLINT
-    arg->conv_args[i].filter_scale_address = filter->scale;
-    arg->conv_args[i].filter_num = arg->filter_num;
-    arg->conv_args[i].group_num = (uint32_t)group_num;
-
-    arg->conv_args[i].kernel.stride_h = (uint32_t)stride_h;
-    arg->conv_args[i].kernel.stride_w = (uint32_t)stride_w;
-    arg->conv_args[i].kernel.height = (uint32_t)filter->dims()[2];
-    arg->conv_args[i].kernel.width = (uint32_t)filter->dims()[3];
-
-    arg->conv_args[i].image.address = input_ptr;
-    arg->conv_args[i].image.scale_address = input->scale;
-    arg->conv_args[i].image.channels = (uint32_t)input->dims()[1];
-    arg->conv_args[i].image.height = (uint32_t)input->dims()[2];
-    arg->conv_args[i].image.width = (uint32_t)input->dims()[3];
-    arg->conv_args[i].image.pad_height = (uint32_t)padding_h;
-    arg->conv_args[i].image.pad_width = (uint32_t)padding_w;
-
-    arg->conv_args[i].output.address = out_ptr;
-    arg->conv_args[i].output.scale_address = out->scale;
-=======
     arg->conv_arg[i].relu_enabled = relu_enabled;
     arg->conv_arg[i].sb_address = bs_ptr;
     arg->conv_arg[i].filter_address = (int8_t *)filter_ptr;  // NOLINT
@@ -361,7 +288,6 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
         filter::calc_aligned_num((int)input->dims()[1], arg->filter_num);
     arg->conv_arg[i].free_space =
         fpga_malloc(num_after_alignment * 2 * sizeof(half));
->>>>>>> upstream/develop
   }
 }
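`fpga_malloc`/`fpga_free` keep a `std::map<void *, size_t>` of live allocations so that frees can be checked and routed to the right allocator. A standalone sketch of that bookkeeping pattern, with `std::malloc` standing in for the driver calls:

```cpp
// Sketch of the size-tracking pattern api.cpp uses around its allocator.
#include <cstdlib>
#include <iostream>
#include <map>

static std::map<void *, size_t> memory_map;

void *tracked_malloc(size_t size) {
  void *ptr = std::malloc(size);  // stand-in for driver::fpga_malloc_driver
  if (ptr != nullptr) memory_map.insert({ptr, size});
  return ptr;
}

void tracked_free(void *ptr) {
  auto iter = memory_map.find(ptr);
  if (iter == memory_map.end()) {  // mirrors the "Invalid pointer" branch
    std::cerr << "Invalid pointer\n";
    return;
  }
  memory_map.erase(iter);
  std::free(ptr);  // stand-in for driver::fpga_free_driver
}
```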
diff --git a/src/fpga/V2/api.h b/src/fpga/V2/api.h
index a59a5ccbe3f269b1cdfd33e4a1abb8f50291ea20..1f4a203936b517d93e2d417b08a8b8456cc1fc93 100644
--- a/src/fpga/V2/api.h
+++ b/src/fpga/V2/api.h
@@ -14,132 +14,20 @@ limitations under the License. */
 
 #pragma once
 
-<<<<<<< HEAD
-#include
-#include
-#include
-#include
-#include "fpga/V2/driver/driver.h"
-#include "fpga/V2/driver/pe.h"
-=======
 #include "fpga/V2/driver/pe.h"
 #include "fpga/V2/fpga_common.h"
->>>>>>> upstream/develop
 #include "framework/tensor.h"
 
 namespace paddle_mobile {
 namespace fpga {
 
-<<<<<<< HEAD
-enum DataType {
-  DATA_TYPE_FP32 = 1,
-  DATA_TYPE_FP16 = 0,
-};
-
-enum LayoutType {
-  LAYOUT_CHW = 1,
-  LAYOUT_HWC = 0,
-};
-
-struct KernelArgs {
-  uint32_t width;
-  uint32_t height;
-  uint32_t stride_w;
-  uint32_t stride_h;
-};
-
-struct ImageInputArgs {
-  void* address;         // input featuremap virtual address
-  float* scale_address;  // input scale address;
-  uint32_t channels;
-  uint32_t width;  // featuremap width
-  uint32_t height;
-  uint32_t pad_width;  // padding width;
-  uint32_t pad_height;
-};
-
-struct ImageOutputArgs {
-  void* address;         // output result address;
-  float* scale_address;  // output scale address;
-  uint64_t timer_cnt;    // time counter for FPGA computation
-};
-
-struct ConvArgs {
-  bool relu_enabled;
-  void* sb_address;  // scale and bias are interlaced;
-  void* filter_address;
-  float* filter_scale_address;
-  uint32_t filter_num;
-  uint32_t group_num;
-
-  struct KernelArgs kernel;
-  struct ImageInputArgs image;  // input image;
-  struct ImageOutputArgs output;
-};
-
-struct ConcatArgs {
-  uint32_t image_num;
-  half** images_in;
-  float** scales_in;
-  void* image_out;
-  float* scale_out;
-  uint32_t* channel_num;
-  uint32_t* aligned_channel_num;
-  uint32_t out_channel;
-  uint32_t height;
-  uint32_t width;
-};
-
-struct SplitConvArgs {
-  uint32_t split_num;
-  uint32_t group_num;
-  uint32_t filter_num;
-  struct ImageOutputArgs output;
-  struct ConvArgs* conv_args;
-  struct ConcatArgs concat_arg;
-};
-
-struct PoolingArgs {
-  int16_t mode;  // mode: 0:max, 1:avg
-  half kernel_reciprocal;
-  struct KernelArgs kernel;
-  struct ImageInputArgs image;  // input image;
-  struct ImageOutputArgs output;
-};
-
-struct EWAddArgs {
-  bool relu_enabled;
-
-  uint32_t const0;  // output0 = const0 x input0 + const1 x input1;
-  uint32_t const1;
-  struct ImageInputArgs image0;
-  struct ImageInputArgs image1;
-  struct ImageOutputArgs output;
-};
-
-struct BypassArgs {
-  enum DataType input_data_type;
-  enum DataType output_data_type;
-  enum LayoutType input_layout_type;
-  enum LayoutType output_layout_type;
-  struct ImageInputArgs image;
-  struct ImageOutputArgs output;
-};
-
-=======
->>>>>>> upstream/develop
 int open_device();
 int close_device();
 
 void* fpga_malloc(size_t size);
 void fpga_free(void* ptr);
-<<<<<<< HEAD
-
-static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x; }
-=======
 void fpga_copy(void* dest, const void* src, size_t num);
 int fpga_flush(void* address, size_t size);
 int fpga_invalidate(void* address, size_t size);
->>>>>>> upstream/develop
 
 float filter_find_max(framework::Tensor* filter_tensor);
 int get_aligned_channel_num(int channel_num);
 
@@ -167,11 +55,5 @@ void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input,
                     bool relu_enabled, int group_num, int stride_h,
                     int stride_w, int padding_h, int padding_w, float* bs_ptr);
-<<<<<<< HEAD
-half fp32_2_fp16(float fp32_num);
-float fp16_2_fp32(half fp16_num);
-
-=======
->>>>>>> upstream/develop
 }  // namespace fpga
 }  // namespace paddle_mobile
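The `fp32_2_fp16`/`fp16_2_fp32` helpers dropped from api.h do not disappear; the develop side keeps them with the shared FPGA code (api.h now pulls in `fpga_common.h` instead). An annotated sketch of the fp32-to-fp16 trick, lifted from the removed body above:

```cpp
// Annotated restatement of the removed fp32_2_fp16 (sketch).
#include <cstdint>

typedef int16_t half;  // 16-bit container, as in the FPGA headers

half fp32_to_fp16(float fp32_num) {
  uint32_t tmp = *(uint32_t *)(&fp32_num);  // type-pun, as the original does
  // sign: bit 31 -> bit 15; exponent: rebias 127 -> 15 (subtract 112);
  // mantissa: keep the top 10 of 23 bits.
  auto t = (half)(((tmp & 0x007fffff) >> 13) | ((tmp & 0x80000000) >> 16) |
                  (((tmp & 0x7f800000) >> 13) - (112 << 10)));
  if (tmp & 0x1000) {
    t++;  // round up when the highest dropped mantissa bit is set
  }
  return t;
}
```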
diff --git a/src/fpga/V2/bias_scale.cpp b/src/fpga/V2/bias_scale.cpp
index bbf443321d8ed73fb50b10896bed6bfc3926ca5d..3afd3f51bbb10e3bb2d66195fcc54d25c56e2393 100644
--- a/src/fpga/V2/bias_scale.cpp
+++ b/src/fpga/V2/bias_scale.cpp
@@ -27,11 +27,7 @@ void align_element(float **data_in, int num, int num_after_alignment) {
       (float *)fpga_malloc(total_element * sizeof(float));  // NOLINT
   memset(ptr_aligned, 0, total_element * sizeof(float));
 
-<<<<<<< HEAD
-  for (int i = 1; i < num; i++) {
-=======
   for (int i = 0; i < num; i++) {
->>>>>>> upstream/develop
     ptr_aligned[i * 2 + 0] = ptr_unaligned[i];
     ptr_aligned[i * 2 + 1] = ptr_unaligned[i + num];
   }
@@ -43,10 +39,7 @@ void align_element(float **data_in, int num, int num_after_alignment) {
 
 void format_bias_scale_array(float **data_in, int num,
                              int num_after_alignment) {
   align_element(data_in, num, num_after_alignment);
-<<<<<<< HEAD
-=======
   fpga_flush(*data_in, 2 * num_after_alignment * sizeof(float));
->>>>>>> upstream/develop
 }
 
 }  // namespace bias_scale
diff --git a/src/fpga/V2/driver/bitmap.cpp b/src/fpga/V2/driver/bitmap.cpp
index b3794ef36464bf5ccb09e7b3ec15c3447f5288b0..c612faa6aed11b683ff81fffdf6c57a6fed9536d 100644
--- a/src/fpga/V2/driver/bitmap.cpp
+++ b/src/fpga/V2/driver/bitmap.cpp
@@ -57,13 +57,8 @@ static uint64_t ffs(uint64_t data) {
   uint64_t bit = 0;
   int i = 0;
 
-<<<<<<< HEAD
-  for (i = 0; i < sizeof(data); i++) {
-    if (data & (1 << i)) {
-=======
   for (i = 0; i < sizeof(data) * 8; i++) {
     if (data & (1UL << i)) {
->>>>>>> upstream/develop
       bit = i;
       break;
     }
   }
diff --git a/src/fpga/V2/driver/bitmap.h b/src/fpga/V2/driver/bitmap.h
index ae2d11b34b53064e0491c4b1e2ac0a6a12b00559..4cb1673d91d61c1ec27bbc6923e49e8dd04e3a37 100644
--- a/src/fpga/V2/driver/bitmap.h
+++ b/src/fpga/V2/driver/bitmap.h
@@ -25,11 +25,7 @@ limitations under the License. */
 #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
 #define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask))
-<<<<<<< HEAD
-#define round_down(x, y) ((x) & ((y)-1))
-=======
 #define round_down(x, y) ((x) & ~((y)-1))
->>>>>>> upstream/develop
 
 namespace fpga_bitmap {
 void bitmap_set(uint64_t *map, unsigned int start, int len);
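Two of the merge resolutions above are genuine bug fixes: the old `ffs` scanned only `sizeof(data)` (8) bit positions with a 32-bit shift, and the old `round_down` computed `x % y` rather than rounding down. A quick self-contained check (mine, for illustration):

```cpp
#include <cassert>
#include <cstdint>

#define round_down_old(x, y) ((x) & ((y)-1))   // actually x % y for power-of-two y
#define round_down_new(x, y) ((x) & ~((y)-1))  // round x down to a multiple of y

static uint64_t ffs64(uint64_t data) {
  for (uint64_t i = 0; i < sizeof(data) * 8; i++) {  // 64 iterations, not 8
    if (data & (uint64_t(1) << i)) return i;         // 64-bit one; plain 1 truncates
  }
  return 0;
}

int main() {
  assert(round_down_old(4097, 4096) == 1);     // old macro: the remainder
  assert(round_down_new(4097, 4096) == 4096);  // fixed: page-aligned value
  assert(ffs64(uint64_t(1) << 40) == 40);      // the old loop never reached bit 40
  return 0;
}
```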
diff --git a/src/fpga/V2/driver/driver.cpp b/src/fpga/V2/driver/driver.cpp
index 7bebabf29c984e55683f125538ef8c23cdac048a..d7e71782676fd350f938847c03e9736ff0adb64a 100644
--- a/src/fpga/V2/driver/driver.cpp
+++ b/src/fpga/V2/driver/driver.cpp
@@ -17,10 +17,7 @@ limitations under the License. */
 #include
 #include
 #include
-<<<<<<< HEAD
-=======
 #include
->>>>>>> upstream/develop
 #include
 #include
 #include
@@ -36,10 +33,7 @@ limitations under the License.
 */
 
 namespace paddle_mobile {
 namespace fpga {
-<<<<<<< HEAD
-=======
 namespace driver {
->>>>>>> upstream/develop
 struct FPGA_INFO g_fpgainfo;
 
 int open_drvdevice() {
@@ -51,12 +45,8 @@ int open_drvdevice() {
 
 int open_memdevice() {
   if (g_fpgainfo.fd_mem == -1) {
-<<<<<<< HEAD
-    g_fpgainfo.fd_mem = open(g_fpgainfo.memdevice_path, O_RDWR | O_DSYNC);
-=======
     // g_fpgainfo.fd_mem = open(g_fpgainfo.memdevice_path, O_RDWR | O_DSYNC);
     g_fpgainfo.fd_mem = open(g_fpgainfo.memdevice_path, O_RDWR);
->>>>>>> upstream/develop
   }
   return g_fpgainfo.fd_mem;
 }
@@ -64,10 +54,6 @@ int open_memdevice() {
 
 void pl_reset() {
   // DLOG << "PL RESET";
-<<<<<<< HEAD
-  // reg_writeq(0x5a, REG_FPGA_RESET);
-=======
->>>>>>> upstream/develop
   usleep(100 * 1000);
 }
 
@@ -147,11 +133,7 @@ int pl_get_status() { return 0; }
 
 int fpga_regpoll(uint64_t reg, uint64_t val, int time) {
   uint64_t i = 0;
   /* timeout accuracy to be confirmed */
-<<<<<<< HEAD
-  int64_t timeout = time * CPU_FREQ / 1000000;
-=======
   int64_t timeout = time * 6;
->>>>>>> upstream/develop
 
   for (i = 0; i < timeout; i++) {
     if (val == reg_readq(reg)) {
@@ -193,11 +175,6 @@ int memory_request(struct fpga_memory *memory, size_t size, uint64_t *addr) {
 }
 
 void memory_release(struct fpga_memory *memory) {
-<<<<<<< HEAD
-  pthread_mutex_lock(&memory->mutex);
-  fpga_bitmap::bitmap_clear(memory->bitmap, 0, memory->page_num);
-  pthread_mutex_unlock(&memory->mutex);
-=======
   void *ptr = nullptr;
 
   /*unmap memory*/
@@ -206,7 +183,6 @@ void memory_release(struct fpga_memory *memory) {
   for (iter = map.begin(); iter != map.end(); iter++) {
     fpga_free_driver(ptr);
   }
->>>>>>> upstream/develop
 }
 
 int create_fpga_memory_inner(struct fpga_memory *memory, size_t memory_size) {
@@ -269,10 +245,6 @@ int init_fpga_memory(struct fpga_memory *memory) {
     return rc;
   }
 
-<<<<<<< HEAD
-  // spin_lock_init(&memory->spin);
-=======
->>>>>>> upstream/develop
   fpga_bitmap::bitmap_clear(memory->bitmap, 0, memory->page_num);
   fpga_bitmap::bitmap_set(memory->bitmap, 0, 1);  // NOTE reserve fpga page 0.
@@ -327,11 +299,6 @@ void *fpga_reg_malloc(size_t size) {
   return ret;
 }
 
-<<<<<<< HEAD
-void *fpga_malloc_driver(size_t size) {
-  void *ret = nullptr;
-  uint64_t phy_addr = 0;
-=======
 void *fpga_reg_free(void *ptr) {
   size_t size = 0;
 
@@ -349,7 +316,6 @@ void *fpga_malloc_driver(size_t size) {
   void *ret = nullptr;
   uint64_t phy_addr = 0;
   int i = 0;
->>>>>>> upstream/develop
 
   memory_request(g_fpgainfo.memory_info, size, &phy_addr);
 
@@ -365,19 +331,14 @@ void *fpga_malloc_driver(size_t size) {
 
 void fpga_free_driver(void *ptr) {
   size_t size = 0;
-<<<<<<< HEAD
-=======
   uint32_t pos = 0;
   uint64_t p_addr = 0;
->>>>>>> upstream/develop
 
   auto iter = g_fpgainfo.fpga_addr2size_map.find(ptr);
   if (iter != g_fpgainfo.fpga_addr2size_map.end()) {
     size = iter->second;
     g_fpgainfo.fpga_addr2size_map.erase(iter);
     munmap(ptr, size);
-<<<<<<< HEAD
-=======
     p_addr = vaddr_to_paddr(ptr);
     pos = (p_addr - g_fpgainfo.memory_info->mem_start) / FPGA_PAGE_SIZE;
 
@@ -387,14 +348,11 @@ void fpga_free_driver(void *ptr) {
     fpga_bitmap::bitmap_clear(g_fpgainfo.memory_info->bitmap, pos,
                               g_fpgainfo.memory_info->nr[pos]);
     pthread_mutex_unlock(&g_fpgainfo.memory_info->mutex);
->>>>>>> upstream/develop
   } else {
    DLOG << "Invalid pointer";
   }
 }
 
-<<<<<<< HEAD
-=======
 static inline int do_ioctl(unsigned long req, const void *arg) {
   return ioctl(g_fpgainfo.fd_mem, req, arg);
 }
@@ -437,7 +395,6 @@ void fpga_copy_driver(void *dest, const void *src, size_t num) {
 
   return;
 }
->>>>>>> upstream/develop
 
 int open_device_driver() {
   g_fpgainfo.FpgaRegPhyAddr = FPGA_REG_PHY_ADDR;
   g_fpgainfo.FpgaMemPhyAddr = FPGA_MEM_PHY_ADDR;
@@ -463,20 +420,13 @@ int open_device_driver() {
 
 int close_device_driver() {
   pl_destroy();
-<<<<<<< HEAD
-  fpga_free_driver(g_fpgainfo.FpgaRegVirAddr);
-=======
   fpga_reg_free(g_fpgainfo.FpgaRegVirAddr);
->>>>>>> upstream/develop
   memory_release(g_fpgainfo.memory_info);
   destroy_fpga_memory(g_fpgainfo.memory_info);
 
   return 0;
 }
 
-<<<<<<< HEAD
-=======
 }  // namespace driver
->>>>>>> upstream/develop
 }  // namespace fpga
 }  // namespace paddle_mobile
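The registers behind `reg_readq`/`reg_writeq` are mmap'd device memory, so `fpga_regpoll`'s busy-wait only behaves if the loads are `volatile` (the qualifier the develop side adds in driver.h below). A sketch of the access pattern, with names of my own choosing:

```cpp
// MMIO access sketch: volatile keeps the compiler from caching or hoisting
// the register reads out of the polling loop.
#include <cstdint>

inline uint64_t mmio_read64(void *base, uint32_t offset) {
  return *(volatile uint64_t *)((uint8_t *)base + offset);
}

inline void mmio_write64(void *base, uint32_t offset, uint64_t value) {
  *(volatile uint64_t *)((uint8_t *)base + offset) = value;
}

// Busy-wait until a register holds an expected value, as fpga_regpoll does;
// `spins` plays the role of the loosely calibrated `time * 6` bound above.
inline int poll_reg(void *base, uint32_t offset, uint64_t expect, int64_t spins) {
  for (int64_t i = 0; i < spins; i++) {
    if (mmio_read64(base, offset) == expect) return 0;
  }
  return -1;  // timed out
}
```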
diff --git a/src/fpga/V2/driver/driver.h b/src/fpga/V2/driver/driver.h
index 018229882fc45fe29dc4ad23139a854249e0d107..633e95ea8204ada2a330a6bb4fab4ce8fe23248b 100644
--- a/src/fpga/V2/driver/driver.h
+++ b/src/fpga/V2/driver/driver.h
@@ -24,10 +24,7 @@ limitations under the License.
 */
 
 namespace paddle_mobile {
 namespace fpga {
-<<<<<<< HEAD
-=======
 namespace driver {
->>>>>>> upstream/develop
 
 #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
 
@@ -51,8 +48,6 @@ const int PE_IDX_BYPASS = 3;
 
 enum pe_status { IDLE = 0, BUSY = 1 };
 
-<<<<<<< HEAD
-=======
 struct MemoryCacheArgs {
   void *offset;
   size_t size;
@@ -62,7 +57,6 @@ struct MemoryCacheArgs {
 #define IOCTL_MEMCACHE_INVAL _IOW(IOCTL_FPGA_MAGIC, 12, struct MemoryCacheArgs)
 #define IOCTL_MEMCACHE_FLUSH _IOW(IOCTL_FPGA_MAGIC, 13, struct MemoryCacheArgs)
 
->>>>>>> upstream/develop
 struct fpga_pe {
   char type_name[MAX_TYPE_NAME_LENTH + 1];
   struct pe_data_s *outer;
@@ -111,39 +105,20 @@ extern struct FPGA_INFO g_fpgainfo;
 
 inline uint64_t reg_readq(uint32_t offset) {
   // DLOG << "offset : " << offset;
-<<<<<<< HEAD
-  uint64_t value =
-      *(uint64_t *)((uint8_t *)g_fpgainfo.FpgaRegVirAddr + offset);  // NOLINT
-=======
   uint64_t value =
      *(volatile uint64_t *)((uint8_t *)g_fpgainfo.FpgaRegVirAddr +
                              offset);  // NOLINT
->>>>>>> upstream/develop
 
   return value;
 }
 
 inline void reg_writeq(uint64_t value, uint32_t offset) {
   // DLOG << "offset : " << offset << ", value : " << value;
-<<<<<<< HEAD
-  *(uint64_t *)((uint8_t *)g_fpgainfo.FpgaRegVirAddr + offset) =  // NOLINT
-=======
   *(volatile uint64_t *)((uint8_t *)g_fpgainfo.FpgaRegVirAddr +
                          offset) =  // NOLINT
->>>>>>> upstream/develop
       value;
 }
 
 int open_device_driver();
-<<<<<<< HEAD
-int close_device_driver();
-void *fpga_malloc_driver(size_t size);
-void fpga_free_driver(void *ptr);
-/*pe*/
-
-uint64_t vaddr_to_paddr(void *address);
-int fpga_regpoll(uint64_t reg, uint64_t val, int time);
-
-=======
 int close_device_driver();
 
@@ -164,6 +139,5 @@ uint64_t vaddr_to_paddr(void *address);
 int fpga_regpoll(uint64_t reg, uint64_t val, int time);
 
 }  // namespace driver
->>>>>>> upstream/develop
 }  // namespace fpga
 }  // namespace paddle_mobile
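The new `MemoryCacheArgs` ioctls are what `fpga_flush_driver`/`fpga_invalidate_driver` in driver.cpp wrap. A hypothetical user-space sketch; the real `IOCTL_FPGA_MAGIC` value and whether `offset` is a virtual address or a window offset are defined by the kernel driver, not shown here, so both are assumptions:

```cpp
#include <sys/ioctl.h>
#include <cstddef>

struct MemoryCacheArgs {
  void *offset;
  size_t size;
};

#define IOCTL_FPGA_MAGIC 'F'  // assumption: the real magic lives in driver.h
#define IOCTL_MEMCACHE_INVAL _IOW(IOCTL_FPGA_MAGIC, 12, struct MemoryCacheArgs)
#define IOCTL_MEMCACHE_FLUSH _IOW(IOCTL_FPGA_MAGIC, 13, struct MemoryCacheArgs)

// Flush CPU caches to DDR before the FPGA reads a buffer the CPU wrote.
int cache_flush(int fd_mem, void *addr, size_t size) {
  struct MemoryCacheArgs args = {addr, size};
  return ioctl(fd_mem, IOCTL_MEMCACHE_FLUSH, &args);
}

// Invalidate CPU caches before the CPU reads a buffer the FPGA wrote.
int cache_invalidate(int fd_mem, void *addr, size_t size) {
  struct MemoryCacheArgs args = {addr, size};
  return ioctl(fd_mem, IOCTL_MEMCACHE_INVAL, &args);
}
```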
diff --git a/src/fpga/V2/driver/pe.cpp b/src/fpga/V2/driver/pe.cpp
index 741db868820c2e520f731aec5f46153a6c562841..2e806bfb37c131fad1c011c960bc79aa1b121186 100644
--- a/src/fpga/V2/driver/pe.cpp
+++ b/src/fpga/V2/driver/pe.cpp
@@ -20,49 +20,29 @@ limitations under the License.
 */
 
 namespace paddle_mobile {
 namespace fpga {
-<<<<<<< HEAD
-#define MUL8(x) (x * 8)
-=======
 #define MUL8(x) ((x)*8)
->>>>>>> upstream/develop
 #define BYPASS_DONE 1
 
 float Findfp16Max() {
   uint16_t abs_vals[16];
   uint64_t max_fp16;
-<<<<<<< HEAD
-  max_fp16 = reg_readq(MUL8(49));
-=======
   max_fp16 = driver::reg_readq(MUL8(49));
->>>>>>> upstream/develop
   abs_vals[0] = (uint16_t)(0x0000007f & (max_fp16));        // NOLINT
   abs_vals[1] = (uint16_t)(0x0000007f & (max_fp16 >> 16));  // NOLINT
   abs_vals[2] = (uint16_t)(0x0000007f & (max_fp16 >> 32));  // NOLINT
   abs_vals[3] = (uint16_t)(0x0000007f & (max_fp16 >> 48));  // NOLINT
-<<<<<<< HEAD
-  max_fp16 = reg_readq(MUL8(50));
-=======
   max_fp16 = driver::reg_readq(MUL8(50));
->>>>>>> upstream/develop
   abs_vals[4] = (uint16_t)(0x0000007f & (max_fp16));        // NOLINT
   abs_vals[5] = (uint16_t)(0x0000007f & (max_fp16 >> 16));  // NOLINT
   abs_vals[6] = (uint16_t)(0x0000007f & (max_fp16 >> 32));  // NOLINT
   abs_vals[7] = (uint16_t)(0x0000007f & (max_fp16 >> 48));  // NOLINT
-<<<<<<< HEAD
-  max_fp16 = reg_readq(MUL8(51));
-=======
   max_fp16 = driver::reg_readq(MUL8(51));
->>>>>>> upstream/develop
   abs_vals[8] = (uint16_t)(0x0000007f & (max_fp16));         // NOLINT
   abs_vals[9] = (uint16_t)(0x0000007f & (max_fp16 >> 16));   // NOLINT
   abs_vals[10] = (uint16_t)(0x0000007f & (max_fp16 >> 32));  // NOLINT
   abs_vals[11] = (uint16_t)(0x0000007f & (max_fp16 >> 48));  // NOLINT
-<<<<<<< HEAD
-  max_fp16 = reg_readq(MUL8(52));
-=======
   max_fp16 = driver::reg_readq(MUL8(52));
->>>>>>> upstream/develop
   abs_vals[12] = (uint16_t)(0x0000007f & (max_fp16));
   abs_vals[13] = (uint16_t)(0x0000007f & (max_fp16 >> 16));  // NOLINT
   abs_vals[14] = (uint16_t)(0x0000007f & (max_fp16 >> 32));  // NOLINT
@@ -78,11 +58,7 @@ float Findfp16Max() {
 }
 
 int ComputeFpgaConv(const struct SplitConvArgs &args) {
-<<<<<<< HEAD
-  ComputeBasicConv(args.conv_args[0]);
-=======
   ComputeBasicConv(args.conv_arg[0]);
->>>>>>> upstream/develop
 }
 
 int ComputeBasicConv(const struct ConvArgs &args) {
@@ -190,13 +166,8 @@ int PerformBypass(const struct BypassArgs &args) {
   return 0;
 #endif
 
-<<<<<<< HEAD
-  uint64_t ifm_src_paddr = vaddr_to_paddr(args.image.address);
-  uint64_t ifm_dst_paddr = vaddr_to_paddr(args.output.address);
-=======
   uint64_t ifm_src_paddr = driver::vaddr_to_paddr(args.image.address);
   uint64_t ifm_dst_paddr = driver::vaddr_to_paddr(args.output.address);
->>>>>>> upstream/develop
   uint64_t bp_enable;
   int64_t length;
   uint64_t pixels;
@@ -225,18 +196,6 @@ int PerformBypass(const struct BypassArgs &args) {
   }
 
   // start bypass
-<<<<<<< HEAD
-  reg_writeq(ifm_src_paddr, MUL8(27));
-  reg_writeq(ifm_dst_paddr, MUL8(28));
-  reg_writeq(0, MUL8(0));
-  reg_writeq(bp_enable, MUL8(0));
-  // poll
-  int ret = -1;
-  ret = fpga_regpoll(MUL8(48), BYPASS_DONE, 0xffffffff);
-  if (ret != -1) {
-    // clear "irq"
-    reg_readq(MUL8(63));
-=======
   driver::reg_writeq(ifm_src_paddr, MUL8(27));
   driver::reg_writeq(ifm_dst_paddr, MUL8(28));
   driver::reg_writeq(0, MUL8(0));
@@ -247,7 +206,6 @@ int PerformBypass(const struct BypassArgs &args) {
   if (ret != -1) {
     // clear "irq"
     driver::reg_readq(MUL8(63));
->>>>>>> upstream/develop
   }
   // get max value
   if ((!args.input_data_type) && (!args.output_data_type)) {
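The parenthesized `MUL8` picked up from develop matters whenever the macro is handed an expression rather than a literal; with a literal both forms agree. A compile-time illustration:

```cpp
// Why ((x)*8) instead of (x * 8): with an expression argument the old macro
// multiplies only the last term.
#define MUL8_OLD(x) (x * 8)
#define MUL8_NEW(x) ((x)*8)

static_assert(MUL8_OLD(49) == 392, "literal arguments were fine");
static_assert(MUL8_OLD(48 + 1) == 48 + 8, "expands to 48 + 1*8: broken");
static_assert(MUL8_NEW(48 + 1) == 392, "expands to (48+1)*8: fixed");
```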
diff --git a/src/fpga/V2/driver/pe.h b/src/fpga/V2/driver/pe.h
index ae9f45fc25264d28677d16565911507905dad2a6..4903bf4c33f6b5d5899c56eeaada8c7a21d1a875 100644
--- a/src/fpga/V2/driver/pe.h
+++ b/src/fpga/V2/driver/pe.h
@@ -12,12 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
-<<<<<<< HEAD
-#include "fpga/V2/api.h"
-=======
 #include "fpga/V2/fpga_common.h"
->>>>>>> upstream/develop
 
 namespace paddle_mobile {
 namespace fpga {
diff --git a/src/fpga/V2/filter.cpp b/src/fpga/V2/filter.cpp
index 67d3fe5b45efb9e31a441fc773ca4d06e560291e..ce278edbeed64f2ca413c1f75ff620ee1f44c83d 100644
--- a/src/fpga/V2/filter.cpp
+++ b/src/fpga/V2/filter.cpp
@@ -94,10 +94,7 @@ void format_filter(float **data_in, int num, int channel, int height, int width,
   convert_to_hwc(data_in, num, channel, height, width);
   align_filter(data_in, num, channel, height, width);
   int pixel_num = calc_aligned_total_pixel_num(num, channel, height, width);
-<<<<<<< HEAD
-=======
   fpga_flush(*data_in, pixel_num * sizeof(float));
->>>>>>> upstream/develop
 }
 
 void convert_fc_filter(float **data_in, int num, int chw) {
@@ -117,11 +114,8 @@ void format_fc_filter(float **data_in, int num, int channel, int height,
   int chw = channel * height * width;
   convert_fc_filter(data_in, num, chw);
   align_filter(data_in, num, channel, height, width);
-<<<<<<< HEAD
-=======
   int pixel_num = calc_aligned_total_pixel_num(num, channel, height, width);
   fpga_flush(*data_in, pixel_num * sizeof(float));
->>>>>>> upstream/develop
 }
 
 float find_max(float *data_in, int data_size) {
diff --git a/src/fpga/V2/image.cpp b/src/fpga/V2/image.cpp
index ac9d7cbb3b246adf18138652e9761f56b9923163..26829bfba65f2375b27251070b33b2bbe57d069b 100644
--- a/src/fpga/V2/image.cpp
+++ b/src/fpga/V2/image.cpp
@@ -58,10 +58,7 @@ void format_image(float **data_in, int channel, int height, int width,
                   int aligned_channel) {
   convert_to_hwc(data_in, channel, height, width);
   align_image(data_in, channel, height, width, aligned_channel);
-<<<<<<< HEAD
-=======
   fpga_flush(*data_in, aligned_channel * height * width * sizeof(float));
->>>>>>> upstream/develop
 }
 
 void concat_images(int16_t **images_in, float **scales_in, void *image_out,
@@ -73,11 +70,8 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out,
   scale_out[1] = 0.0;
   for (int i = 0; i < image_num; i++) {
     scale_out[0] = std::max(*scale_out, scales_in[i][0]);
-<<<<<<< HEAD
-=======
     fpga_invalidate(images_in[i],
                     height * width * aligned_channel_num[i] * sizeof(int16_t));
->>>>>>> upstream/develop
   }
   scale_out[1] = 1 / scale_out[0];
 
@@ -92,10 +86,7 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out,
       tmp_channel_sum += channel_num[i];
     }
   }
-<<<<<<< HEAD
-=======
   fpga_flush(image_out, hw * out_channel * sizeof(int16_t));
->>>>>>> upstream/develop
 }
 
 }  // namespace image
diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp
index 8b47f4357bb388a6701cb41ae741a045c951858d..80a990d5550ded3a5cc049fef366ba7e90938c00 100644
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -26,10 +26,7 @@ limitations under the License. */
 #include "framework/program/var_desc.h"
 #include "framework/scope.h"
 #include "framework/tensor.h"
-<<<<<<< HEAD:src/framework/executor.cpp
-=======
 #include "memory/t_malloc.h"
->>>>>>> upstream/develop:src/framework/executor.cpp
 
 #ifdef PADDLE_EXECUTOR_MULTITHREAD
 #include
diff --git a/src/io/paddle_mobile.cpp b/src/io/paddle_mobile.cpp
index ed1d7e826716e59ebae3a7061b815615f79b50e1..6a773da00ff6541d55a6a9d04ca470fed5de81a1 100644
--- a/src/io/paddle_mobile.cpp
+++ b/src/io/paddle_mobile.cpp
@@ -21,7 +21,6 @@ limitations under the License.
 */
 #include "operators/math/gemm.h"
 
 namespace paddle_mobile {
-static std::mutex lc;
 
 template <typename Dtype, Precision P>
 void PaddleMobile<Dtype, P>::SetThreadNum(int num) {
 #ifdef _OPENMP
@@ -203,10 +202,7 @@ void PaddleMobile<Dtype, P>::Predict_To(int end) {
 #endif
 
 #ifdef PADDLE_MOBILE_CL
-<<<<<<< HEAD
-=======
 static std::mutex lc;
->>>>>>> upstream/develop
 template <typename Dtype, Precision P>
 void PaddleMobile<Dtype, P>::SetCLPath(std::string path) {
   std::lock_guard<std::mutex> lock(lc);
@@ -214,8 +210,6 @@ void PaddleMobile<Dtype, P>::SetCLPath(std::string path) {
     framework::CLEngine::Instance()->setClPath(path);
   }
 }
-<<<<<<< HEAD
-=======
 template <>
 double PaddleMobile<GPU_CL, Precision::FP32>::GetPredictTime() {
   cl_int status;
@@ -418,7 +412,6 @@ int PaddleMobile<GPU_CL, Precision::FP32>::readText(
   return size + 1;
 }
 
->>>>>>> upstream/develop
 #endif
 
 template class PaddleMobile<CPU, Precision::FP32>;
diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h
index 1972f5f4430f0a0f363e16316758174f54312d71..ab148e7361c160bc658403d4696b806323595c54 100644
--- a/src/io/paddle_mobile.h
+++ b/src/io/paddle_mobile.h
@@ -81,11 +81,8 @@ class PaddleMobile {
 #ifdef PADDLE_MOBILE_CL
  public:
  void SetCLPath(std::string cl_path);
-<<<<<<< HEAD
-=======
  int readText(const char *kernelPath,
              char **pcode);  // read a text file into pcode; returns its length
->>>>>>> upstream/develop
 #endif
 
  private:
diff --git a/src/operators/fusion_conv_add_op.cpp b/src/operators/fusion_conv_add_op.cpp
index 1b32ec39b65f8b16fd8967be3f45f4b31db5ca16..731bb631bb98490d580e0c6fe28c24312f6ccb57 100644
--- a/src/operators/fusion_conv_add_op.cpp
+++ b/src/operators/fusion_conv_add_op.cpp
@@ -61,5 +61,7 @@ REGISTER_OPERATOR_MALI_GPU(fusion_conv_add, ops::FusionConvAddOp);
 #ifdef PADDLE_MOBILE_CL
 REGISTER_OPERATOR_CL(fusion_conv_add, ops::FusionConvAddOp);
 #endif
-
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fusion_conv_add, ops::FusionConvAddOp);
+#endif
 #endif
diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h
index 22ba67c617ecdb0f3be2f5757504b6ba530b092c..17bb65e5de457258c83a179f4d24b6f8c58824a8 100644
--- a/src/operators/fusion_conv_add_relu_op.h
+++ b/src/operators/fusion_conv_add_relu_op.h
@@ -29,8 +29,9 @@ namespace operators {
 class FusionConvAddReluOpMatcher : public framework::FusionOpMatcher {
  public:
  FusionConvAddReluOpMatcher() {
-    // node_ = framework::Node(G_OP_TYPE_FUSION_CONV_ADD);
-    // node_ > std::make_shared<framework::Node>(G_OP_TYPE_RELU);
+    node_ = framework::Node(G_OP_TYPE_CONV);
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
+        std::make_shared<framework::Node>(G_OP_TYPE_RELU);
   }
 
   void FolderNodes(
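The re-enabled matcher composes its pattern with `operator>`: conv2d followed by elementwise_add followed by relu collapses into one `fusion_conv_add_relu` node, with the add's `Y` input carried onto the fused op. For reference, the per-element semantics the fused kernel must reproduce (a sketch of mine, not framework code):

```cpp
#include <algorithm>

// y = relu(conv_out + bias_y): elementwise_add's "Y" input, then relu.
inline float fused_conv_add_relu(float conv_out, float bias_y) {
  return std::max(conv_out + bias_y, 0.0f);
}
```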
diff --git a/src/operators/fusion_deconv_add_op.cpp b/src/operators/fusion_deconv_add_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..99af70c1c05c166481f522282bee11895546afa5
--- /dev/null
+++ b/src/operators/fusion_deconv_add_op.cpp
@@ -0,0 +1,33 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_DECONVADD_OP
+
+#include "operators/fusion_deconv_add_op.h"
+
+namespace paddle_mobile {
+namespace operators {}
+}  // namespace paddle_mobile
+
+namespace ops = paddle_mobile::operators;
+REGISTER_FUSION_MATCHER(fusion_deconv_add, ops::FusionDeconvAddMatcher);
+#ifdef PADDLE_MOBILE_CPU
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fusion_deconv_add, ops::FusionDeconvAddOp);
+#endif
+
+#endif
diff --git a/src/operators/fusion_deconv_add_op.h b/src/operators/fusion_deconv_add_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..a2f78ebaf9b2b671b26e027fa6449b9f3304ab50
--- /dev/null
+++ b/src/operators/fusion_deconv_add_op.h
@@ -0,0 +1,108 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADD_OP
+#pragma once
+#include <string>
+#include <vector>
+
+#include "framework/operator.h"
+#include "framework/program/program-optimize/fusion_op_register.h"
+#include "operators/kernel/deconv_add_kernel.h"
+
+namespace paddle_mobile {
+namespace operators {
+using std::string;
+using std::vector;
+class FusionDeconvAddMatcher : public framework::FusionOpMatcher {
+ public:
+  FusionDeconvAddMatcher() {
+    node_ = framework::Node(G_OP_TYPE_CONV_TRANSPOSE);
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD);
+  }
+
+  void FolderNodes(
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    node->Folder(node_.Depth(), Type(),
+                 {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}}}, removed_nodes);
+  }
+
+  std::string Type() { return G_OP_TYPE_FUSION_DECONV_ADD; }
+};
+
+template <typename DeviceType, typename T>
+class FusionDeconvAddOp : public framework::OperatorWithKernel<
+                              DeviceType, FusionDeconvAddParam<DeviceType>,
+                              operators::DeconvAddKernel<DeviceType, T>> {
+ public:
+  FusionDeconvAddOp(const string &type, const VariableNameMap &inputs,
+                    const VariableNameMap &outputs,
+                    const framework::AttributeMap &attrs,
+                    std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<
+            DeviceType, FusionDeconvAddParam<DeviceType>,
+            operators::DeconvAddKernel<DeviceType, T>>(type, inputs, outputs,
+                                                       attrs, scope) {}
+
+  void InferShape() const {
+    auto input = this->param_.Input();
+    auto in_dims = input->dims();
+
+    auto filter = this->param_.Filter();
+    auto filter_dims = filter->dims();
+
+    std::vector<int> strides = this->param_.Strides();
+    std::vector<int> paddings = this->param_.Paddings();
+    std::vector<int> dilations = this->param_.Dilations();
+
+    int groups = this->param_.Groups();
+
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() == 4 || in_dims.size() == 5,
+        "ConvTransposeOp input should be 4-D or 5-D tensor.");
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() == filter_dims.size(),
+        "ConvTransposeOp input dimension and filter dimension "
+        "should be the same.");
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() - strides.size() == 2U,
+        "ConvTransposeOp input dimension and strides dimension should "
+        "be consistent.");
+    PADDLE_MOBILE_ENFORCE(paddings.size() == strides.size(),
+                          "ConvTransposeOp paddings dimension and strides "
+                          "dimension should be the same.");
+    PADDLE_MOBILE_ENFORCE(paddings.size() == dilations.size(),
+                          "ConvTransposeOp paddings dimension and dilations "
+                          "dimension should be the same.");
+    PADDLE_MOBILE_ENFORCE(
+        in_dims[1] == filter_dims[0],
+        "In ConvTransposeOp, The number of input channels should "
+        "be equal to the number of filter's channels.");
+
+    std::vector<int64_t> output_shape({in_dims[0], filter_dims[1] * groups});
+    for (size_t i = 0; i < strides.size(); ++i) {
+      auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
+      output_shape.push_back((in_dims[i + 2] - 1) * strides[i] -
+                             2 * paddings[i] + filter_extent);
+    }
+    this->param_.Output()->Resize(framework::make_ddim(output_shape));
+  }
+
+ protected:
+};
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif  // FUSION_DECONV_ADD_OP
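`InferShape` above is the standard transposed-convolution output-size rule, `out = (in - 1) * stride - 2 * pad + dilation * (k - 1) + 1`. A standalone check (illustration only):

```cpp
#include <cassert>

int deconv_out_dim(int in, int stride, int pad, int dilation, int k) {
  return (in - 1) * stride - 2 * pad + dilation * (k - 1) + 1;
}

int main() {
  // A typical 2x upsampling deconv: 4x4 input, k=4, stride=2, pad=1 -> 8x8.
  assert(deconv_out_dim(4, 2, 1, 1, 4) == 8);
  return 0;
}
```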
diff --git a/src/operators/fusion_deconv_add_relu_op.cpp b/src/operators/fusion_deconv_add_relu_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..524cda3aaba846441e2df598573fb43235db2c8b
--- /dev/null
+++ b/src/operators/fusion_deconv_add_relu_op.cpp
@@ -0,0 +1,33 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_DECONVADDRELU_OP
+
+#include "operators/fusion_deconv_add_relu_op.h"
+
+namespace paddle_mobile {
+namespace operators {}
+}  // namespace paddle_mobile
+
+namespace ops = paddle_mobile::operators;
+REGISTER_FUSION_MATCHER(fusion_deconv_add_relu, ops::FusionDeconvAddReluMatcher);
+#ifdef PADDLE_MOBILE_CPU
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fusion_deconv_add_relu, ops::FusionDeconvAddReluOp);
+#endif
+
+#endif
diff --git a/src/operators/fusion_deconv_add_relu_op.h b/src/operators/fusion_deconv_add_relu_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..7882eeb1f75e003a55c3431abaeabaf07f9a55d5
--- /dev/null
+++ b/src/operators/fusion_deconv_add_relu_op.h
@@ -0,0 +1,109 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDRELU_OP
+#pragma once
+#include <string>
+#include <vector>
+
+#include "framework/operator.h"
+#include "framework/program/program-optimize/fusion_op_register.h"
+#include "operators/kernel/deconv_add_relu_kernel.h"
+
+namespace paddle_mobile {
+namespace operators {
+using std::string;
+using std::vector;
+class FusionDeconvAddReluMatcher : public framework::FusionOpMatcher {
+ public:
+  FusionDeconvAddReluMatcher() {
+    node_ = framework::Node(G_OP_TYPE_CONV_TRANSPOSE);
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
+        std::make_shared<framework::Node>(G_OP_TYPE_RELU);
+  }
+
+  void FolderNodes(
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    node->Folder(node_.Depth(), Type(),
+                 {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}}}, removed_nodes);
+  }
+
+  std::string Type() { return G_OP_TYPE_FUSION_DECONV_ADD_RELU; }
+};
+
+template <typename DeviceType, typename T>
+class FusionDeconvAddReluOp
+    : public framework::OperatorWithKernel<
+          DeviceType, FusionDeconvAddReluParam<DeviceType>,
+          operators::DeconvAddReluKernel<DeviceType, T>> {
+ public:
+  FusionDeconvAddReluOp(const string &type, const VariableNameMap &inputs,
+                        const VariableNameMap &outputs,
+                        const framework::AttributeMap &attrs,
+                        std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<
+            DeviceType, FusionDeconvAddReluParam<DeviceType>,
+            operators::DeconvAddReluKernel<DeviceType, T>>(
+            type, inputs, outputs, attrs, scope) {}
+
+  void InferShape() const {
+    auto input = this->param_.Input();
+    auto in_dims = input->dims();
+
+    auto filter = this->param_.Filter();
+    auto filter_dims = filter->dims();
+
+    std::vector<int> strides = this->param_.Strides();
+    std::vector<int> paddings = this->param_.Paddings();
+    std::vector<int> dilations = this->param_.Dilations();
+
+    int groups = this->param_.Groups();
+
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() == 4 || in_dims.size() == 5,
+        "ConvTransposeOp input should be 4-D or 5-D tensor.");
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() == filter_dims.size(),
+        "ConvTransposeOp input dimension and filter dimension "
+        "should be the same.");
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() - strides.size() == 2U,
+        "ConvTransposeOp input dimension and strides dimension should "
+        "be consistent.");
+    PADDLE_MOBILE_ENFORCE(paddings.size() == strides.size(),
+                          "ConvTransposeOp paddings dimension and strides "
+                          "dimension should be the same.");
+    PADDLE_MOBILE_ENFORCE(paddings.size() == dilations.size(),
+                          "ConvTransposeOp paddings dimension and dilations "
+                          "dimension should be the same.");
+    PADDLE_MOBILE_ENFORCE(
+        in_dims[1] == filter_dims[0],
+        "In ConvTransposeOp, The number of input channels should "
+        "be equal to the number of filter's channels.");
+
+    std::vector<int64_t> output_shape({in_dims[0], filter_dims[1] * groups});
+    for (size_t i = 0; i < strides.size(); ++i) {
+      auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
+      output_shape.push_back((in_dims[i + 2] - 1) * strides[i] -
+                             2 * paddings[i] + filter_extent);
+    }
+    this->param_.Output()->Resize(framework::make_ddim(output_shape));
+  }
+
+ protected:
+};
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif  // FUSION_DECONV_ADD_RELU_OP
diff --git a/src/operators/kernel/arm/fetch_kernel.cpp b/src/operators/kernel/arm/fetch_kernel.cpp
index 72aca9002a777c187179913f0ae7ba2c342d1422..6c25514857dee9029afa3a7a80d5c89a97bbe9be 100644
--- a/src/operators/kernel/arm/fetch_kernel.cpp
+++ b/src/operators/kernel/arm/fetch_kernel.cpp
@@ -8,10 +8,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-<<<<<<< HEAD
-#ifdef FUSION_CONVADD_OP
-=======
->>>>>>> upstream/develop
 #include "operators/kernel/fetch_kernel.h"
 namespace paddle_mobile {
 namespace operators {
@@ -26,7 +22,3 @@ void FetchKernel<CPU, float>::Compute(const FetchParam<CPU> &param) {
 template class FetchKernel<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
-<<<<<<< HEAD
-#endif
-=======
->>>>>>> upstream/develop
diff --git a/src/operators/kernel/cl/cl_kernel/feed_kernel.cl b/src/operators/kernel/cl/cl_kernel/feed_kernel.cl
index 0d8ca183aebeecc68a5162d798f8d6a81d62f325..200a221c9bda49c42f2caff374fc24d6e4df27e5 100644
--- a/src/operators/kernel/cl/cl_kernel/feed_kernel.cl
+++ b/src/operators/kernel/cl/cl_kernel/feed_kernel.cl
@@ -13,20 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
-<<<<<<< HEAD
-__kernel void feed(__global float *in, __write_only image2d_t outputImage,int h,int w)
-=======
 __kernel void feed(__global float *in, __write_only image2d_t outputImage,int h,int w,int c)
->>>>>>> upstream/develop
 {
   int i = get_global_id(0);
  int j = get_global_id(1);
   half4 pixel;
   pixel.x = convert_half(in[(i * w + j)]);
-<<<<<<< HEAD
-  pixel.y = convert_half(in[h * w + (i * w + j)]);
-  pixel.z = convert_half(in[2 * h * w + (i * w + j)]);
-=======
   if(c>=2){
     pixel.y = convert_half(in[h * w + (i * w + j)]);
   }else{
@@ -37,7 +29,6 @@ __kernel void feed(__global float *in, __write_only image2d_t outputImage,int h,
   }else{
     pixel.z = 0.0;
   }
->>>>>>> upstream/develop
   pixel.w = 0.0;
   int2 coords;
   coords.x = j;
diff --git a/src/operators/kernel/cl/feed_kernel.cpp b/src/operators/kernel/cl/feed_kernel.cpp
index 7972dc2be8e9310d70c8a10fa27b935895d7d970..941a6cb815541d1eca30ccc193161838ce28da80 100644
--- a/src/operators/kernel/cl/feed_kernel.cpp
+++ b/src/operators/kernel/cl/feed_kernel.cpp
@@ -36,10 +36,7 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
   const float *input_data = input->data<float>();
   int numel = input->numel();
   cl_mem cl_image = output->GetCLImage();
-<<<<<<< HEAD
-=======
   int c = input->dims()[1];
->>>>>>> upstream/develop
   int height = output->dims()[2];
   int width = output->dims()[3];
   CLTensor input_cl_tensor(this->cl_helper_.CLContext(),
@@ -55,11 +52,8 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
   CL_CHECK_ERRORS(status);
   status = clSetKernelArg(kernel, 3, sizeof(cl_int), &height);
   CL_CHECK_ERRORS(status);
-<<<<<<< HEAD
-=======
   status = clSetKernelArg(kernel, 4, sizeof(cl_int), &c);
   CL_CHECK_ERRORS(status);
->>>>>>> upstream/develop
 
   size_t global_work_size[2] = {width, height};
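The extra `c` argument exists because the old feed kernel unconditionally read channels 1 and 2, walking past the end of the buffer for 1- or 2-channel inputs. The same guard restated host-side, as a sketch:

```cpp
// ch is the channel to fetch, c the tensor's real channel count, hw = h * w,
// idx = i * w + j; out-of-range channels are zero-filled, as in the kernel.
inline float channel_or_zero(const float *in, int c, int ch, int hw, int idx) {
  return (ch < c) ? in[ch * hw + idx] : 0.0f;
}
```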
diff --git a/src/operators/kernel/deconv_add_kernel.h b/src/operators/kernel/deconv_add_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..61170f95e2f38319a454eb18461a171347ffed7a
--- /dev/null
+++ b/src/operators/kernel/deconv_add_kernel.h
@@ -0,0 +1,39 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_DECONVADD_OP
+
+#pragma once
+
+#include "framework/operator.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+using framework::OpKernelBase;
+
+template <typename DeviceType, typename T>
+class DeconvAddKernel
+    : public OpKernelBase<DeviceType, FusionDeconvAddParam<DeviceType>> {
+ public:
+  void Compute(const FusionDeconvAddParam<DeviceType> &param);
+
+  bool Init(FusionDeconvAddParam<DeviceType> *param);
+};
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/deconv_add_relu_kernel.h b/src/operators/kernel/deconv_add_relu_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..dc48272157f6e8a5cba4fd09f8acca1b54e90c12
--- /dev/null
+++ b/src/operators/kernel/deconv_add_relu_kernel.h
@@ -0,0 +1,39 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_DECONVADDRELU_OP
+
+#pragma once
+
+#include "framework/operator.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+using framework::OpKernelBase;
+
+template <typename DeviceType, typename T>
+class DeconvAddReluKernel
+    : public OpKernelBase<DeviceType, FusionDeconvAddReluParam<DeviceType>> {
+ public:
+  void Compute(const FusionDeconvAddReluParam<DeviceType> &param);
+
+  bool Init(FusionDeconvAddReluParam<DeviceType> *param);
+};
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/fpga/V2/conv_add_kernel.cpp b/src/operators/kernel/fpga/V2/conv_add_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..22841e705c255433bebeab479a2e2b8d3a3b7187
--- /dev/null
+++ b/src/operators/kernel/fpga/V2/conv_add_kernel.cpp
@@ -0,0 +1,61 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVADD_OP
+
+#include "operators/kernel/conv_add_kernel.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
+  bool relu_enabled = false;
+  auto input = const_cast<Tensor *>(param->Input());
+  const Tensor *bias = param->Bias();
+  auto bias_ptr = bias->data<float>();
+  auto filter = const_cast<Tensor *>(param->Filter());
+  auto out = param->Output();
+
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
+                        "Output channel should be equal to bias number");
+  int channel = out->dims()[1];
+  auto bs_ptr =
+      (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
+  for (int i = 0; i < channel; i++) {
+    bs_ptr[i + channel] = 1;
+    bs_ptr[i] = bias_ptr[i];
+  }
+
+  fpga::format_conv_data(filter, out, bs_ptr, param->Groups());
+
+  fpga::SplitConvArgs conv_arg = {0};
+  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
+                       param->Groups(), param->Strides()[0],
+                       param->Strides()[1], param->Paddings()[0],
+                       param->Paddings()[1], bs_ptr);
+  param->SetFpgaArgs(conv_arg);
+  return true;
+}
+
+template <>
+void ConvAddKernel<FPGA, float>::Compute(
+    const FusionConvAddParam<FPGA> &param) {
+  fpga::ComputeFpgaConv(param.FpgaArgs());
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/fpga/V2/deconv_add_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_add_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..39d7e818976b56eaea8649392784e7b5dc8b7e1f
--- /dev/null
+++ b/src/operators/kernel/fpga/V2/deconv_add_kernel.cpp
@@ -0,0 +1,36 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_DECONVADD_OP
+
+#include "operators/kernel/deconv_add_kernel.h"
+#include "framework/operator.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
+  return true;
+}
+
+template <>
+void DeconvAddKernel<FPGA, float>::Compute(
+    const FusionDeconvAddParam<FPGA> &param) {}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
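`bs_ptr` in `ConvAddKernel::Init` packs the per-channel biases into the first `channel` floats and identity scales into the second half; `format_bias_scale_array` (bias_scale.cpp above) later interleaves the two halves into the (bias, scale) pairs the FPGA's scale-bias unit consumes. A sketch of that layout:

```cpp
#include <vector>

// First half: biases; second half: all-ones scales, identity for conv+add.
std::vector<float> make_bs(const float *bias_ptr, int channel) {
  std::vector<float> bs(2 * channel);
  for (int i = 0; i < channel; i++) {
    bs[i] = bias_ptr[i];     // bias half, read back as ptr_unaligned[i]
    bs[i + channel] = 1.0f;  // scale half, read back as ptr_unaligned[i + num]
  }
  return bs;
}
```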
diff --git a/src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e84c0ad4b6edda02b2fc51decce6852408218dcc
--- /dev/null
+++ b/src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp
@@ -0,0 +1,36 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_DECONVADDRELU_OP
+
+#include "operators/kernel/deconv_add_relu_kernel.h"
+#include "framework/operator.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool DeconvAddReluKernel<FPGA, float>::Init(
+    FusionDeconvAddReluParam<FPGA> *param) {
+  return true;
+}
+
+template <>
+void DeconvAddReluKernel<FPGA, float>::Compute(
+    const FusionDeconvAddReluParam<FPGA> &param) {}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/fpga/V2/slice_kernel.cpp b/src/operators/kernel/fpga/V2/slice_kernel.cpp
index e4f5d3bc6b27a31ddc7537f69ca225338cf7c025..bc3fbfd796fac693a319ed2ab24023b3ffb84863 100644
--- a/src/operators/kernel/fpga/V2/slice_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/slice_kernel.cpp
@@ -24,10 +24,7 @@ bool SliceKernel<FPGA, float>::Init(SliceParam<FPGA>* param) {
 }
 template <>
 void SliceKernel<FPGA, float>::Compute(const SliceParam<FPGA>& param) {}
-<<<<<<< HEAD
-=======
 
->>>>>>> upstream/develop
 }  // namespace operators
 }  // namespace paddle_mobile
 #endif
diff --git a/src/operators/kernel/fpga/V2/softmax_kernel.cpp b/src/operators/kernel/fpga/V2/softmax_kernel.cpp
index dcca441875613d64d172dffb4bf56ddcceef7b27..bbdb35b715b60b25079c007a74b8b1e901cc9a59 100644
--- a/src/operators/kernel/fpga/V2/softmax_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/softmax_kernel.cpp
@@ -49,16 +49,12 @@ void SoftmaxKernel<FPGA, float>::Compute(const SoftmaxParam<FPGA> &param) {
   Tensor *out = param.Out();
 
   fpga::PerformBypass(param.FpgaArgs());
-<<<<<<< HEAD
-  math::SoftmaxFuntor<CPU, float>()(in_x, out);
-=======
   fpga::fpga_invalidate(
       (void *)in_x->data<float>(),                            // NOLINT
       fpga::get_aligned_channel_num((int)in_x->dims()[1]) *   // NOLINT
          sizeof(float));
   math::SoftmaxFuntor<CPU, float>()(in_x, out);
   fpga::fpga_flush(out->data<float>(), out->memory_size());
->>>>>>> upstream/develop
 }
 
 }  // namespace operators
diff --git a/src/operators/kernel/fpga/V2/split_kernel.cpp b/src/operators/kernel/fpga/V2/split_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..faa1da9186d2a74961450925dea6e3d0f98856bc
--- /dev/null
+++ b/src/operators/kernel/fpga/V2/split_kernel.cpp
@@ -0,0 +1,30 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef SPLIT_OP
+
+#include "operators/kernel/split_kernel.h"
+
+namespace paddle_mobile {
+namespace operators {
+template <>
+bool SplitKernel<FPGA, float>::Init(SplitParam<FPGA>* param) {
+  return true;
+}
+template <>
+void SplitKernel<FPGA, float>::Compute(const SplitParam<FPGA>& param) {}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
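The softmax kernel shows the cache discipline the new `fpga_flush`/`fpga_invalidate` calls enforce whenever the CPU touches FPGA-shared buffers: invalidate before the CPU reads FPGA output, flush after the CPU writes. A pattern sketch (signatures forward-declared as in fpga/V2/api.h):

```cpp
#include <cstddef>

namespace paddle_mobile {
namespace fpga {
int fpga_invalidate(void* address, size_t size);
int fpga_flush(void* address, size_t size);
}  // namespace fpga
}  // namespace paddle_mobile

// Order of operations when the CPU post-processes an FPGA result, mirroring
// the softmax kernel above.
void cpu_stage(void *fpga_out, size_t in_bytes, void *cpu_out, size_t out_bytes) {
  paddle_mobile::fpga::fpga_invalidate(fpga_out, in_bytes);  // drop stale cache lines
  // ... CPU compute reading fpga_out, writing cpu_out (e.g. SoftmaxFuntor) ...
  paddle_mobile::fpga::fpga_flush(cpu_out, out_bytes);  // make CPU writes visible
}
```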
diff --git a/src/operators/kernel/fpga/V2/transpose2_kernel.cpp b/src/operators/kernel/fpga/V2/transpose2_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..404ce10f468691bd6b2740a9427d0b774d682eda
--- /dev/null
+++ b/src/operators/kernel/fpga/V2/transpose2_kernel.cpp
@@ -0,0 +1,35 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef TRANSPOSE2_OP
+
+#include "operators/kernel/transpose2_kernel.h"
+#include "operators/kernel/central-arm-func/transpose2_arm_func.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool Transpose2Kernel<FPGA, float>::Init(Transpose2Param<FPGA> *param) {
+  return true;
+}
+
+template <>
+void Transpose2Kernel<FPGA, float>::Compute(
+    const Transpose2Param<FPGA> &param) {
+  // Transpose2Compute<float>(param);
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index e41162a05067b3a35e065f975d14dccf5f3a9142..c2419faa7ff6d1ee20dedb9857894cb2d5bf8466 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -1532,8 +1532,6 @@ class ReluParam : public ReluParamBase {
 };
 #endif
 
-<<<<<<< HEAD
-=======
 #endif
 
 #ifdef TANH_OP
@@ -1555,7 +1553,6 @@ class TanhParam : public OpParam {
   RType *input_x_;
   RType *out_;
 };
->>>>>>> upstream/develop
 #endif
 
 #ifdef PRELU_OP
@@ -2224,7 +2221,10 @@ class ConvTransposeParam : public OpParam {
                      const Scope &scope) {
     filter_ = FilterFrom<GType>(inputs, scope);
     input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutputFrom<GType>(outputs, scope);
+    // output_ = OutputFrom<GType>(outputs, scope);
+    if (outputs.count("Output")) {
+      output_ = OpParam::OutputFrom<GType>(outputs, scope);
+    }
     strides_ = GetAttr<vector<int>>("strides", attrs);
     paddings_ = GetAttr<vector<int>>("paddings", attrs);
     dilations_ = GetAttr<vector<int>>("dilations", attrs);
@@ -2265,6 +2265,37 @@ class ConvTransposeParam : public OpParam {
 #endif
 };
 #endif
+#ifdef FUSION_DECONVADD_OP
+template <typename Dtype>
+class FusionDeconvAddParam : public ConvTransposeParam<Dtype> {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+
+ public:
+  FusionDeconvAddParam(const VariableNameMap &inputs,
+                       const VariableNameMap &outputs,
+                       const AttributeMap &attrs, const Scope &scope)
+      : ConvTransposeParam<Dtype>(inputs, outputs, attrs, scope) {
+    bias_ = OpParam::InputYFrom<GType>(inputs, scope);
+    axis_ = OpParam::GetAttr<int>("axis", attrs);
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+  }
+  RType *Bias() const { return bias_; }
+
+  const int &Axis() const { return axis_; }
+
+  RType *Output() const { return output_; }
+
+ protected:
+  RType *bias_;
+  int axis_;
+  RType *output_;
+};
+#endif
+
+#ifdef FUSION_DECONVADDRELU_OP
+template <typename Dtype>
+using FusionDeconvAddReluParam = FusionDeconvAddParam<Dtype>;
+#endif
 
 #ifdef FUSION_DECONVRELU_OP
 template <typename Dtype>
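The `outputs.count("Output")` guard in `ConvTransposeParam` exists because a fused `fusion_deconv_add` node no longer publishes its result under conv2d_transpose's `"Output"` key; per `op_input_output_key` the fused types use `"Out"`, which the derived `FusionDeconvAddParam` re-binds via `OutFrom`. Concretely, with a hypothetical mini map type for illustration:

```cpp
#include <map>
#include <string>
#include <vector>

// Stand-in for the framework's VariableNameMap, just to make the keys concrete.
using VarMap = std::map<std::string, std::vector<std::string>>;

std::string result_var(const VarMap &outputs) {
  // plain conv2d_transpose publishes "Output"; the fused op publishes "Out"
  if (outputs.count("Output")) return outputs.at("Output")[0];
  return outputs.at("Out")[0];
}
```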
diff --git a/src/operators/split_op.cpp b/src/operators/split_op.cpp
index 52732b41288fdc94a7dfc07ef6cfc8d12a969b7b..52f3499b0df5ab1d384890e405c99c533db9f974 100644
--- a/src/operators/split_op.cpp
+++ b/src/operators/split_op.cpp
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #ifdef SPLIT_OP
-
+#include
 #include "operators/split_op.h"
 
 namespace paddle_mobile {
@@ -83,5 +83,8 @@ namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
 REGISTER_OPERATOR_CPU(split, ops::SplitOp);
 #endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(split, ops::SplitOp);
+#endif
 
 #endif  // SPLIT_OP
diff --git a/src/operators/tanh_op.cpp b/src/operators/tanh_op.cpp
index 454cdfa26942eda225a811317e907b1989bcf61b..dd6f9083afd6919cfa3320e5e20275a785adf092 100644
--- a/src/operators/tanh_op.cpp
+++ b/src/operators/tanh_op.cpp
@@ -29,7 +29,7 @@ void TanhOp<DeviceType, T>::InferShape() const {
 
 namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_FPGA
-REGISTER_OPERATOR_FPGA(Tanh, ops::TanhOp);
+REGISTER_OPERATOR_FPGA(tanh, ops::TanhOp);
 #endif
 
 #endif
diff --git a/src/operators/transpose2_op.cpp b/src/operators/transpose2_op.cpp
index 64d07991f60b4057e3d2841afa1bfe6483f31a88..03db27a9a2f8fc8974a1b1c97b1d71782388103e 100644
--- a/src/operators/transpose2_op.cpp
+++ b/src/operators/transpose2_op.cpp
@@ -60,5 +60,8 @@ namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
 REGISTER_OPERATOR_CPU(transpose2, ops::Transpose2Op);
 #endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(transpose2, ops::Transpose2Op);
+#endif
 
 #endif  // TRANSPOSE_OP
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 4fa8f62c2f4c76032e1ddd4a3a32847bcb9c1dba..e7c45edd6b63a89afb97331aaf3fa33bb869526f 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -66,15 +66,12 @@ list(FIND NET "FPGA_NET_V1" CON)
 if (CON GREATER -1)
     ADD_EXECUTABLE(test-resnet50 fpga/test_resnet50.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-resnet50 paddle-mobile)
+
+    ADD_EXECUTABLE(test-densebox net/test_densebox_combine.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-densebox paddle-mobile)
+
     set(FOUND_MATCH ON)
 endif ()
-<<<<<<< HEAD
-
-list(FIND NET "FPGA_NET_V2" CON)
-if (CON GREATER -1)
-    ADD_EXECUTABLE(test-resnet50 fpga/test_resnet50.cpp test_helper.h test_include.h executor_for_test.h)
-    target_link_libraries(test-resnet50 paddle-mobile)
-=======
 
 list(FIND NET "FPGA_NET_V2" CON)
 if (CON GREATER -1)
@@ -83,7 +80,10 @@ if (CON GREATER -1)
 
     ADD_EXECUTABLE(test-pe fpga/test_pe.cpp)
     target_link_libraries(test-pe paddle-mobile)
->>>>>>> upstream/develop
+
+    ADD_EXECUTABLE(test-densebox net/test_densebox_combine.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-densebox paddle-mobile)
+
     set(FOUND_MATCH ON)
 endif ()
diff --git a/test/net/test_yologpu.cpp b/test/net/test_yologpu.cpp
index 301580597521793a1af21d51b7961555685272ef..0215ded59e5f74f0c103d4b51abe06b487bd50ab 100644
--- a/test/net/test_yologpu.cpp
+++ b/test/net/test_yologpu.cpp
@@ -13,19 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include
-<<<<<<< HEAD
-#include "../test_helper.h"
-#include "../test_include.h"
-
-int main() {
-  paddle_mobile::PaddleMobile paddle_mobile;
-  //    paddle_mobile.SetThreadNum(4);
-  auto time1 = paddle_mobile::time();
-  //  auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
-  //  std::string(g_mobilenet_detect) + "/params", true);
-
-  auto isok = paddle_mobile.Load(std::string(g_yolo_mul), true);
-=======
 #include
 #include "../../src/common/types.h"
 #include "../../src/io/paddle_test_inference_api.h"
@@ -99,7 +86,6 @@ void t2() {
                                 std::string(g_yolo_mul) + "/params", true);
 
   //  auto isok = paddle_mobile.Load(std::string(g_yolo_mul), true);
->>>>>>> upstream/develop
   if (isok) {
     auto time2 = paddle_mobile::time();
     std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
@@ -138,8 +124,6 @@ void t2() {
   //  std::cout << i << std::endl;
   //  }
 }
-<<<<<<< HEAD
-=======
 
 void t3() {
@@ -201,6 +185,5 @@ int main() {
   //  th2.join();
   //  th3.join();
   th1.join();
->>>>>>> upstream/develop
   return 0;
 }