diff --git a/.gitignore b/.gitignore
index 964bfa4e48ee8e7c9387339d5775a3df90c63eb4..a7b444b5881c8b993c6edbb4a7ba555359dcab39 100644
--- a/.gitignore
+++ b/.gitignore
@@ -84,3 +84,6 @@ SwiftProtobuf.framework
 paddle-mobile.xcworkspace
 metal/models/
 metal/images/
+
+
+tools/libomp.a
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8c388d8b2a6374c68aecf86b215c8e8462b13c2b..2b3e6227acb58b3d8b3bf132fecf60c5298f172f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,7 +44,7 @@ if (LOG_PROFILE)
     add_definitions(-DPADDLE_MOBILE_PROFILE)
 endif()
 
-if(USE_OPENMP AND NOT IS_IOS)
+if(USE_OPENMP)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
     add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
 endif()
@@ -130,8 +130,8 @@ endif ()
 
 if (IS_IOS)
 else()
-    list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobile.h)
-    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobile.mm)
+    list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobileCPU.h)
+    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobileCPU.mm)
     list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/op_symbols.h)
 endif ()
diff --git a/src/fpga/api.cpp b/src/fpga/api.cpp
index 11b26fe6893dc24664b563f5a2212fae77926d5e..10787b915594a12a826a087e5453b2c2e8c03f9a 100644
--- a/src/fpga/api.cpp
+++ b/src/fpga/api.cpp
@@ -29,9 +29,7 @@ namespace fpga {
 static int fd = -1;
 static const char *device_path = "/dev/fpgadrv0";
 
-#ifdef PADDLE_MOBILE_OS_LINUX
 static std::map<void *, size_t> memory_map;
-#endif
 
 static inline int do_ioctl(int req, const void *arg) {
 #ifdef PADDLE_MOBILE_OS_LINUX
@@ -53,32 +51,38 @@ int open_device() {
 // memory management;
 void *fpga_malloc(size_t size) {
   static uint64_t counter = 0;
+
 #ifdef PADDLE_MOBILE_OS_LINUX
   auto ptr = mmap64(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-  memory_map.insert(std::make_pair(ptr, size));
-  return ptr;
 #else
-  return malloc(size);
+  auto ptr = malloc(size);
 #endif
-  counter += size;
-  DLOG << size << " bytes allocated. Total " << counter << " bytes";
+  counter += size;
+  memory_map.insert(std::make_pair(ptr, size));
+  DLOG << "Address: " << ptr << ", " << size << " bytes allocated. Total "
+       << counter << " bytes";
+  return ptr;
 }
 
 void fpga_free(void *ptr) {
-#ifdef PADDLE_MOBILE_OS_LINUX
   static uint64_t counter = 0;
   size_t size = 0;
+
   auto iter = memory_map.find(ptr);  // std::map<void *, size_t>::iterator
   if (iter != memory_map.end()) {
     size = iter->second;
-    munmap(ptr, size);
     memory_map.erase(iter);
-  }
-  counter += size;
-  DLOG << size << " bytes freed. Total " << counter << " bytes";
+#ifdef PADDLE_MOBILE_OS_LINUX
+    munmap(ptr, size);
 #else
-  free(ptr);
+    free(ptr);
 #endif
+    counter += size;
+    DLOG << "Address: " << ptr << ", " << size << " bytes freed. Total "
+         << counter << " bytes";
+  } else {
+    DLOG << "Invalid pointer";
+  }
 }
 
 void fpga_copy(void *dest, const void *src, size_t num) {
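The reworked fpga_malloc/fpga_free pair keeps one map of pointer-to-size for both the mmap64 and plain-malloc paths, because munmap() needs the original mapping length and an unknown pointer should be rejected rather than freed. A minimal standalone sketch of that bookkeeping pattern (using MAP_ANONYMOUS instead of the driver fd, and the hypothetical names tracked_alloc/tracked_free):

    #include <sys/mman.h>

    #include <cstddef>
    #include <iostream>
    #include <map>

    // Remember each allocation's size so the free path can recover the
    // length that munmap() requires; keeping every live pointer in one map
    // also lets us catch double or invalid frees.
    static std::map<void *, size_t> allocations;

    void *tracked_alloc(size_t size) {
      void *ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (ptr == MAP_FAILED) return nullptr;
      allocations.emplace(ptr, size);
      return ptr;
    }

    void tracked_free(void *ptr) {
      auto iter = allocations.find(ptr);
      if (iter == allocations.end()) {
        std::cerr << "Invalid pointer\n";  // unknown or already-freed address
        return;
      }
      munmap(ptr, iter->second);  // munmap needs the original mapping length
      allocations.erase(iter);
    }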
Total " + << counter << " bytes"; + } else { + DLOG << "Invalid pointer"; + } } void fpga_copy(void *dest, const void *src, size_t num) { @@ -86,14 +90,14 @@ void fpga_copy(void *dest, const void *src, size_t num) { } int fpga_flush(void *address, size_t size) { - struct MemoryCacheArgs args; + struct MemoryCacheArgs args = {nullptr}; args.address = address; args.size = size; return do_ioctl(IOCTL_MEMCACHE_FLUSH, &args); } int fpga_invalidate(void *address, size_t size) { - struct MemoryCacheArgs args; + struct MemoryCacheArgs args = {nullptr}; args.address = address; args.size = size; return do_ioctl(IOCTL_MEMCACHE_INVAL, &args); @@ -211,7 +215,8 @@ int PerformBypass(const struct BypassArgs &args) { int ComputeFPGAConcat(const struct ConcatArgs &args) { #ifdef FPGA_TEST_MODE DLOG << "=============ComputeFpgaConcat==========="; - DLOG << " out_address:" << args.image_out + DLOG << " Image_num: " << args.image_num + << " out_address:" << args.image_out << " out_scale_address:" << args.scale_out; DLOG << " image_height:" << args.height << " image_width:" << args.width; for (int i = 0; i < args.image_num; i++) { @@ -235,7 +240,7 @@ void format_image(framework::Tensor *image_tensor) { auto channel = dims[1], height = dims[2], width = dims[3]; auto data_ptr = image_tensor->data(); size_t memory_size = channel * height * width * sizeof(float); - float *new_data = (float *)fpga_malloc(memory_size); + auto new_data = (float *)fpga_malloc(memory_size); fpga_copy(new_data, data_ptr, memory_size); image::format_image(&new_data, channel, height, width); image_tensor->reset_data_ptr(new_data); @@ -332,7 +337,7 @@ void format_concat_output(framework::Tensor *out, int height, int width, sum_cw = align_to_x(width * sum_channel, IMAGE_ALIGNMENT); auto data_ptr = fpga_malloc(height * sum_cw * sizeof(half)); - auto ddim = framework::make_ddim({-1, sum_channel, height, width}); + auto ddim = framework::make_ddim({1, sum_channel, height, width}); out->Resize(ddim); out->reset_data_ptr(data_ptr); } @@ -346,12 +351,12 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input, auto out_ptr = out->data(); arg->group_num = (uint32_t)group_num; - arg->split_num = (uint32_t)fpga::get_plit_num(filter); + // Either group_num or split_num = 1; + arg->split_num = group_num == 1 ? 
@@ -211,7 +215,8 @@ int PerformBypass(const struct BypassArgs &args) {
 int ComputeFPGAConcat(const struct ConcatArgs &args) {
 #ifdef FPGA_TEST_MODE
   DLOG << "=============ComputeFpgaConcat===========";
-  DLOG << " out_address:" << args.image_out
+  DLOG << " Image_num: " << args.image_num
+       << " out_address:" << args.image_out
        << " out_scale_address:" << args.scale_out;
   DLOG << " image_height:" << args.height << " image_width:" << args.width;
   for (int i = 0; i < args.image_num; i++) {
@@ -235,7 +240,7 @@ void format_image(framework::Tensor *image_tensor) {
   auto channel = dims[1], height = dims[2], width = dims[3];
   auto data_ptr = image_tensor->data<float>();
   size_t memory_size = channel * height * width * sizeof(float);
-  float *new_data = (float *)fpga_malloc(memory_size);
+  auto new_data = (float *)fpga_malloc(memory_size);
   fpga_copy(new_data, data_ptr, memory_size);
   image::format_image(&new_data, channel, height, width);
   image_tensor->reset_data_ptr(new_data);
@@ -332,7 +337,7 @@ void format_concat_output(framework::Tensor *out, int height, int width,
 
   sum_cw = align_to_x(width * sum_channel, IMAGE_ALIGNMENT);
   auto data_ptr = fpga_malloc(height * sum_cw * sizeof(half));
-  auto ddim = framework::make_ddim({-1, sum_channel, height, width});
+  auto ddim = framework::make_ddim({1, sum_channel, height, width});
   out->Resize(ddim);
   out->reset_data_ptr(data_ptr);
 }
@@ -346,12 +351,12 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
   auto out_ptr = out->data();
 
   arg->group_num = (uint32_t)group_num;
-  arg->split_num = (uint32_t)fpga::get_plit_num(filter);
+  // Either group_num or split_num = 1;
+  arg->split_num = group_num == 1 ? (uint32_t)get_plit_num(filter) : 1;
   arg->filter_num = (uint32_t)filter->dims()[0];
   arg->output.address = out_ptr;
   arg->output.scale_address = out->scale;
-  arg->conv_args = (fpga::ConvArgs *)fpga::fpga_malloc(arg->split_num *
-                                                       sizeof(fpga::ConvArgs));
+  arg->conv_args = (ConvArgs *)fpga_malloc(arg->split_num * sizeof(ConvArgs));
 
   arg->concat_arg.image_num = arg->split_num;
   arg->concat_arg.image_out = out_ptr;
@@ -360,15 +365,14 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
   arg->concat_arg.width = (uint32_t)filter->dims()[3];
 
   int n = arg->split_num;
-  arg->concat_arg.images_in = (half **)fpga::fpga_malloc(n * sizeof(int *));
-  arg->concat_arg.scales_in = (float **)fpga::fpga_malloc(n * sizeof(float *));
-  arg->concat_arg.channel_num =
-      (uint32_t *)fpga::fpga_malloc(n * sizeof(uint32_t));
+  arg->concat_arg.images_in = (half **)fpga_malloc(n * sizeof(int *));
+  arg->concat_arg.scales_in = (float **)fpga_malloc(n * sizeof(float *));
+  arg->concat_arg.channel_num = (uint32_t *)fpga_malloc(n * sizeof(uint32_t));
   arg->concat_arg.image_out = out_ptr;
 
   auto channel = (int)out->dims()[1];
-  int filter_num_per_div = fpga::get_filter_num_per_div(filter, group_num);
-  int element_num = fpga::get_aligned_filter_element_num(
+  int filter_num_per_div = get_filter_num_per_div(filter, group_num);
+  int element_num = get_aligned_filter_element_num(
       filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
 
   for (int i = 0; i < n; i++) {
@@ -390,16 +394,17 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
         &((int8_t *)filter_ptr)[i * element_num * filter_num_per_div];
     arg->conv_args[i].sb_address = &bs_ptr[i * filter_num_per_div * 2];
     arg->conv_args[i].filter_num =
-        (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
-                                    channel - (n - 1) * filter_num_per_div)
+        (uint32_t)(i == n - 1 ? channel - (n - 1) * filter_num_per_div
                               : filter_num_per_div);
 
     if (n > 1) {
       arg->conv_args[i].output.scale_address =
-          (float *)fpga::fpga_malloc(2 * sizeof(float));
-      arg->conv_args[i].output.address =
-          fpga::fpga_malloc(input->dims()[2] * input->dims()[3] *
-                            arg->conv_args[i].filter_num * sizeof(half));
+          (float *)fpga_malloc(2 * sizeof(float));
+      arg->conv_args[i].output.address = fpga_malloc(
+          input->dims()[2] *
+          align_to_x(input->dims()[3] * arg->conv_args[i].filter_num,
+                     IMAGE_ALIGNMENT) *
+          sizeof(half));
     } else {
@@ -408,7 +413,7 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
     }
 
     arg->concat_arg.images_in[i] = (half *)arg->conv_args[i].output.address;
-    arg->concat_arg.scales_in[i] = (float *)arg->conv_args[i].sb_address;
+    arg->concat_arg.scales_in[i] = arg->conv_args[i].output.scale_address;
    arg->concat_arg.channel_num[i] = arg->conv_args[i].filter_num;
   }
 }
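The bug fixed in fill_conv_arg is that each split's intermediate output buffer must be sized with the row stride rounded up to the FPGA's alignment, not the raw channel-times-width product. The align_to_x helper is defined elsewhere in src/fpga; assuming the usual round-up-to-multiple semantics (and, say, an IMAGE_ALIGNMENT of 16), a sketch:

    #include <cstdint>

    // Round x up to the next multiple of `to` (assumed semantics of the
    // align_to_x helper used throughout the FPGA glue; its real definition
    // is not part of this diff).
    inline uint32_t align_to_x(uint32_t x, uint32_t to) {
      return (x + to - 1) / to * to;
    }

    // e.g. with IMAGE_ALIGNMENT == 16: a 3-channel, 224-wide row occupies
    // align_to_x(3 * 224, 16) == 672 elements (already aligned), while a
    // 3-channel, 100-wide row pads 300 up to 304.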
diff --git a/src/fpga/image.cpp b/src/fpga/image.cpp
index 0603d164dfa88eb5620ebf588c610ea25a78be5f..ad5053f9780895d94cc3095dc694e86dbbb1abac 100644
--- a/src/fpga/image.cpp
+++ b/src/fpga/image.cpp
@@ -74,15 +74,17 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out,
   int align_each_in_area_cw = 0;
   int align_each_out_area_cw_differ = 0;
   int tmp_channel = 0;
-  *scale_out = 0;
+  scale_out[0] = 0.0;
+  scale_out[1] = 0.0;
   for (i = 0; i < image_num; i++) {
     each_out_line_channel += channel_num[i];
-    *scale_out = std::max(*scale_out, scales_in[i][0]);
+    scale_out[0] = std::max(*scale_out, scales_in[i][0]);
     fpga_invalidate(images_in[i],
                     height *
                         align_to_x(channel_num[i] * width, IMAGE_ALIGNMENT) *
                         sizeof(int16_t));
   }
+  scale_out[1] = 1 / scale_out[0];
   align_each_out_area_cw =
       align_to_x(each_out_line_channel * width, IMAGE_ALIGNMENT);
   align_each_out_area_cw_differ =
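By the look of this hunk, the output scale is a two-element array: slot 0 takes the maximum of the input scales (the output range must cover the widest input range) and slot 1 caches the reciprocal so kernels can multiply rather than divide. A small sketch of that bookkeeping with illustrative names:

    #include <algorithm>
    #include <vector>

    // Combine per-input quantization scales for a channel-wise concat.
    void combine_scales(const std::vector<const float *> &scales_in,
                        float scale_out[2]) {
      scale_out[0] = 0.0f;
      for (const float *s : scales_in) {
        scale_out[0] = std::max(scale_out[0], s[0]);
      }
      scale_out[1] = 1.0f / scale_out[0];  // cached reciprocal
    }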
diff --git a/src/io/executor.cpp b/src/io/executor.cpp
index 30581abb2e42f9f28bbf9e9c3ba01be0964d4d56..c8d8f52a427bb1ee2b9fa04c9ef09f8e626f11b0 100644
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -79,7 +79,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
     std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
     for (int j = 0; j < ops.size(); ++j) {
       std::shared_ptr<framework::OpDesc> op = ops[j];
-      DLOG << "create op: " << op->Type();
+      DLOG << "create op: " << j << " " << op->Type();
       auto op_base = framework::OpRegistry<Dtype>::CreateOp(
           op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
           program_.scope);
@@ -103,7 +103,9 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
   std::shared_ptr<framework::BlockDesc> to_predict_block =
       to_predict_program_->Block(0);
   auto &ops = ops_of_block_[*to_predict_block.get()];
+  int i = 0;
   for (const auto &op : ops) {
+    DLOG << "Init op: " << i++ << " " << op->Type();
     op->Init();
   }
 }
@@ -702,6 +704,7 @@ void Executor<Dtype, P>::Predict_From_To(int start, int end) {
     clock_gettime(CLOCK_MONOTONIC, &ts);
     profile[i].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
 #endif
+    DLOG << "Running op: " << i << " " << ops[i]->Type();
    ops[i]->Run();
 #ifdef PADDLE_MOBILE_PROFILE
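Tagging every op with its index at create, init, and run time means the last log line printed names the op that hung or crashed, and the indices line up with Predict_From(start)/Predict_From_To(start, end) for bisecting a broken network. A compact sketch of the pattern under an illustrative Op stand-in (the real framework::OperatorBase is richer):

    #include <iostream>
    #include <memory>
    #include <string>
    #include <vector>

    struct Op {
      std::string type;
      const std::string &Type() const { return type; }
      void Run() { /* kernel dispatch */ }
    };

    void run_range(const std::vector<std::shared_ptr<Op>> &ops, int start,
                   int end) {
      for (int i = start; i < end; ++i) {
        // Log before Run(): if Run() never returns, the culprit is named.
        std::cout << "Running op: " << i << " " << ops[i]->Type() << "\n";
        ops[i]->Run();
      }
    }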
diff --git a/src/ios_io/PaddleMobile.h b/src/ios_io/PaddleMobileCPU.h
similarity index 55%
rename from src/ios_io/PaddleMobile.h
rename to src/ios_io/PaddleMobileCPU.h
index 5854c5c3a4d4c899feb88822b2f7993860d1ed76..c68d81f328f4ce9a9bf16624f677b2996644c35c 100644
--- a/src/ios_io/PaddleMobile.h
+++ b/src/ios_io/PaddleMobileCPU.h
@@ -17,7 +17,17 @@
 #import <CoreImage/CoreImage.h>
 #import <Foundation/Foundation.h>
 
-@interface PaddleMobile : NSObject
+@interface PaddleMobileCPUResult: NSObject
+
+@property (assign, nonatomic, readonly) float *output;
+
+@property (assign, nonatomic, readonly) int outputSize;
+
+-(void)releaseOutput;
+
+@end
+
+@interface PaddleMobileCPU : NSObject
 
 /*
     Create the instance
@@ -34,13 +44,36 @@
 */
 - (BOOL)load:(NSString *)modelAndWeightPath;
 
+/*
+ * Load a combined model from memory.
+ * */
+- (BOOL)LoadCombinedMemory:(size_t)modelLen
+               andModelBuf:(const uint8_t *)modelBuf
+         andModelParamsLen:(size_t)combinedParamsLen
+      andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf;
+
+/*
+ * Preprocess an image; the caller allocates and releases the output buffer.
+ * */
+-(void)preprocess:(CGImageRef)image
+           output:(float *)output
+            means:(NSArray *)means
+            scale:(float)scale
+              dim:(NSArray *)dim;
+
+/*
+ * Run prediction on preprocessed data; call releaseOutput on the result
+ * once you are done with it.
+ * */
+- (PaddleMobileCPUResult *)predictInput:(float *)input
+                                    dim:(NSArray *)dim;
+
 /*
     Run prediction; means and scale are the preprocessing parameters used
     when the model was trained. If no such preprocessing was done, call
     predict without them.
 */
 - (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim means:(NSArray *)means scale:(float)scale;
 
 /*
-    Run prediction
+    Run prediction, with means defaulting to 0 and scale to 1.0
 */
 - (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim;
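predictInput copies the caller's buffer into a framework tensor, runs prediction, and copies the result into a fresh heap buffer that the caller later releases via releaseOutput. In C++ terms the flow is roughly the sketch below; Tensor and the Predict call are simplified stand-ins for the real paddle_mobile types:

    #include <cstring>
    #include <vector>

    struct Tensor {  // stand-in for paddle_mobile::framework::Tensor
      std::vector<float> buf;
      float *mutable_data(size_t n) { buf.resize(n); return buf.data(); }
      const float *data() const { return buf.data(); }
      size_t numel() const { return buf.size(); }
    };

    // Caller keeps ownership of `input`; the returned buffer is heap
    // allocated and must be freed by the caller (releaseOutput's delete[]).
    float *predict_input(const float *input, const std::vector<int> &dim,
                         size_t *out_numel) {
      size_t numel = 1;
      for (int d : dim) numel *= (size_t)d;

      Tensor input_tensor;
      std::memcpy(input_tensor.mutable_data(numel), input,
                  numel * sizeof(float));

      Tensor output = input_tensor;  // placeholder for the real Predict call

      float *result = new float[output.numel()];
      std::memcpy(result, output.data(), output.numel() * sizeof(float));
      *out_numel = output.numel();
      return result;
    }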
diff --git a/src/ios_io/PaddleMobile.mm b/src/ios_io/PaddleMobileCPU.mm
similarity index 55%
rename from src/ios_io/PaddleMobile.mm
rename to src/ios_io/PaddleMobileCPU.mm
index 5c7b801be0ea7967ea0c94813325d41071bb890b..5a21418ef5fa9cbf7b24436cb778fc8c6c164e16 100644
--- a/src/ios_io/PaddleMobile.mm
+++ b/src/ios_io/PaddleMobileCPU.mm
@@ -12,24 +12,51 @@
  See the License for the specific language governing permissions and
  limitations under the License. */
 
-#import "PaddleMobile.h"
+#import "PaddleMobileCPU.h"
 #import "op_symbols.h"
+#include "framework/tensor.h"
 #import "io/paddle_mobile.h"
 #import <memory>
 #import <vector>
 
-@interface PaddleMobile()
+
+@interface PaddleMobileCPUResult()
+
+-(void)toSetOutput:(float *)output;
+
+-(void)toSetOutputSize:(int)outputSize;
+
+@end
+
+@implementation PaddleMobileCPUResult
+
+-(void)releaseOutput {
+  delete [] _output;
+  _output = nil;
+  _outputSize = 0;
+}
+
+-(void)toSetOutput:(float *)output {
+  _output = output;
+}
+
+-(void)toSetOutputSize:(int)outputSize {
+  _outputSize = outputSize;
+}
+
+@end
+
+
+@interface PaddleMobileCPU()
 {
   paddle_mobile::PaddleMobile<paddle_mobile::CPU, paddle_mobile::Precision::FP32> *pam_;
   BOOL loaded_;
-  std::vector<float> *predict_input_;
-
 }
 @end
 
-@implementation PaddleMobile
+@implementation PaddleMobileCPU
 
 static std::mutex shared_mutex;
 
@@ -66,6 +93,14 @@ static std::mutex shared_mutex;
   }
 }
 
+- (BOOL)LoadCombinedMemory:(size_t)modelLen
+               andModelBuf:(const uint8_t *)modelBuf
+         andModelParamsLen:(size_t)combinedParamsLen
+      andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf {
+  pam_->SetThreadNum(2);
+  return loaded_ = pam_->LoadCombinedMemory(modelLen, modelBuf, combinedParamsLen, combinedParamsBuf);
+}
+
 - (BOOL)load:(NSString *)modelAndWeightPath{
   std::string model_path_str = std::string([modelAndWeightPath UTF8String]);
   if (loaded_ = pam_->Load(model_path_str)) {
@@ -75,6 +110,57 @@ static std::mutex shared_mutex;
   }
 }
 
+
+-(void)preprocess:(CGImageRef)image
+           output:(float *)output
+            means:(NSArray *)means
+            scale:(float)scale
+              dim:(NSArray *)dim {
+  std::lock_guard<std::mutex> lock(shared_mutex);
+
+  // dim to c++ vector, get numel
+  std::vector<int> dim_vec;
+  int numel = 1;
+  for (int k = 0; k < dim.count; ++k) {
+    int d = dim[k].intValue;
+    numel *= d;
+    dim_vec.push_back(d);
+  }
+
+  const int sourceRowBytes = CGImageGetBytesPerRow(image);
+  const int imageWidth = CGImageGetWidth(image);
+  const int imageHeight = CGImageGetHeight(image);
+  const int imageChannels = 4;
+  CGDataProviderRef provider = CGImageGetDataProvider(image);
+  CFDataRef cfData = CGDataProviderCopyData(provider);
+  const UInt8 *input = CFDataGetBytePtr(cfData);
+
+  int wanted_input_width = dim_vec[3];
+  int wanted_input_height = dim_vec[2];
+  int wanted_input_channels = dim_vec[1];
+
+  for (int c = 0; c < wanted_input_channels; ++c) {
+    float *out_channel = output + c * wanted_input_height * wanted_input_width;
+    for (int y = 0; y < wanted_input_height; ++y) {
+      float *out_row = out_channel + y * wanted_input_width;
+      for (int x = 0; x < wanted_input_width; ++x) {
+        int in_row = (y * imageHeight) / wanted_input_height;
+        int in_col = (x * imageWidth) / wanted_input_width;
+        const UInt8 *in_pixel = input + (in_row * imageWidth * imageChannels) + (in_col * imageChannels);
+        float *out_pos = out_row + x;
+        if (c == 0) {
+          *out_pos = (in_pixel[c] - means[c].floatValue) * scale;
+        } else if (c == 1) {
+          *out_pos = (in_pixel[c] - means[c].floatValue) * scale;
+        } else if (c == 2) {
+          *out_pos = (in_pixel[c] - means[c].floatValue) * scale;
+        }
+      }
+    }
+  }
+
+}
+
 -(void)preprocess:(const UInt8 *)input output:(float *)output imageWidth:(int)imageWidth imageHeight:(int)imageHeight imageChannels:(int)imageChannels means:(NSArray *)means scale:(float)scale dim:(std::vector<int>)dim{
   if (means == nil) {
     means = @[@0, @0, @0];
   }
@@ -105,27 +191,54 @@ static std::mutex shared_mutex;
   }
 }
 
-- (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim means:(NSArray *)means scale:(float)scale{
-//  printf(" hi i am here");
-  if (predict_input_) {
-//    printf(" fukc -- ");
-//    printf(" %d \n", predict_input_->size());
-    // dim to c++ vector, get numel
-    std::vector<int> dim_vec = {1, 3, 300, 300};
-//    int numel = 1;
-//    for (int k = 0; k < dim.count; ++k) {
-//      int d = dim[k].intValue;
-//      numel *= d;
-//      dim_vec.push_back(d);
-//    }
-
-    std::vector<float> cpp_result = pam_->Predict(*predict_input_, dim_vec);
+- (PaddleMobileCPUResult *)predictInput:(float *)input
+                                    dim:(NSArray *)dim {
+  std::lock_guard<std::mutex> lock(shared_mutex);
+  if (!loaded_) {
+    printf("PaddleMobile hasn't been loaded yet");
+    return nil;
+  }
+
+  if (dim.count != 4) {
+    printf("dim must have 4 elements");
     return nil;
   }
-//  printf(" predict one ");
-//  std::lock_guard<std::mutex> lock(shared_mutex);
+
+  // dim to c++ vector, get numel
+  std::vector<int> dim_vec;
+  int numel = 1;
+  for (int k = 0; k < dim.count; ++k) {
+    int d = dim[k].intValue;
+    numel *= d;
+    dim_vec.push_back(d);
+  }
+
+  paddle_mobile::framework::Tensor input_tensor;
+
+  paddle_mobile::framework::DDim dims = paddle_mobile::framework::make_ddim(dim_vec);
+
+  float *input_ptr = input_tensor.mutable_data<float>(dims);
+
+  memcpy(input_ptr, input,
+         numel * sizeof(float));
+
+  std::shared_ptr<paddle_mobile::framework::Tensor> output = pam_->Predict(input_tensor);
+
+  float *output_pointer = new float[output->numel()];
+
+  memcpy(output_pointer, output->data<float>(),
+         output->numel() * sizeof(float));
+
+  PaddleMobileCPUResult *cpuResult = [[PaddleMobileCPUResult alloc] init];
+  [cpuResult toSetOutput: output_pointer];
+  [cpuResult toSetOutputSize: output->numel()];
+
+  return cpuResult;
+}
+
+- (NSArray *)predict:(CGImageRef)image dim:(NSArray *)dim means:(NSArray *)means scale:(float)scale{
+//  printf(" predict one ");
+  std::lock_guard<std::mutex> lock(shared_mutex);
   if (!loaded_) {
     printf("PaddleMobile hasn't been loaded yet");
     return nil;
   }
@@ -164,15 +277,13 @@ static std::mutex shared_mutex;
   }
 
   // input
-  std::vector<float> *predict_input = new std::vector<float>();
+  std::vector<float> predict_input;
   for (int j = 0; j < numel; ++j) {
-    predict_input->push_back(dataPointer[j]);
+    predict_input.push_back(dataPointer[j]);
   }
 
-  predict_input_ = predict_input;
-
   // predict
-  std::vector<float> cpp_result = pam_->Predict(*predict_input, dim_vec);
+  std::vector<float> cpp_result = pam_->Predict(predict_input, dim_vec);
 
   // result
   long count = 0;
diff --git a/src/ios_io/op_symbols.h b/src/ios_io/op_symbols.h
index 0fe1137278d19ab4c9c9aaecf2db108e4a184993..af0401c15ab28b0baa0cdbffb16a46215a26953e 100644
--- a/src/ios_io/op_symbols.h
+++ b/src/ios_io/op_symbols.h
@@ -15,27 +15,46 @@
 #pragma once
 
 #include "operators/batchnorm_op.h"
+#include "operators/bilinear_interp_op.h"
 #include "operators/box_coder_op.h"
 #include "operators/concat_op.h"
 #include "operators/conv_op.h"
+#include "operators/conv_transpose_op.h"
+#include "operators/crf_op.h"
 #include "operators/depthwise_conv_op.h"
 #include "operators/dropout_op.h"
 #include "operators/elementwise_add_op.h"
 #include "operators/feed_op.h"
 #include "operators/fetch_op.h"
+#include "operators/flatten_op.h"
 #include "operators/fusion_conv_add.h"
+#include "operators/fusion_conv_add_add_prelu_op.h"
+#include "operators/fusion_conv_add_bn_op.h"
 #include "operators/fusion_conv_add_bn_relu_op.h"
+#include "operators/fusion_conv_add_prelu_op.h"
+#include "operators/fusion_conv_add_relu_op.h"
+#include "operators/fusion_conv_bn_add_relu_op.h"
 #include "operators/fusion_conv_bn_relu_op.h"
 #include "operators/fusion_dwconv_bn_relu_op.h"
+#include "operators/fusion_elementwise_add_relu_op.h"
 #include "operators/fusion_fc_op.h"
+#include "operators/fusion_fc_relu_op.h"
+#include "operators/gru_op.h"
 #include "operators/im2sequence_op.h"
+#include "operators/lookup_op.h"
 #include "operators/lrn_op.h"
 #include "operators/mul_op.h"
 #include "operators/multiclass_nms_op.h"
 #include "operators/pool_op.h"
+#include "operators/prelu_op.h"
 #include "operators/prior_box_op.h"
 #include "operators/relu_op.h"
 #include "operators/reshape_op.h"
+#include "operators/resize_op.h"
+#include "operators/scale_op.h"
+#include "operators/shape_op.h"
 #include "operators/sigmoid_op.h"
+#include "operators/slice_op.h"
 #include "operators/softmax_op.h"
+#include "operators/split_op.h"
 #include "operators/transpose_op.h"
diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h
index b0585d5e8377fbd4a9bef46a9637c608b3ca4e37..cccd4f52ebdc368e4f68eaf9dc3f25ee3693fdd2 100644
--- a/src/operators/feed_op.h
+++ b/src/operators/feed_op.h
@@ -53,9 +53,9 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
     auto input_ptr = input->data<float>();
     fpga::format_image(input);
     Tensor *output = param_.Out();
-    auto output_ptr = output->data<float>();
+    auto output_ptr = output->data<half>();
 
-    fpga::BypassArgs args;
+    fpga::BypassArgs args = {fpga::DATA_TYPE_FP32};
 
     args.input_data_type = fpga::DATA_TYPE_FP32;
     args.output_data_type = fpga::DATA_TYPE_FP16;
diff --git a/src/operators/kernel/fpga/concat_kernel.cpp b/src/operators/kernel/fpga/concat_kernel.cpp
index 9de1511746f70c225e2d978a43b43cb34ad9143f..86da2833ed6e1443707054896127e87c0ca297b9 100644
--- a/src/operators/kernel/fpga/concat_kernel.cpp
+++ b/src/operators/kernel/fpga/concat_kernel.cpp
@@ -43,7 +43,7 @@ bool ConcatKernel<FPGA, float>::Init(ConcatParam<FPGA> *param) {
   fpga::format_concat_output(out, (int)height, (int)width, (int)image_num,
                              channel_num);
 
-  fpga::ConcatArgs concatArgs;
+  fpga::ConcatArgs concatArgs = {0};
   concatArgs.image_num = (uint32_t)image_num;
   concatArgs.images_in = images_in;
   concatArgs.scales_in = scales_in;
diff --git a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
index 21a03bcc3aca243ce3f66bcda6119b63a742560a..671df76967b4537d111695cdbe091b9c7de2c5a2 100644
--- a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
@@ -66,7 +66,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
   fpga::format_fp16_ofm(out);
 
-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
   fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
                       param->Groups(), param->Strides()[0], param->Strides()[1],
                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
diff --git a/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp
index 749e61f45d2865b7dd87be44339a4336a987f636..d435692db6b40568afc599733c2adb6b05b00ffa 100644
--- a/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp
@@ -64,7 +64,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
 
   fpga::format_fp16_ofm(out);
 
-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
   fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
                       param->Groups(), param->Strides()[0], param->Strides()[1],
                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
diff --git a/src/operators/kernel/fpga/conv_add_relu_kernel.cpp b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp
index 2570b80857d8b1d0c98828e0197ffe37afcf749f..32d90b36e4c14a60219a3779da03100651aa2f13 100644
--- a/src/operators/kernel/fpga/conv_add_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp
@@ -46,7 +46,7 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
 
   fpga::format_fp16_ofm(out);
 
-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
   fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
                       param->Groups(), param->Strides()[0], param->Strides()[1],
                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
diff --git a/src/operators/kernel/fpga/conv_bn_kernel.cpp b/src/operators/kernel/fpga/conv_bn_kernel.cpp
index 34954fd6d4a573321ef34b5c09567d90b4fc9022..4263c9c40491366813d3c9a5bf7dbc8ae976d39e 100644
--- a/src/operators/kernel/fpga/conv_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_bn_kernel.cpp
@@ -58,7 +58,7 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
 
   fpga::format_fp16_ofm(out);
 
-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
   fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
                       param->Groups(), param->Strides()[0], param->Strides()[1],
                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
diff --git a/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp
index 04d6892e3f4e526a5baa13cd86f8b2a4fe1de176..3d6e0faa5fe3d4ef3514bbe1679298b11d96727c 100644
--- a/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp
@@ -58,7 +58,7 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
 
   fpga::format_fp16_ofm(out);
 
-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
   fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
                       param->Groups(), param->Strides()[0], param->Strides()[1],
                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
diff --git a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
index a3314c1b2c2c3a3a79e582fe4c79d34f6eb5b47c..f0d8533641941fe43a6d06b49266ac06646a7b4d 100644
--- a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
@@ -30,7 +30,7 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   fpga::format_fp16_ofm(out);
   auto out_ptr = out->mutable_data();
 
-  fpga::EWAddArgs ewaddArgs;
+  fpga::EWAddArgs ewaddArgs = {0};
   ewaddArgs.relu_enabled = relu_enabled;
   ewaddArgs.const0 = 1;
   ewaddArgs.const1 = 1;
diff --git a/src/operators/kernel/fpga/fc_relu_kernel.cpp b/src/operators/kernel/fpga/fc_relu_kernel.cpp
index 64aa255ff2308c91a71c6e6f018d2ba435f243df..38b39f982ce41c7d5a88b82f21e446b05c859a2c 100644
--- a/src/operators/kernel/fpga/fc_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/fc_relu_kernel.cpp
@@ -51,7 +51,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
   fpga::format_fp16_ofm(out);
 
-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
   fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
                       0, 0, bs_ptr);
   param->SetFpgaArgs(conv_arg);
diff --git a/src/operators/kernel/fpga/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/fusion_fc_kernel.cpp
index 5930f3d4115a469a9e5515b007be090de7d0219c..6dee8ea6a7e1b26bec4ffd3ed324db4a4ac3be2d 100644
--- a/src/operators/kernel/fpga/fusion_fc_kernel.cpp
+++ b/src/operators/kernel/fpga/fusion_fc_kernel.cpp
@@ -52,7 +52,7 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
   fpga::format_fp16_ofm(out);
 
-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
   fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
                       0, 0, bs_ptr);
   param->SetFpgaArgs(conv_arg);
diff --git a/src/operators/kernel/fpga/pool_kernel.cpp b/src/operators/kernel/fpga/pool_kernel.cpp
index 96599f3059f45fc8163e979da613360ab2f8c298..4dad2f789baeb6e381c66ed861b8a8360fa2996e 100644
--- a/src/operators/kernel/fpga/pool_kernel.cpp
+++ b/src/operators/kernel/fpga/pool_kernel.cpp
@@ -30,7 +30,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
   vector<int> strides = param->Strides();
   vector<int> paddings = param->Paddings();
 
-  fpga::PoolingArgs poolArgs;
+  fpga::PoolingArgs poolArgs = {0};
   poolArgs.image.address = input_ptr;
   poolArgs.image.channels = (uint32_t)input->dims()[1];
   poolArgs.image.height = (uint32_t)input->dims()[2];
diff --git a/src/operators/kernel/fpga/softmax_kernel.cpp b/src/operators/kernel/fpga/softmax_kernel.cpp
index 0bc874c570248533447521d746dc653fc0e17114..7cfd0c7d76c1a8e73955dbec1971d86ceebde259 100644
--- a/src/operators/kernel/fpga/softmax_kernel.cpp
+++ b/src/operators/kernel/fpga/softmax_kernel.cpp
@@ -26,10 +26,11 @@ template <>
 bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   auto input = const_cast<Tensor *>(param->InputX());
   auto input_ptr = input->data();
-  auto float_input = new Tensor(*input);
+  auto float_input = new Tensor;
+  float_input->mutable_data<float>(input->dims());
   fpga::format_fp32_ofm(float_input);
 
-  fpga::BypassArgs args;
+  fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
   args.input_layout_type = fpga::LAYOUT_HWC;
   args.output_layout_type = fpga::LAYOUT_CHW;
   args.input_data_type = fpga::DATA_TYPE_FP16;
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index f11e6c37cd9d1aa3625240619e0e31ea6f4d5a0d..5b53743b75bfe65a9e029e44114b339603388c08 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -341,22 +341,23 @@ class OpParam {
   }
 };
 
-#ifdef CONV_OP
 template <typename Dtype>
-class ConvParam : OpParam {
+class ConvParam : public OpParam {
   typedef typename DtypeTensorTrait<Dtype>::gtype GType;
   typedef typename DtypeTensorTrait<Dtype>::rtype RType;
 
  public:
   ConvParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
             const AttributeMap &attrs, const Scope &scope) {
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutputFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
+    filter_ = OpParam::FilterFrom<GType>(inputs, scope);
+    input_ = OpParam::InputFrom<GType>(inputs, scope);
+    if (outputs.count("Output")) {
+      output_ = OpParam::OutputFrom<GType>(outputs, scope);
+    }
+    strides_ = OpParam::GetAttr<vector<int>>("strides", attrs);
+    paddings_ = OpParam::GetAttr<vector<int>>("paddings", attrs);
+    dilations_ = OpParam::GetAttr<vector<int>>("dilations", attrs);
+    groups = OpParam::GetAttr<int>("groups", attrs);
   }
 
   const RType *Input() const { return input_; }
@@ -384,7 +385,6 @@ class ConvParam : public OpParam {
 };
 template <typename Dtype>
 Print &operator<<(Print &printer, const ConvParam<Dtype> &conv_param);
-#endif
 
 template <typename Dtype>
 class ElementwiseAddParam : OpParam {
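The fusion-param hunks that follow all switch their base class from OpParam to ConvParam<Dtype> and qualify the inherited helpers with OpParam::. The qualification is not cosmetic: once the base class depends on a template parameter, unqualified name lookup inside the derived template no longer searches it, so names inherited through the dependent base must be reached via a qualified name (or this->). A minimal compilable repro of the rule:

    // Why the diff writes OpParam::GetAttr etc.: a dependent base class is
    // not searched by unqualified name lookup at template definition time.
    struct Base {
      static int helper() { return 42; }
    };

    template <typename T>
    struct Middle : Base {};

    template <typename T>
    struct Derived : Middle<T> {
      int f() {
        // return helper();      // error: Middle<T> is dependent, not searched
        return Base::helper();   // OK: qualified lookup (what the diff does)
        // return this->helper();// also OK: lookup deferred to instantiation
      }
    };

    int main() { return Derived<int>().f() == 42 ? 0 : 1; }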
@@ -1294,52 +1294,29 @@ using FusionFcReluParam = FusionFcParam<DeviceType>;
 #endif
 
 template <typename Dtype>
-class FusionConvAddParam : public OpParam {
+class FusionConvAddParam : public ConvParam<Dtype> {
   typedef typename DtypeTensorTrait<Dtype>::gtype GType;
   typedef typename DtypeTensorTrait<Dtype>::rtype RType;
 
  public:
   FusionConvAddParam(const VariableNameMap &inputs,
                      const VariableNameMap &outputs, const AttributeMap &attrs,
-                     const Scope &scope) {
-    bias_ = InputYFrom<GType>(inputs, scope);
-    axis_ = GetAttr<int>("axis", attrs);
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
+                     const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    bias_ = OpParam::InputYFrom<GType>(inputs, scope);
+    axis_ = OpParam::GetAttr<int>("axis", attrs);
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
   }
 
   RType *Bias() const { return bias_; }
 
   const int &Axis() const { return axis_; }
 
-  const RType *Input() const { return input_; }
-
-  const RType *Filter() const { return filter_; }
-
   RType *Output() const { return output_; }
 
-  const vector<int> &Strides() const { return strides_; }
-
-  const vector<int> &Paddings() const { return paddings_; }
-
-  const vector<int> &Dilations() const { return dilations_; }
-
-  const int &Groups() const { return groups; }
-
  protected:
   RType *bias_;
   int axis_;
-  RType *input_;
   RType *output_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
 
 #ifdef PADDLE_MOBILE_FPGA
 
  private:
@@ -1366,58 +1343,33 @@ class FusionConvAddReluParam : public FusionConvAddParam<DeviceType> {
 #endif
 
 #ifdef FUSION_CONVADDPRELU_OP
-template <typename DeviceType>
-class FusionConvAddPReluParam : public OpParam {
-  typedef typename DtypeTensorTrait<DeviceType>::gtype GType;
-  typedef typename DtypeTensorTrait<DeviceType>::rtype RType;
+template <typename Dtype>
+class FusionConvAddPReluParam : public ConvParam<Dtype> {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
 
  public:
   FusionConvAddPReluParam(const VariableNameMap &inputs,
                           const VariableNameMap &outputs,
-                          const AttributeMap &attrs, const Scope &scope) {
-    alpha_ = InputAlphaFrom<GType>(inputs, scope);
-    mode_ = GetAttr<std::string>("mode", attrs);
+                          const AttributeMap &attrs, const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    alpha_ = OpParam::InputAlphaFrom<GType>(inputs, scope);
+    mode_ = OpParam::GetAttr<std::string>("mode", attrs);
     framework::DDim dims = alpha_->dims();
-    bias_ = InputYFrom<GType>(inputs, scope);
-    axis_ = GetAttr<int>("axis", attrs);
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
+    bias_ = OpParam::InputYFrom<GType>(inputs, scope);
+    axis_ = OpParam::GetAttr<int>("axis", attrs);
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
   }
   const RType *InputAlpha() const { return alpha_; }
   const std::string &Mode() const { return mode_; }
   RType *Bias() const { return bias_; }
   const int &Axis() const { return axis_; }
-
-  const RType *Input() const { return input_; }
-
-  const RType *Filter() const { return filter_; }
-
   RType *Output() const { return output_; }
 
-  const vector<int> &Strides() const { return strides_; }
-
-  const vector<int> &Paddings() const { return paddings_; }
-
-  const vector<int> &Dilations() const { return dilations_; }
-
-  const int &Groups() const { return groups; }
-
  protected:
   RType *bias_;
   int axis_;
-  RType *input_;
   RType *output_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
   RType *alpha_;
   std::string mode_;
 #ifdef PADDLE_MOBILE_FPGA
@@ -1433,35 +1385,30 @@ class FusionConvAddPReluParam : public ConvParam<Dtype> {
 #endif
 
 #ifdef FUSION_CONVADDADDPRELU_OP
-template <typename DeviceType>
-class FusionConvAddAddPReluParam : public OpParam {
-  typedef typename DtypeTensorTrait<DeviceType>::gtype GType;
-  typedef typename DtypeTensorTrait<DeviceType>::rtype RType;
+template <typename Dtype>
+class FusionConvAddAddPReluParam : public ConvParam<Dtype> {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
 
  public:
   FusionConvAddAddPReluParam(const VariableNameMap &inputs,
                              const VariableNameMap &outputs,
-                             const AttributeMap &attrs, const Scope &scope) {
-    bias1_ = InputYFrom1<GType>(inputs, scope);
-    alpha_ = InputAlphaFrom<GType>(inputs, scope);
-    mode_ = GetAttr<std::string>("mode", attrs);
+                             const AttributeMap &attrs, const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    bias1_ = OpParam::InputYFrom1<GType>(inputs, scope);
+    alpha_ = OpParam::InputAlphaFrom<GType>(inputs, scope);
+    mode_ = OpParam::GetAttr<std::string>("mode", attrs);
     framework::DDim dims = alpha_->dims();
-    bias_ = InputYFrom<GType>(inputs, scope);
-    axis_ = GetAttr<int>("axis", attrs);
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
-    keyOutput_ = getkey("addOut", inputs, 0);
-    keyX1_ = getkey("addX", inputs, 1);
-    keyY1_ = getkey("Y", inputs, 1);
+    bias_ = OpParam::InputYFrom<GType>(inputs, scope);
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+    axis_ = OpParam::GetAttr<int>("axis", attrs);
+    keyOutput_ = OpParam::getkey("addOut", inputs, 0);
+    keyX1_ = OpParam::getkey("addX", inputs, 1);
+    keyY1_ = OpParam::getkey("Y", inputs, 1);
     if (keyX1_ == keyOutput_) {
-      bias1_ = InputYFrom1<GType>(inputs, scope);
+      bias1_ = OpParam::InputYFrom1<GType>(inputs, scope);
     } else if (keyY1_ == keyOutput_) {
-      bias1_ = InputXFrom1<GType>(inputs, scope);
+      bias1_ = OpParam::InputXFrom1<GType>(inputs, scope);
     }
   }
   const RType *InputAlpha() const { return alpha_; }
@@ -1471,31 +1418,12 @@ class FusionConvAddAddPReluParam : public ConvParam<Dtype> {
   RType *Bias() const { return bias_; }
 
   const int &Axis() const { return axis_; }
-
-  const RType *Input() const { return input_; }
-
-  const RType *Filter() const { return filter_; }
-
   RType *Output() const { return output_; }
 
-  const vector<int> &Strides() const { return strides_; }
-
-  const vector<int> &Paddings() const { return paddings_; }
-
-  const vector<int> &Dilations() const { return dilations_; }
-
-  const int &Groups() const { return groups; }
-
  protected:
   RType *bias_;
   int axis_;
-  RType *input_;
   RType *output_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
   RType *alpha_;
   std::string mode_;
   RType *bias1_;
@@ -1516,49 +1444,32 @@
 #ifdef FUSION_CONVADDBNRELU_OP
 template <typename Dtype>
-class FusionConvAddBNReluParam : public OpParam {
+class FusionConvAddBNReluParam : public ConvParam<Dtype> {
   typedef typename DtypeTensorTrait<Dtype>::gtype GType;
   typedef typename DtypeTensorTrait<Dtype>::rtype RType;
 
  public:
   FusionConvAddBNReluParam(const VariableNameMap &inputs,
                            const VariableNameMap &outputs,
-                           const AttributeMap &attrs, const Scope &scope) {
-    bias_ = InputYFrom<GType>(inputs, scope);
-    axis_ = GetAttr<int>("axis", attrs);
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
-    input_bias_ = InputBiasFrom<GType>(inputs, scope);
-    input_mean_ = InputMeanFrom<GType>(inputs, scope);
-    input_scale_ = InputScaleFrom<GType>(inputs, scope);
-    input_variance_ = InputVarianceFrom<GType>(inputs, scope);
-    epsilon_ = GetAttr<float>("epsilon", attrs);
-    momentum_ = GetAttr<float>("momentum", attrs);
-    // is_test_ = GetAttr<bool>("is_test", attrs);
+                           const AttributeMap &attrs, const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    bias_ = OpParam::InputYFrom<GType>(inputs, scope);
+    axis_ = OpParam::GetAttr<int>("axis", attrs);
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    // is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
   }
   RType *Bias() const { return bias_; }
 
   const int &Axis() const { return axis_; }
 
-  const RType *Input() const { return input_; }
-
-  const RType *Filter() const { return filter_; }
-
   RType *Output() const { return output_; }
 
-  const vector<int> &Strides() const { return strides_; }
-
-  const vector<int> &Paddings() const { return paddings_; }
-
-  const vector<int> &Dilations() const { return dilations_; }
-
-  const int &Groups() const { return groups; }
-
   const RType *InputBias() const { return input_bias_; }
 
   const RType *InputMean() const { return input_mean_; }
 
@@ -1584,13 +1495,7 @@ class FusionConvAddBNReluParam : public ConvParam<Dtype> {
  protected:
   RType *bias_;
   int axis_;
-  RType *input_;
   RType *output_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
   RType *input_bias_;
   RType *input_mean_;
   RType *input_scale_;
@@ -1614,57 +1519,40 @@
 #ifdef FUSION_CONVBNADDRELU_OP
 template <typename Dtype>
-class FusionConvBNAddReluParam : public OpParam {
+class FusionConvBNAddReluParam : public ConvParam<Dtype> {
   typedef typename DtypeTensorTrait<Dtype>::gtype GType;
   typedef typename DtypeTensorTrait<Dtype>::rtype RType;
 
  public:
   FusionConvBNAddReluParam(const VariableNameMap &inputs,
                            const VariableNameMap &outputs,
-                           const AttributeMap &attrs, const Scope &scope) {
-    bias_ = InputYFrom<GType>(inputs, scope);
-    axis_ = GetAttr<int>("axis", attrs);
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
-    input_bias_ = InputBiasFrom<GType>(inputs, scope);
-    input_mean_ = InputMeanFrom<GType>(inputs, scope);
-    input_scale_ = InputScaleFrom<GType>(inputs, scope);
-    input_variance_ = InputVarianceFrom<GType>(inputs, scope);
-    epsilon_ = GetAttr<float>("epsilon", attrs);
-    momentum_ = GetAttr<float>("momentum", attrs);
-    keyBNY_ = getkey("BNY", inputs, 0);
-    keyX_ = getkey("X", inputs, 0);
-    keyY_ = getkey("Y", inputs, 0);
+                           const AttributeMap &attrs, const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    bias_ = OpParam::InputYFrom<GType>(inputs, scope);
+    axis_ = OpParam::GetAttr<int>("axis", attrs);
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    keyBNY_ = OpParam::getkey("BNY", inputs, 0);
+    keyX_ = OpParam::getkey("X", inputs, 0);
+    keyY_ = OpParam::getkey("Y", inputs, 0);
     if (keyX_ == keyBNY_) {
-      bias_ = InputYFrom<GType>(inputs, scope);
+      bias_ = OpParam::InputYFrom<GType>(inputs, scope);
     } else if (keyY_ == keyBNY_) {
-      bias_ = InputXFrom<GType>(inputs, scope);
+      bias_ = OpParam::InputXFrom<GType>(inputs, scope);
     }
-    // is_test_ = GetAttr<bool>("is_test", attrs);
+    // is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
   }
   RType *Bias() const { return bias_; }
 
   const int &Axis() const { return axis_; }
 
-  const RType *Input() const { return input_; }
-
-  const RType *Filter() const { return filter_; }
-
   RType *Output() const { return output_; }
 
-  const vector<int> &Strides() const { return strides_; }
-
-  const vector<int> &Paddings() const { return paddings_; }
-
-  const vector<int> &Dilations() const { return dilations_; }
-
-  const int &Groups() const { return groups; }
-
   const RType *InputBias() const { return input_bias_; }
 
   const RType *InputMean() const { return input_mean_; }
 
@@ -1690,13 +1578,7 @@ class FusionConvBNAddReluParam : public ConvParam<Dtype> {
  protected:
   RType *bias_;
   int axis_;
-  RType *input_;
   RType *output_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
   RType *input_bias_;
   RType *input_mean_;
   RType *input_scale_;
@@ -1723,44 +1605,26 @@
 #ifdef FUSION_CONVBN_OP
 template <typename Dtype>
-class FusionConvBNParam : public OpParam {
+class FusionConvBNParam : public ConvParam<Dtype> {
   typedef typename DtypeTensorTrait<Dtype>::gtype GType;
   typedef typename DtypeTensorTrait<Dtype>::rtype RType;
 
  public:
   FusionConvBNParam(const VariableNameMap &inputs,
                     const VariableNameMap &outputs, const AttributeMap &attrs,
-                    const Scope &scope) {
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_y_ = OutputYFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
-    input_bias_ = InputBiasFrom<GType>(inputs, scope);
-    input_mean_ = InputMeanFrom<GType>(inputs, scope);
-    input_scale_ = InputScaleFrom<GType>(inputs, scope);
-    input_variance_ = InputVarianceFrom<GType>(inputs, scope);
-    epsilon_ = GetAttr<float>("epsilon", attrs);
-    momentum_ = GetAttr<float>("momentum", attrs);
-    // is_test_ = GetAttr<bool>("is_test", attrs);
+                    const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    output_y_ = OpParam::OutputYFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    // is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
   }
-
-  const RType *Input() const { return input_; }
-
-  const RType *Filter() const { return filter_; }
-
   RType *Output() const { return output_y_; }
 
-  const vector<int> &Strides() const { return strides_; }
-
-  const vector<int> &Paddings() const { return paddings_; }
-
-  const vector<int> &Dilations() const { return dilations_; }
-
-  const int &Groups() const { return groups; }
-
   const RType *InputBias() const { return input_bias_; }
 
   const RType *InputMean() const { return input_mean_; }
 
@@ -1784,13 +1648,7 @@ class FusionConvBNParam : public ConvParam<Dtype> {
   const RType *NewBias() const { return new_bias_; }
 
  protected:
-  RType *input_;
   RType *output_y_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
   RType *input_bias_;
   RType *input_mean_;
   RType *input_scale_;
@@ -1814,49 +1672,32 @@
 #ifdef FUSION_CONVADDBN_OP
 template <typename Dtype>
-class FusionConvAddBNParam : public OpParam {
+class FusionConvAddBNParam : public ConvParam<Dtype> {
   typedef typename DtypeTensorTrait<Dtype>::gtype GType;
   typedef typename DtypeTensorTrait<Dtype>::rtype RType;
 
  public:
   FusionConvAddBNParam(const VariableNameMap &inputs,
                        const VariableNameMap &outputs,
-                       const AttributeMap &attrs, const Scope &scope) {
-    bias_ = InputYFrom<GType>(inputs, scope);
-    axis_ = GetAttr<int>("axis", attrs);
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_y_ = OutputYFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
-    input_bias_ = InputBiasFrom<GType>(inputs, scope);
-    input_mean_ = InputMeanFrom<GType>(inputs, scope);
-    input_scale_ = InputScaleFrom<GType>(inputs, scope);
-    input_variance_ = InputVarianceFrom<GType>(inputs, scope);
-    epsilon_ = GetAttr<float>("epsilon", attrs);
-    momentum_ = GetAttr<float>("momentum", attrs);
-    // is_test_ = GetAttr<bool>("is_test", attrs);
+                       const AttributeMap &attrs, const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    bias_ = OpParam::InputYFrom<GType>(inputs, scope);
+    axis_ = OpParam::GetAttr<int>("axis", attrs);
+    output_y_ = OpParam::OutputYFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    // is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
   }
   RType *Bias() const { return bias_; }
 
   const int &Axis() const { return axis_; }
 
-  const RType *Input() const { return input_; }
-
-  const RType *Filter() const { return filter_; }
-
   RType *Output() const { return output_y_; }
 
-  const vector<int> &Strides() const { return strides_; }
-
-  const vector<int> &Paddings() const { return paddings_; }
-
-  const vector<int> &Dilations() const { return dilations_; }
-
-  const int &Groups() const { return groups; }
-
   const RType *InputBias() const { return input_bias_; }
 
   const RType *InputMean() const { return input_mean_; }
@@ -1882,13 +1723,7 @@ class FusionConvAddBNParam : public ConvParam<Dtype> {
  protected:
   RType *bias_;
   int axis_;
-  RType *input_;
   RType *output_y_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
   RType *input_bias_;
   RType *input_mean_;
   RType *input_scale_;
@@ -1912,44 +1747,26 @@
 #ifdef FUSION_DWCONVBNRELU_OP
 template <typename Dtype>
-class FusionDWConvBNReluParam : public OpParam {
+class FusionDWConvBNReluParam : public ConvParam<Dtype> {
   typedef typename DtypeTensorTrait<Dtype>::gtype GType;
   typedef typename DtypeTensorTrait<Dtype>::rtype RType;
 
  public:
   FusionDWConvBNReluParam(const VariableNameMap &inputs,
                           const VariableNameMap &outputs,
-                          const AttributeMap &attrs, const Scope &scope) {
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
-    input_bias_ = InputBiasFrom<GType>(inputs, scope);
-    input_mean_ = InputMeanFrom<GType>(inputs, scope);
-    input_scale_ = InputScaleFrom<GType>(inputs, scope);
-    input_variance_ = InputVarianceFrom<GType>(inputs, scope);
-    epsilon_ = GetAttr<float>("epsilon", attrs);
-    momentum_ = GetAttr<float>("momentum", attrs);
-    // is_test_ = GetAttr<bool>("is_test", attrs);
+                          const AttributeMap &attrs, const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    // is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
   }
-
-  const RType *Input() const { return input_; }
-
-  const RType *Filter() const { return filter_; }
-
   RType *Output() const { return output_; }
 
-  const vector<int> &Strides() const { return strides_; }
-
-  const vector<int> &Paddings() const { return paddings_; }
-
-  const vector<int> &Dilations() const { return dilations_; }
-
-  const int &Groups() const { return groups; }
-
   const RType *InputBias() const { return input_bias_; }
 
   const RType *InputMean() const { return input_mean_; }
 
@@ -1973,13 +1790,7 @@ class FusionDWConvBNReluParam : public ConvParam<Dtype> {
   const RType *NewBias() const { return new_bias_; }
 
  protected:
-  RType *input_;
   RType *output_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
   RType *input_bias_;
   RType *input_mean_;
   RType *input_scale_;
@@ -1995,45 +1806,26 @@
 #ifdef FUSION_CONVBNRELU_OP
 template <typename Dtype>
-class FusionConvBNReluParam : public OpParam {
+class FusionConvBNReluParam : public ConvParam<Dtype> {
   typedef typename DtypeTensorTrait<Dtype>::gtype GType;
   typedef typename DtypeTensorTrait<Dtype>::rtype RType;
 
  public:
   FusionConvBNReluParam(const VariableNameMap &inputs,
                         const VariableNameMap &outputs,
-                        const AttributeMap &attrs, const Scope &scope) {
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutFrom<GType>(outputs, scope);
-
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
-    input_bias_ = InputBiasFrom<GType>(inputs, scope);
-    input_mean_ = InputMeanFrom<GType>(inputs, scope);
-    input_scale_ = InputScaleFrom<GType>(inputs, scope);
-    input_variance_ = InputVarianceFrom<GType>(inputs, scope);
-    epsilon_ = GetAttr<float>("epsilon", attrs);
-    momentum_ = GetAttr<float>("momentum", attrs);
-    // is_test_ = GetAttr<bool>("is_test", attrs);
+                        const AttributeMap &attrs, const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    // is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
   }
-
-  const RType *Input() const { return input_; }
-
-  const RType *Filter() const { return filter_; }
-
   RType *Output() const { return output_; }
 
-  const vector<int> &Strides() const { return strides_; }
-
-  const vector<int> &Paddings() const { return paddings_; }
-
-  const vector<int> &Dilations() const { return dilations_; }
-
-  const int &Groups() const { return groups; }
-
   const RType *InputBias() const { return input_bias_; }
 
   const RType *InputMean() const { return input_mean_; }
 
@@ -2057,13 +1849,7 @@ class FusionConvBNReluParam : public ConvParam<Dtype> {
   const RType *NewBias() const { return new_bias_; }
 
  protected:
-  RType *input_;
   RType *output_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
   RType *input_bias_;
   RType *input_mean_;
   RType *input_scale_;
diff --git a/src/operators/prelu_op.cpp b/src/operators/prelu_op.cpp
index 245154ca5ea6971dee33e14550bf1e090fa0ec71..332b5cc9bbbabf9498858b96e0028a9e3992f3ea 100644
--- a/src/operators/prelu_op.cpp
+++ b/src/operators/prelu_op.cpp
@@ -34,11 +34,9 @@ void PReluOp<Dtype, T>::InferShape() const {
  * */
 namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
-USE_OP_CPU(prelu);
 REGISTER_OPERATOR_CPU(prelu, ops::PReluOp);
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
-USE_OP_MALI_GPU(prelu);
 REGISTER_OPERATOR_MALI_GPU(prelu, ops::PReluOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
diff --git a/src/operators/prelu_op.h b/src/operators/prelu_op.h
index af33476b7298a5728a6ef944506d55f422a2fa8c..7b6b778fa6e8f0951faffda6803b25b6b23ea17c 100644
--- a/src/operators/prelu_op.h
+++ b/src/operators/prelu_op.h
@@ -50,4 +50,14 @@ class PReluOp : public framework::OperatorWithKernel<
 }  // namespace operators
 }  // namespace paddle_mobile
 
+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(prelu);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+USE_OP_MALI_GPU(prelu);
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(prelu);
+#endif
+
 #endif
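Moving USE_OP_CPU(prelu) out of the .cpp and into the header puts the "touch" in every translation unit that includes the op, which is what keeps the linker from dead-stripping the op's static registrar out of a static library. The real macro bodies are not part of this diff; a compilable sketch of the usual shape of this pattern, under that assumption:

    #include <iostream>

    // Registration side (normally the op's .cpp): a static object whose
    // constructor runs at program load and registers the op. In a static
    // library, the linker drops this object file unless the final binary
    // references some symbol from it.
    struct Registrar {
      explicit Registrar(const char *name) {
        std::cout << "registered " << name << "\n";
      }
    };

    #define REGISTER_OP(name)                     \
      int TouchOpRegistrar_##name() { return 0; } \
      static Registrar registrar_##name(#name);

    // Use side (normally a header the app includes): calling the touch
    // function creates a reference that forces the op's translation unit
    // to be linked in, so the registrar above actually runs.
    #define USE_OP(name)                          \
      extern int TouchOpRegistrar_##name();       \
      static int use_op_##name = TouchOpRegistrar_##name();

    REGISTER_OP(prelu)  // op side
    USE_OP(prelu)       // app side

    int main() { return use_op_prelu; }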
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 44da835e7e031b915fe61f53ef6cf3bc0e12a749..f4dc1421a4cd0f1062e8ad1240caa237e58c9371 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -33,6 +33,27 @@ elseif("FPGAnets" IN_LIST NET)
     ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-resnet paddle-mobile)
 
+    ADD_EXECUTABLE(test-resnet50 fpga/test_resnet50.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-resnet50 paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-EW fpga/test_fpga_EW.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-EW paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-conv fpga/test_fpga_conv.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-conv paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-pooling fpga/test_fpga_pooling.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-pooling paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-bypass fpga/test_fpga_bypass.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-bypass paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-softmax fpga/test_fpga_softmax.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-softmax paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-concat fpga/test_fpga_concat.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-concat paddle-mobile)
+
     ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-tensor-quant paddle-mobile)
 
@@ -242,13 +263,4 @@ else ()
     #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
-
-
-
-
 endif()
-
-# if(FPGA)
-#     ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
-#     target_link_libraries(test-tensor-quant paddle-mobile)
-# endif()
diff --git a/test/fpga/test_resnet50.cpp b/test/fpga/test_resnet50.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cca6793f10da5a0784cf8a3ba2d0104f3508028d
--- /dev/null
+++ b/test/fpga/test_resnet50.cpp
@@ -0,0 +1,39 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+
+static const char *g_resnet_combine = "../models/resnet50";
+
+int main() {
+  DLOG << paddle_mobile::fpga::open_device();
+  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
+  if (paddle_mobile.Load(std::string(g_resnet_combine) + "/model",
+                         std::string(g_resnet_combine) + "/params", true)) {
+    std::vector<int64_t> dims{1, 3, 224, 224};
+    Tensor input_tensor;
+    SetupTensor<float>(&input_tensor, {1, 3, 224, 224}, static_cast<float>(0),
+                       static_cast<float>(1));
+
+    std::vector<float> input(input_tensor.data<float>(),
+                             input_tensor.data<float>() + input_tensor.numel());
+
+    paddle_mobile.FeedData(input_tensor);
+    paddle_mobile.Predict_To(-1);
+    //    paddle_mobile.Predict_From(73);
+    //    paddle_mobile.Predict_From_To(72, 73);
+
+    DLOG << "Computation done";
+    return 0;
+  }
+}