Commit c1a578f1 authored by xiebaiyuan

Merge remote-tracking branch 'upstream/develop' into develop

@@ -84,3 +84,6 @@ SwiftProtobuf.framework
paddle-mobile.xcworkspace
metal/models/
metal/images/
+tools/libomp.a
\ No newline at end of file
@@ -44,7 +44,7 @@ if (LOG_PROFILE)
    add_definitions(-DPADDLE_MOBILE_PROFILE)
endif()
-if(USE_OPENMP AND NOT IS_IOS)
+if(USE_OPENMP)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
    add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
endif()
@@ -130,8 +130,8 @@ endif ()
if (IS_IOS)
else()
-    list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobile.h)
-    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobile.mm)
+    list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobileCPU.h)
+    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobileCPU.mm)
    list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/op_symbols.h)
endif ()
...
@@ -29,9 +29,7 @@ namespace fpga {
static int fd = -1;
static const char *device_path = "/dev/fpgadrv0";
-#ifdef PADDLE_MOBILE_OS_LINUX
static std::map<void *, size_t> memory_map;
-#endif

static inline int do_ioctl(int req, const void *arg) {
#ifdef PADDLE_MOBILE_OS_LINUX
@@ -53,32 +51,38 @@ int open_device() {
// memory management;
void *fpga_malloc(size_t size) {
  static uint64_t counter = 0;
-  counter += size;
-  DLOG << size << " bytes allocated. Total " << counter << " bytes";
#ifdef PADDLE_MOBILE_OS_LINUX
  auto ptr = mmap64(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-  memory_map.insert(std::make_pair(ptr, size));
-  return ptr;
#else
-  return malloc(size);
+  auto ptr = malloc(size);
#endif
+  counter += size;
+  memory_map.insert(std::make_pair(ptr, size));
+  DLOG << "Address: " << ptr << ", " << size << " bytes allocated. Total "
+       << counter << " bytes";
+  return ptr;
}

void fpga_free(void *ptr) {
-#ifdef PADDLE_MOBILE_OS_LINUX
  static uint64_t counter = 0;
  size_t size = 0;
  auto iter = memory_map.find(ptr);  // std::map<void *, size_t>::iterator
  if (iter != memory_map.end()) {
    size = iter->second;
-    munmap(ptr, size);
    memory_map.erase(iter);
-  }
-  counter += size;
-  DLOG << size << " bytes freed. Total " << counter << " bytes";
+#ifdef PADDLE_MOBILE_OS_LINUX
+    munmap(ptr, size);
#else
    free(ptr);
#endif
+    counter += size;
+    DLOG << "Address: " << ptr << ", " << size << " bytes freed. Total "
+         << counter << " bytes";
+  } else {
+    DLOG << "Invalid pointer";
+  }
}

void fpga_copy(void *dest, const void *src, size_t num) {
@@ -86,14 +90,14 @@ void fpga_copy(void *dest, const void *src, size_t num) {
}

int fpga_flush(void *address, size_t size) {
-  struct MemoryCacheArgs args;
+  struct MemoryCacheArgs args = {nullptr};
  args.address = address;
  args.size = size;
  return do_ioctl(IOCTL_MEMCACHE_FLUSH, &args);
}

int fpga_invalidate(void *address, size_t size) {
-  struct MemoryCacheArgs args;
+  struct MemoryCacheArgs args = {nullptr};
  args.address = address;
  args.size = size;
  return do_ioctl(IOCTL_MEMCACHE_INVAL, &args);
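The memory-management hunks above move the pointer-to-size bookkeeping out of the Linux-only branch, so every allocation is recorded and every free can recover its size and reject unknown pointers. A minimal standalone sketch of that pattern; tracked_malloc and tracked_free are illustrative names, not the project's API, and plain malloc/free stand in for mmap64/munmap:

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <map>

// ptr -> size of every live allocation (the real code keeps one such map
// plus per-function byte counters).
static std::map<void *, std::size_t> allocations;
static std::uint64_t total_bytes = 0;

void *tracked_malloc(std::size_t size) {
  void *ptr = std::malloc(size);  // stand-in for mmap64() / malloc()
  if (ptr != nullptr) {
    allocations.emplace(ptr, size);
    total_bytes += size;
    std::printf("alloc %p: %zu bytes, total %llu\n", ptr, size,
                static_cast<unsigned long long>(total_bytes));
  }
  return ptr;
}

void tracked_free(void *ptr) {
  auto it = allocations.find(ptr);
  if (it == allocations.end()) {  // mirrors the new "Invalid pointer" branch
    std::printf("invalid pointer %p\n", ptr);
    return;
  }
  std::size_t size = it->second;
  allocations.erase(it);
  std::free(ptr);  // stand-in for munmap() / free()
  std::printf("free %p: %zu bytes\n", ptr, size);
}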
@@ -211,7 +215,8 @@ int PerformBypass(const struct BypassArgs &args) {
int ComputeFPGAConcat(const struct ConcatArgs &args) {
#ifdef FPGA_TEST_MODE
  DLOG << "=============ComputeFpgaConcat===========";
-  DLOG << " out_address:" << args.image_out
+  DLOG << " Image_num: " << args.image_num
+       << " out_address:" << args.image_out
       << " out_scale_address:" << args.scale_out;
  DLOG << " image_height:" << args.height << " image_width:" << args.width;
  for (int i = 0; i < args.image_num; i++) {
@@ -235,7 +240,7 @@ void format_image(framework::Tensor *image_tensor) {
  auto channel = dims[1], height = dims[2], width = dims[3];
  auto data_ptr = image_tensor->data<float>();
  size_t memory_size = channel * height * width * sizeof(float);
-  float *new_data = (float *)fpga_malloc(memory_size);
+  auto new_data = (float *)fpga_malloc(memory_size);
  fpga_copy(new_data, data_ptr, memory_size);
  image::format_image(&new_data, channel, height, width);
  image_tensor->reset_data_ptr(new_data);
@@ -332,7 +337,7 @@ void format_concat_output(framework::Tensor *out, int height, int width,
  sum_cw = align_to_x(width * sum_channel, IMAGE_ALIGNMENT);
  auto data_ptr = fpga_malloc(height * sum_cw * sizeof(half));
-  auto ddim = framework::make_ddim({-1, sum_channel, height, width});
+  auto ddim = framework::make_ddim({1, sum_channel, height, width});
  out->Resize(ddim);
  out->reset_data_ptr(data_ptr);
}
@@ -346,12 +351,12 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
  auto out_ptr = out->data<float>();
  arg->group_num = (uint32_t)group_num;
-  arg->split_num = (uint32_t)fpga::get_plit_num(filter);
+  // Either group_num or split_num = 1;
+  arg->split_num = group_num == 1 ? (uint32_t)get_plit_num(filter) : 1;
  arg->filter_num = (uint32_t)filter->dims()[0];
  arg->output.address = out_ptr;
  arg->output.scale_address = out->scale;
-  arg->conv_args = (fpga::ConvArgs *)fpga::fpga_malloc(arg->split_num *
-                                                       sizeof(fpga::ConvArgs));
+  arg->conv_args = (ConvArgs *)fpga_malloc(arg->split_num * sizeof(ConvArgs));
  arg->concat_arg.image_num = arg->split_num;
  arg->concat_arg.image_out = out_ptr;
@@ -360,15 +365,14 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
  arg->concat_arg.width = (uint32_t)filter->dims()[3];
  int n = arg->split_num;
-  arg->concat_arg.images_in = (half **)fpga::fpga_malloc(n * sizeof(int *));
-  arg->concat_arg.scales_in = (float **)fpga::fpga_malloc(n * sizeof(float *));
-  arg->concat_arg.channel_num =
-      (uint32_t *)fpga::fpga_malloc(n * sizeof(uint32_t));
+  arg->concat_arg.images_in = (half **)fpga_malloc(n * sizeof(int *));
+  arg->concat_arg.scales_in = (float **)fpga_malloc(n * sizeof(float *));
+  arg->concat_arg.channel_num = (uint32_t *)fpga_malloc(n * sizeof(uint32_t));
  arg->concat_arg.image_out = out_ptr;
  auto channel = (int)out->dims()[1];
-  int filter_num_per_div = fpga::get_filter_num_per_div(filter, group_num);
-  int element_num = fpga::get_aligned_filter_element_num(
+  int filter_num_per_div = get_filter_num_per_div(filter, group_num);
+  int element_num = get_aligned_filter_element_num(
      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
  for (int i = 0; i < n; i++) {
@@ -390,16 +394,17 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
        &((int8_t *)filter_ptr)[i * element_num * filter_num_per_div];
    arg->conv_args[i].sb_address = &bs_ptr[i * filter_num_per_div * 2];
    arg->conv_args[i].filter_num =
-        (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
-                                    channel - (n - 1) * filter_num_per_div)
+        (uint32_t)(i == n - 1 ? channel - (n - 1) * filter_num_per_div
                              : filter_num_per_div);
    if (n > 1) {
      arg->conv_args[i].output.scale_address =
-          (float *)fpga::fpga_malloc(2 * sizeof(float));
-      arg->conv_args[i].output.address =
-          fpga::fpga_malloc(input->dims()[2] * input->dims()[3] *
-                            arg->conv_args[i].filter_num * sizeof(half));
+          (float *)fpga_malloc(2 * sizeof(float));
+      arg->conv_args[i].output.address = fpga_malloc(
+          input->dims()[2] *
+          align_to_x(input->dims()[3] * arg->conv_args[i].filter_num,
+                     IMAGE_ALIGNMENT) *
+          sizeof(half));
    }
    else {
@@ -408,7 +413,7 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
    }
    arg->concat_arg.images_in[i] = (half *)arg->conv_args[i].output.address;
-    arg->concat_arg.scales_in[i] = (float *)arg->conv_args[i].sb_address;
+    arg->concat_arg.scales_in[i] = arg->conv_args[i].output.scale_address;
    arg->concat_arg.channel_num[i] = arg->conv_args[i].filter_num;
  }
}
...
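The fill_conv_arg change above gives each of the split_num convolution slices filter_num_per_div output channels, with the last slice taking the plain remainder channel - (n - 1) * filter_num_per_div instead of the previously used aligned count. A small sketch of that arithmetic, with illustrative names:

#include <cstdio>
#include <vector>

// Distribute `channel` output channels over `n` slices of at most
// `filter_num_per_div` filters each; the last slice takes the remainder.
std::vector<int> split_filters(int channel, int filter_num_per_div, int n) {
  std::vector<int> per_slice(n, filter_num_per_div);
  per_slice[n - 1] = channel - (n - 1) * filter_num_per_div;
  return per_slice;
}

int main() {
  for (int f : split_filters(70, 32, 3)) {
    std::printf("%d ", f);  // prints: 32 32 6
  }
  std::printf("\n");
  return 0;
}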
@@ -74,15 +74,17 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out,
  int align_each_in_area_cw = 0;
  int align_each_out_area_cw_differ = 0;
  int tmp_channel = 0;
-  *scale_out = 0;
+  scale_out[0] = 0.0;
+  scale_out[1] = 0.0;
  for (i = 0; i < image_num; i++) {
    each_out_line_channel += channel_num[i];
-    *scale_out = std::max(*scale_out, scales_in[i][0]);
+    scale_out[0] = std::max(*scale_out, scales_in[i][0]);
    fpga_invalidate(images_in[i],
                    height *
                        align_to_x(channel_num[i] * width, IMAGE_ALIGNMENT) *
                        sizeof(int16_t));
  }
+  scale_out[1] = 1 / scale_out[0];
  align_each_out_area_cw =
      align_to_x(each_out_line_channel * width, IMAGE_ALIGNMENT);
  align_each_out_area_cw_differ =
...
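The concat change above treats scale_out as a two-element array: element 0 becomes the maximum of the input scales and element 1 its reciprocal, filled in after the loop. A condensed sketch of that bookkeeping, assuming (as the code suggests) that each scale buffer stores the pair {scale, 1/scale}:

#include <algorithm>
#include <vector>

void propagate_concat_scale(const std::vector<const float *> &scales_in,
                            float *scale_out) {
  scale_out[0] = 0.0f;  // running maximum of the input scales
  scale_out[1] = 0.0f;
  for (const float *s : scales_in) {
    scale_out[0] = std::max(scale_out[0], s[0]);
  }
  scale_out[1] = 1.0f / scale_out[0];  // reciprocal, as in the patch
}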
@@ -79,7 +79,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
  std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
  for (int j = 0; j < ops.size(); ++j) {
    std::shared_ptr<framework::OpDesc> op = ops[j];
-    DLOG << "create op: " << op->Type();
+    DLOG << "create op: " << j << " " << op->Type();
    auto op_base = framework::OpRegistry<Dtype>::CreateOp(
        op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
        program_.scope);
@@ -103,7 +103,9 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
  std::shared_ptr<framework::BlockDesc> to_predict_block =
      to_predict_program_->Block(0);
  auto &ops = ops_of_block_[*to_predict_block.get()];
+  int i = 0;
  for (const auto &op : ops) {
+    DLOG << "Init op: " << i++ << " " << op->Type();
    op->Init();
  }
}
@@ -702,6 +704,7 @@ void Executor<Dtype, P>::Predict_From_To(int start, int end) {
    clock_gettime(CLOCK_MONOTONIC, &ts);
    profile[i].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
#endif
+    DLOG << "Running op: " << i << " " << ops[i]->Type();
    ops[i]->Run();
#ifdef PADDLE_MOBILE_PROFILE
...
@@ -17,7 +17,17 @@
#import <CoreImage/CoreImage.h>
#import <Foundation/Foundation.h>

-@interface PaddleMobile : NSObject
+@interface PaddleMobileCPUResult: NSObject
+
+@property (assign, nonatomic, readonly) float *output;
+@property (assign, nonatomic, readonly) int outputSize;
+
+-(void)releaseOutput;
+
+@end
+
+@interface PaddleMobileCPU : NSObject

/*
  Create the instance
@@ -34,13 +44,36 @@
*/
- (BOOL)load:(NSString *)modelAndWeightPath;

+/*
+ * Load the model from memory
+ * */
+- (BOOL)LoadCombinedMemory:(size_t)modelLen
+               andModelBuf:(const uint8_t *)modelBuf
+         andModelParamsLen:(size_t)combinedParamsLen
+      andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf;
+
+/*
+ * Preprocess the image; the caller must allocate the output buffer and is responsible for releasing it
+ * */
+-(void)preprocess:(CGImageRef)image
+           output:(float *)output
+            means:(NSArray<NSNumber *> *)means
+            scale:(float)scale
+              dim:(NSArray<NSNumber *> *)dim;
+
+/*
+ * Run prediction on preprocessed data; when finished with the returned result, call its releaseOutput method to release it
+ * */
+- (PaddleMobileCPUResult *)predictInput:(float *)input
+                                    dim:(NSArray<NSNumber *> *)dim;
+
/*
  Run prediction; means and scale are the preprocessing parameters used when the model was trained. If no such preprocessing was applied at training time, call predict directly.
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim means:(NSArray<NSNumber *> *)means scale:(float)scale;

/*
-  Run prediction
+  Run prediction; means defaults to 0 and scale to 1.0
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim;
...
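The new Objective-C entry points declared above are thin wrappers around the C++ paddle_mobile engine used in the implementation file below. A hedged C++ sketch of the equivalent call sequence; it assumes the engine object can be constructed directly and that the buffers and dims come from the caller, and it uses only calls that appear in this commit:

#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>

#include "framework/tensor.h"
#include "io/paddle_mobile.h"

void run_once(const uint8_t *model_buf, std::size_t model_len,
              const uint8_t *params_buf, std::size_t params_len,
              const float *input, const std::vector<int64_t> &dim) {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU,
                              paddle_mobile::Precision::FP32> engine;
  engine.SetThreadNum(2);
  if (!engine.LoadCombinedMemory(model_len, model_buf, params_len, params_buf)) {
    return;  // load failed
  }

  // Copy the preprocessed input into a framework tensor, as predictInput does.
  paddle_mobile::framework::Tensor input_tensor;
  float *input_ptr = input_tensor.mutable_data<float>(
      paddle_mobile::framework::make_ddim(dim));
  int64_t numel = 1;
  for (int64_t d : dim) numel *= d;
  std::memcpy(input_ptr, input, numel * sizeof(float));

  std::shared_ptr<paddle_mobile::framework::Tensor> output =
      engine.Predict(input_tensor);
  // output->data<float>() now holds output->numel() result values.
}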
@@ -12,24 +12,51 @@
See the License for the specific language governing permissions and
limitations under the License. */

-#import "PaddleMobile.h"
+#import "PaddleMobileCPU.h"
#import "op_symbols.h"
+#include "framework/tensor.h"
#import "io/paddle_mobile.h"
#import <memory>
#import <vector>

-@interface PaddleMobile()
+@interface PaddleMobileCPUResult()
+
+-(void)toSetOutput:(float *)output;
+
+-(void)toSetOutputSize:(int)outputSize;
+
+@end
+
+@implementation PaddleMobileCPUResult
+
+-(void)releaseOutput {
+  delete [] _output;
+  _output = nil;
+  _outputSize = 0;
+}
+
+-(void)toSetOutput:(float *)output {
+  _output = output;
+}
+
+-(void)toSetOutputSize:(int)outputSize {
+  _outputSize = outputSize;
+}
+
+@end
+
+@interface PaddleMobileCPU()
{
  paddle_mobile::PaddleMobile<paddle_mobile::CPU, paddle_mobile::Precision::FP32> *pam_;
  BOOL loaded_;
-  std::vector<float> *predict_input_;
}
@end

-@implementation PaddleMobile
+@implementation PaddleMobileCPU

static std::mutex shared_mutex;
@@ -66,6 +93,14 @@ static std::mutex shared_mutex;
  }
}

+- (BOOL)LoadCombinedMemory:(size_t)modelLen
+               andModelBuf:(const uint8_t *)modelBuf
+         andModelParamsLen:(size_t)combinedParamsLen
+      andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf {
+  pam_->SetThreadNum(2);
+  return loaded_ = pam_->LoadCombinedMemory(modelLen, modelBuf, combinedParamsLen, combinedParamsBuf);
+}
+
- (BOOL)load:(NSString *)modelAndWeightPath{
  std::string model_path_str = std::string([modelAndWeightPath UTF8String]);
  if (loaded_ = pam_->Load(model_path_str)) {
@@ -75,6 +110,57 @@ static std::mutex shared_mutex;
  }
}

+-(void)preprocess:(CGImageRef)image
+           output:(float *)output
+            means:(NSArray<NSNumber *> *)means
+            scale:(float)scale
+              dim:(NSArray<NSNumber *> *)dim {
+  std::lock_guard<std::mutex> lock(shared_mutex);
+
+  // dim to c++ vector, get numel
+  std::vector<int64_t > dim_vec;
+  int numel = 1;
+  for (int k = 0; k < dim.count; ++k) {
+    int d = dim[k].intValue;
+    numel *= d;
+    dim_vec.push_back(d);
+  }
+
+  const int sourceRowBytes = CGImageGetBytesPerRow(image);
+  const int imageWidth = CGImageGetWidth(image);
+  const int imageHeight = CGImageGetHeight(image);
+  const int imageChannels = 4;
+  CGDataProviderRef provider = CGImageGetDataProvider(image);
+  CFDataRef cfData = CGDataProviderCopyData(provider);
+  const UInt8 *input = CFDataGetBytePtr(cfData);
+
+  int wanted_input_width = dim_vec[3];
+  int wanted_input_height = dim_vec[2];
+  int wanted_input_channels = dim_vec[1];
+
+  for (int c = 0; c < wanted_input_channels; ++c) {
+    float *out_channel = output + c * wanted_input_height * wanted_input_width;
+    for (int y = 0; y < wanted_input_height; ++y) {
+      float *out_row = out_channel + y * wanted_input_width;
+      for (int x = 0; x < wanted_input_width; ++x) {
+        int in_row = (y * imageHeight) / wanted_input_height;
+        int in_col = (x * imageWidth) / wanted_input_width;
+        const UInt8 *in_pixel = input + (in_row * imageWidth * imageChannels) + (in_col * imageChannels);
+        float *out_pos = out_row + x;
+        if (c == 0) {
+          *out_pos = (in_pixel[c] - means[c].floatValue) * scale;
+        }else if (c == 1){
+          *out_pos = (in_pixel[c] - means[c].floatValue) * scale;
+        }else if (c == 2){
+          *out_pos = (in_pixel[c] - means[c].floatValue) * scale;
+        }
+      }
+    }
+  }
+}
+
-(void)preprocess:(const UInt8 *)input output:(float *)output imageWidth:(int)imageWidth imageHeight:(int)imageHeight imageChannels:(int)imageChannels means:(NSArray<NSNumber *> *)means scale:(float)scale dim:(std::vector<int64_t>)dim{
  if (means == nil) {
    means = @[@0, @0, @0];
@@ -105,27 +191,54 @@
    }
  }
}

-- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim means:(NSArray<NSNumber *> *)means scale:(float)scale{
-//  printf(" hi i am here");
-  if (predict_input_) {
-//    printf(" fukc -- ");
-//    printf(" %d \n", predict_input_->size());
-    // dim to c++ vector, get numel
-    std::vector<int64_t > dim_vec = {1, 3, 300, 300};
-//    int numel = 1;
-//    for (int k = 0; k < dim.count; ++k) {
-//      int d = dim[k].intValue;
-//      numel *= d;
-//      dim_vec.push_back(d);
-//    }
-    std::vector<float> cpp_result = pam_->Predict(*predict_input_, dim_vec);
-    return nil;
-  }
-//  printf(" predict one ");
+- (PaddleMobileCPUResult *)predictInput:(float *)input
+                                    dim:(NSArray<NSNumber *> *)dim {
+  std::lock_guard<std::mutex> lock(shared_mutex);
+  if (!loaded_) {
+    printf("PaddleMobile doesn't be loaded yet");
+    return nil;
+  }
+  if (dim.count != 4) {
+    printf("dim must have 4 elements");
+    return nil;
+  }
+
+  // dim to c++ vector, get numel
+  std::vector<int64_t > dim_vec;
+  int numel = 1;
+  for (int k = 0; k < dim.count; ++k) {
+    int d = dim[k].intValue;
+    numel *= d;
+    dim_vec.push_back(d);
+  }
+
+  paddle_mobile::framework::Tensor input_tensor;
+  paddle_mobile::framework::DDim dims = paddle_mobile::framework::make_ddim(dim_vec);
+  float *input_ptr = input_tensor.mutable_data<float>(dims);
+  memcpy(input_ptr, input, numel * sizeof(float));
+
+  std::shared_ptr<paddle_mobile::framework::Tensor> output = pam_->Predict(input_tensor);
+
+  float *output_pointer = new float[output->numel()];
+  memcpy(output_pointer, output->data<float>(), output->numel() * sizeof(float));
+
+  PaddleMobileCPUResult *cpuResult = [[PaddleMobileCPUResult alloc] init];
+  [cpuResult toSetOutput: output_pointer];
+  [cpuResult toSetOutputSize: output->numel()];
+  return cpuResult;
+}
+
+- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim means:(NSArray<NSNumber *> *)means scale:(float)scale{
//  printf(" predict one ");
-//  std::lock_guard<std::mutex> lock(shared_mutex);
+  std::lock_guard<std::mutex> lock(shared_mutex);
  if (!loaded_) {
    printf("PaddleMobile doesn't be loaded yet");
    return nil;
@@ -164,15 +277,13 @@ static std::mutex shared_mutex;
  }

  // input
-  std::vector<float> *predict_input = new std::vector<float>();
+  std::vector<float> predict_input;
  for (int j = 0; j < numel; ++j) {
-    predict_input->push_back(dataPointer[j]);
+    predict_input.push_back(dataPointer[j]);
  }

-  predict_input_ = predict_input;
-
  // predict
-  std::vector<float> cpp_result = pam_->Predict(*predict_input, dim_vec);
+  std::vector<float> cpp_result = pam_->Predict(predict_input, dim_vec);

  // result
  long count = 0;
...
@@ -15,27 +15,46 @@
#pragma once

#include "operators/batchnorm_op.h"
+#include "operators/bilinear_interp_op.h"
#include "operators/box_coder_op.h"
#include "operators/concat_op.h"
#include "operators/conv_op.h"
+#include "operators/conv_transpose_op.h"
+#include "operators/crf_op.h"
#include "operators/depthwise_conv_op.h"
#include "operators/dropout_op.h"
#include "operators/elementwise_add_op.h"
#include "operators/feed_op.h"
#include "operators/fetch_op.h"
+#include "operators/flatten_op.h"
#include "operators/fusion_conv_add.h"
+#include "operators/fusion_conv_add_add_prelu_op.h"
+#include "operators/fusion_conv_add_bn_op.h"
#include "operators/fusion_conv_add_bn_relu_op.h"
+#include "operators/fusion_conv_add_prelu_op.h"
+#include "operators/fusion_conv_add_relu_op.h"
+#include "operators/fusion_conv_bn_add_relu_op.h"
#include "operators/fusion_conv_bn_relu_op.h"
#include "operators/fusion_dwconv_bn_relu_op.h"
+#include "operators/fusion_elementwise_add_relu_op.h"
#include "operators/fusion_fc_op.h"
+#include "operators/fusion_fc_relu_op.h"
+#include "operators/gru_op.h"
#include "operators/im2sequence_op.h"
+#include "operators/lookup_op.h"
#include "operators/lrn_op.h"
#include "operators/mul_op.h"
#include "operators/multiclass_nms_op.h"
#include "operators/pool_op.h"
+#include "operators/prelu_op.h"
#include "operators/prior_box_op.h"
#include "operators/relu_op.h"
#include "operators/reshape_op.h"
+#include "operators/resize_op.h"
+#include "operators/scale_op.h"
+#include "operators/shape_op.h"
#include "operators/sigmoid_op.h"
+#include "operators/slice_op.h"
#include "operators/softmax_op.h"
+#include "operators/split_op.h"
#include "operators/transpose_op.h"
@@ -53,9 +53,9 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
    auto input_ptr = input->data<float>();
    fpga::format_image(input);
    Tensor *output = param_.Out();
-    auto output_ptr = output->data<half>();
+    auto output_ptr = output->data<float>();

-    fpga::BypassArgs args;
+    fpga::BypassArgs args = {fpga::DATA_TYPE_FP32};
    args.input_data_type = fpga::DATA_TYPE_FP32;
    args.output_data_type = fpga::DATA_TYPE_FP16;
...
@@ -43,7 +43,7 @@ bool ConcatKernel<FPGA, float>::Init(ConcatParam<FPGA> *param) {
  fpga::format_concat_output(out, (int)height, (int)width, (int)image_num,
                             channel_num);

-  fpga::ConcatArgs concatArgs;
+  fpga::ConcatArgs concatArgs = {0};
  concatArgs.image_num = (uint32_t)image_num;
  concatArgs.images_in = images_in;
  concatArgs.scales_in = scales_in;
...
@@ -66,7 +66,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
  fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
  fpga::format_fp16_ofm(out);

-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
  fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
                      param->Groups(), param->Strides()[0], param->Strides()[1],
                      param->Paddings()[0], param->Paddings()[1], bs_ptr);
...
@@ -64,7 +64,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
  fpga::format_fp16_ofm(out);

-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
  fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
                      param->Groups(), param->Strides()[0], param->Strides()[1],
                      param->Paddings()[0], param->Paddings()[1], bs_ptr);
...
@@ -46,7 +46,7 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
  fpga::format_fp16_ofm(out);

-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
  fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
                      param->Groups(), param->Strides()[0], param->Strides()[1],
                      param->Paddings()[0], param->Paddings()[1], bs_ptr);
...
@@ -58,7 +58,7 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
  fpga::format_fp16_ofm(out);

-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
  fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
                      param->Groups(), param->Strides()[0], param->Strides()[1],
                      param->Paddings()[0], param->Paddings()[1], bs_ptr);
...
@@ -58,7 +58,7 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
  fpga::format_fp16_ofm(out);

-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
  fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
                      param->Groups(), param->Strides()[0], param->Strides()[1],
                      param->Paddings()[0], param->Paddings()[1], bs_ptr);
...
@@ -30,7 +30,7 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
  fpga::format_fp16_ofm(out);
  auto out_ptr = out->mutable_data<float>();

-  fpga::EWAddArgs ewaddArgs;
+  fpga::EWAddArgs ewaddArgs = {0};
  ewaddArgs.relu_enabled = relu_enabled;
  ewaddArgs.const0 = 1;
  ewaddArgs.const1 = 1;
...
@@ -51,7 +51,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
  fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
  fpga::format_fp16_ofm(out);

-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
  fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0,
                      0, bs_ptr);
  param->SetFpgaArgs(conv_arg);
...
@@ -52,7 +52,7 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
  fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
  fpga::format_fp16_ofm(out);

-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
  fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0,
                      0, bs_ptr);
  param->SetFpgaArgs(conv_arg);
...
@@ -30,7 +30,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
  vector<int> strides = param->Strides();
  vector<int> paddings = param->Paddings();

-  fpga::PoolingArgs poolArgs;
+  fpga::PoolingArgs poolArgs = {0};
  poolArgs.image.address = input_ptr;
  poolArgs.image.channels = (uint32_t)input->dims()[1];
  poolArgs.image.height = (uint32_t)input->dims()[2];
...
@@ -26,10 +26,11 @@ template <>
bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
  auto input = const_cast<Tensor *>(param->InputX());
  auto input_ptr = input->data<float>();
-  auto float_input = new Tensor(*input);
+  auto float_input = new Tensor;
+  float_input->mutable_data<float>(input->dims());
  fpga::format_fp32_ofm(float_input);

-  fpga::BypassArgs args;
+  fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
  args.input_layout_type = fpga::LAYOUT_HWC;
  args.output_layout_type = fpga::LAYOUT_CHW;
  args.input_data_type = fpga::DATA_TYPE_FP16;
...
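The recurring `= {0}` and `= {fpga::DATA_TYPE_FP32}` initializers introduced throughout these kernels rely on C++ aggregate initialization: giving the first member an explicit value value-initializes every remaining member to zero, so no indeterminate stack bytes reach the driver through the later ioctl calls. A tiny self-contained illustration (the struct is a placeholder, not the project's real argument type):

#include <cstddef>
#include <cstdio>

struct CacheArgs {  // placeholder resembling MemoryCacheArgs
  void *address;
  std::size_t size;
  int flags;
};

int main() {
  CacheArgs a;              // members are indeterminate: do not read them
  (void)a;
  CacheArgs b = {nullptr};  // address = nullptr, size = 0, flags = 0
  CacheArgs c = {};         // every member value-initialized as well
  std::printf("%p %zu %d\n", b.address, b.size, b.flags);
  std::printf("%p %zu %d\n", c.address, c.size, c.flags);
  return 0;
}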
@@ -341,22 +341,23 @@ class OpParam {
  }
};

-#ifdef CONV_OP
template <typename Dtype>
-class ConvParam : OpParam {
+class ConvParam : public OpParam {
  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
  typedef typename DtypeTensorTrait<Dtype>::rtype RType;

 public:
  ConvParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
            const AttributeMap &attrs, const Scope &scope) {
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutputFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
+    filter_ = OpParam::FilterFrom<GType>(inputs, scope);
+    input_ = OpParam::InputFrom<GType>(inputs, scope);
+    if (outputs.count("Output")) {
+      output_ = OpParam::OutputFrom<GType>(outputs, scope);
+    }
+    strides_ = OpParam::GetAttr<vector<int>>("strides", attrs);
+    paddings_ = OpParam::GetAttr<vector<int>>("paddings", attrs);
+    dilations_ = OpParam::GetAttr<vector<int>>("dilations", attrs);
+    groups = OpParam::GetAttr<int>("groups", attrs);
  }

  const RType *Input() const { return input_; }
@@ -384,7 +385,6 @@ class ConvParam : OpParam {
};
template <typename Dtype>
Print &operator<<(Print &printer, const ConvParam<Dtype> &conv_param);
-#endif

template <typename Dtype>
class ElementwiseAddParam : OpParam {
@@ -1294,52 +1294,29 @@ using FusionFcReluParam = FusionFcParam<DeviceType>;
#endif

template <typename Dtype>
-class FusionConvAddParam : public OpParam {
+class FusionConvAddParam : public ConvParam<Dtype> {
  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
 public:
  FusionConvAddParam(const VariableNameMap &inputs,
                     const VariableNameMap &outputs, const AttributeMap &attrs,
-                     const Scope &scope) {
-    bias_ = InputYFrom<GType>(inputs, scope);
-    axis_ = GetAttr<int>("axis", attrs);
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
+                     const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    bias_ = OpParam::InputYFrom<GType>(inputs, scope);
+    axis_ = OpParam::GetAttr<int>("axis", attrs);
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
  }
  RType *Bias() const { return bias_; }
  const int &Axis() const { return axis_; }
-  const RType *Input() const { return input_; }
-  const RType *Filter() const { return filter_; }
  RType *Output() const { return output_; }
-  const vector<int> &Strides() const { return strides_; }
-  const vector<int> &Paddings() const { return paddings_; }
-  const vector<int> &Dilations() const { return dilations_; }
-  const int &Groups() const { return groups; }
 protected:
  RType *bias_;
  int axis_;
-  RType *input_;
  RType *output_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
#ifdef PADDLE_MOBILE_FPGA
 private:
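The FusionConvAddParam change above is the template for the rest of this file: every fusion parameter class stops re-reading the shared conv inputs and attributes and instead delegates to the new ConvParam<Dtype> base constructor, keeping only its own extras. A condensed sketch of the pattern with illustrative names (not the real classes):

#include <vector>

struct AttributeMapSketch {};  // placeholder for the framework's AttributeMap

template <typename Dtype>
class ConvParamSketch {
 public:
  explicit ConvParamSketch(const AttributeMapSketch &attrs) {
    // strides_, paddings_, dilations_ and groups_ are read here, once.
  }

 protected:
  std::vector<int> strides_, paddings_, dilations_;
  int groups_ = 1;
};

template <typename Dtype>
class FusionConvAddParamSketch : public ConvParamSketch<Dtype> {
 public:
  explicit FusionConvAddParamSketch(const AttributeMapSketch &attrs)
      : ConvParamSketch<Dtype>(attrs) {
    // Only the fusion-specific members (bias, axis, output) are handled here.
  }
};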
@@ -1366,58 +1343,33 @@ class FusionConvAddReluParam : public FusionConvAddParam<DeviceType> {
#endif

#ifdef FUSION_CONVADDPRELU_OP
-template <typename DeviceType>
-class FusionConvAddPReluParam : public OpParam {
-  typedef typename DtypeTensorTrait<DeviceType>::gtype GType;
-  typedef typename DtypeTensorTrait<DeviceType>::rtype RType;
+template <typename Dtype>
+class FusionConvAddPReluParam : public ConvParam<Dtype> {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
 public:
  FusionConvAddPReluParam(const VariableNameMap &inputs,
                          const VariableNameMap &outputs,
-                          const AttributeMap &attrs, const Scope &scope) {
-    alpha_ = InputAlphaFrom<GType>(inputs, scope);
-    mode_ = GetAttr<std::string>("mode", attrs);
+                          const AttributeMap &attrs, const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    alpha_ = OpParam::InputAlphaFrom<GType>(inputs, scope);
+    mode_ = OpParam::GetAttr<std::string>("mode", attrs);
    framework::DDim dims = alpha_->dims();
-    bias_ = InputYFrom<GType>(inputs, scope);
-    axis_ = GetAttr<int>("axis", attrs);
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
+    bias_ = OpParam::InputYFrom<GType>(inputs, scope);
+    axis_ = OpParam::GetAttr<int>("axis", attrs);
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
  }
  const RType *InputAlpha() const { return alpha_; }
  const std::string &Mode() const { return mode_; }
  RType *Bias() const { return bias_; }
  const int &Axis() const { return axis_; }
-  const RType *Input() const { return input_; }
-  const RType *Filter() const { return filter_; }
  RType *Output() const { return output_; }
-  const vector<int> &Strides() const { return strides_; }
-  const vector<int> &Paddings() const { return paddings_; }
-  const vector<int> &Dilations() const { return dilations_; }
-  const int &Groups() const { return groups; }
 protected:
  RType *bias_;
  int axis_;
-  RType *input_;
  RType *output_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
  RType *alpha_;
  std::string mode_;
#ifdef PADDLE_MOBILE_FPGA
@@ -1433,35 +1385,30 @@ class FusionConvAddPReluParam : public OpParam {
#endif

#ifdef FUSION_CONVADDADDPRELU_OP
-template <typename DeviceType>
-class FusionConvAddAddPReluParam : public OpParam {
-  typedef typename DtypeTensorTrait<DeviceType>::gtype GType;
-  typedef typename DtypeTensorTrait<DeviceType>::rtype RType;
+template <typename Dtype>
+class FusionConvAddAddPReluParam : public ConvParam<Dtype> {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
 public:
  FusionConvAddAddPReluParam(const VariableNameMap &inputs,
                             const VariableNameMap &outputs,
-                             const AttributeMap &attrs, const Scope &scope) {
-    bias1_ = InputYFrom1<GType>(inputs, scope);
-    alpha_ = InputAlphaFrom<GType>(inputs, scope);
-    mode_ = GetAttr<std::string>("mode", attrs);
+                             const AttributeMap &attrs, const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    bias1_ = OpParam::InputYFrom1<GType>(inputs, scope);
+    alpha_ = OpParam::InputAlphaFrom<GType>(inputs, scope);
+    mode_ = OpParam::GetAttr<std::string>("mode", attrs);
    framework::DDim dims = alpha_->dims();
-    bias_ = InputYFrom<GType>(inputs, scope);
-    axis_ = GetAttr<int>("axis", attrs);
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
-    keyOutput_ = getkey("addOut", inputs, 0);
-    keyX1_ = getkey("addX", inputs, 1);
-    keyY1_ = getkey("Y", inputs, 1);
+    bias_ = OpParam::InputYFrom<GType>(inputs, scope);
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+    axis_ = OpParam::GetAttr<int>("axis", attrs);
+    keyOutput_ = OpParam::getkey("addOut", inputs, 0);
+    keyX1_ = OpParam::getkey("addX", inputs, 1);
+    keyY1_ = OpParam::getkey("Y", inputs, 1);
    if (keyX1_ == keyOutput_) {
-      bias1_ = InputYFrom1<GType>(inputs, scope);
+      bias1_ = OpParam::InputYFrom1<GType>(inputs, scope);
    } else if (keyY1_ == keyOutput_) {
-      bias1_ = InputXFrom1<GType>(inputs, scope);
+      bias1_ = OpParam::InputXFrom1<GType>(inputs, scope);
    }
  }
  const RType *InputAlpha() const { return alpha_; }
@@ -1471,31 +1418,12 @@ class FusionConvAddAddPReluParam : public OpParam {
  RType *Bias() const { return bias_; }
  const int &Axis() const { return axis_; }
-  const RType *Input() const { return input_; }
-  const RType *Filter() const { return filter_; }
  RType *Output() const { return output_; }
-  const vector<int> &Strides() const { return strides_; }
-  const vector<int> &Paddings() const { return paddings_; }
-  const vector<int> &Dilations() const { return dilations_; }
-  const int &Groups() const { return groups; }
 protected:
  RType *bias_;
  int axis_;
-  RType *input_;
  RType *output_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
  RType *alpha_;
  std::string mode_;
  RType *bias1_;
@@ -1516,49 +1444,32 @@ class FusionConvAddAddPReluParam : public OpParam {
#ifdef FUSION_CONVADDBNRELU_OP
template <typename Dtype>
-class FusionConvAddBNReluParam : public OpParam {
+class FusionConvAddBNReluParam : public ConvParam<Dtype> {
  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
 public:
  FusionConvAddBNReluParam(const VariableNameMap &inputs,
                           const VariableNameMap &outputs,
-                           const AttributeMap &attrs, const Scope &scope) {
-    bias_ = InputYFrom<GType>(inputs, scope);
-    axis_ = GetAttr<int>("axis", attrs);
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
-    input_bias_ = InputBiasFrom<GType>(inputs, scope);
-    input_mean_ = InputMeanFrom<GType>(inputs, scope);
-    input_scale_ = InputScaleFrom<GType>(inputs, scope);
-    input_variance_ = InputVarianceFrom<GType>(inputs, scope);
-    epsilon_ = GetAttr<float>("epsilon", attrs);
-    momentum_ = GetAttr<float>("momentum", attrs);
-    //    is_test_ = GetAttr<bool>("is_test", attrs);
+                           const AttributeMap &attrs, const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    bias_ = OpParam::InputYFrom<GType>(inputs, scope);
+    axis_ = OpParam::GetAttr<int>("axis", attrs);
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    //    is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
  }
  RType *Bias() const { return bias_; }
  const int &Axis() const { return axis_; }
-  const RType *Input() const { return input_; }
-  const RType *Filter() const { return filter_; }
  RType *Output() const { return output_; }
-  const vector<int> &Strides() const { return strides_; }
-  const vector<int> &Paddings() const { return paddings_; }
-  const vector<int> &Dilations() const { return dilations_; }
-  const int &Groups() const { return groups; }
  const RType *InputBias() const { return input_bias_; }
  const RType *InputMean() const { return input_mean_; }
@@ -1584,13 +1495,7 @@ class FusionConvAddBNReluParam : public OpParam {
 protected:
  RType *bias_;
  int axis_;
-  RType *input_;
  RType *output_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
  RType *input_bias_;
  RType *input_mean_;
  RType *input_scale_;
@@ -1614,57 +1519,40 @@ class FusionConvAddBNReluParam : public OpParam {
#ifdef FUSION_CONVBNADDRELU_OP
template <typename Dtype>
-class FusionConvBNAddReluParam : public OpParam {
+class FusionConvBNAddReluParam : public ConvParam<Dtype> {
  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
 public:
  FusionConvBNAddReluParam(const VariableNameMap &inputs,
                           const VariableNameMap &outputs,
-                           const AttributeMap &attrs, const Scope &scope) {
-    bias_ = InputYFrom<GType>(inputs, scope);
-    axis_ = GetAttr<int>("axis", attrs);
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
-    input_bias_ = InputBiasFrom<GType>(inputs, scope);
-    input_mean_ = InputMeanFrom<GType>(inputs, scope);
-    input_scale_ = InputScaleFrom<GType>(inputs, scope);
-    input_variance_ = InputVarianceFrom<GType>(inputs, scope);
-    epsilon_ = GetAttr<float>("epsilon", attrs);
-    momentum_ = GetAttr<float>("momentum", attrs);
-    keyBNY_ = getkey("BNY", inputs, 0);
-    keyX_ = getkey("X", inputs, 0);
-    keyY_ = getkey("Y", inputs, 0);
+                           const AttributeMap &attrs, const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    bias_ = OpParam::InputYFrom<GType>(inputs, scope);
+    axis_ = OpParam::GetAttr<int>("axis", attrs);
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    keyBNY_ = OpParam::getkey("BNY", inputs, 0);
+    keyX_ = OpParam::getkey("X", inputs, 0);
+    keyY_ = OpParam::getkey("Y", inputs, 0);
    if (keyX_ == keyBNY_) {
-      bias_ = InputYFrom<GType>(inputs, scope);
+      bias_ = OpParam::InputYFrom<GType>(inputs, scope);
    } else if (keyY_ == keyBNY_) {
-      bias_ = InputXFrom<GType>(inputs, scope);
+      bias_ = OpParam::InputXFrom<GType>(inputs, scope);
    }
-    //    is_test_ = GetAttr<bool>("is_test", attrs);
+    //    is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
  }
  RType *Bias() const { return bias_; }
  const int &Axis() const { return axis_; }
-  const RType *Input() const { return input_; }
-  const RType *Filter() const { return filter_; }
  RType *Output() const { return output_; }
-  const vector<int> &Strides() const { return strides_; }
-  const vector<int> &Paddings() const { return paddings_; }
-  const vector<int> &Dilations() const { return dilations_; }
-  const int &Groups() const { return groups; }
  const RType *InputBias() const { return input_bias_; }
  const RType *InputMean() const { return input_mean_; }
@@ -1690,13 +1578,7 @@ class FusionConvBNAddReluParam : public OpParam {
 protected:
  RType *bias_;
  int axis_;
-  RType *input_;
  RType *output_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
  RType *input_bias_;
  RType *input_mean_;
  RType *input_scale_;
@@ -1723,44 +1605,26 @@ class FusionConvBNAddReluParam : public OpParam {
#ifdef FUSION_CONVBN_OP
template <typename Dtype>
-class FusionConvBNParam : public OpParam {
+class FusionConvBNParam : public ConvParam<Dtype> {
  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
 public:
  FusionConvBNParam(const VariableNameMap &inputs,
                    const VariableNameMap &outputs, const AttributeMap &attrs,
-                    const Scope &scope) {
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_y_ = OutputYFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
-    input_bias_ = InputBiasFrom<GType>(inputs, scope);
-    input_mean_ = InputMeanFrom<GType>(inputs, scope);
-    input_scale_ = InputScaleFrom<GType>(inputs, scope);
-    input_variance_ = InputVarianceFrom<GType>(inputs, scope);
-    epsilon_ = GetAttr<float>("epsilon", attrs);
-    momentum_ = GetAttr<float>("momentum", attrs);
-    //    is_test_ = GetAttr<bool>("is_test", attrs);
+                    const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    output_y_ = OpParam::OutputYFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    //    is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
  }
-  const RType *Input() const { return input_; }
-  const RType *Filter() const { return filter_; }
  RType *Output() const { return output_y_; }
-  const vector<int> &Strides() const { return strides_; }
-  const vector<int> &Paddings() const { return paddings_; }
-  const vector<int> &Dilations() const { return dilations_; }
-  const int &Groups() const { return groups; }
  const RType *InputBias() const { return input_bias_; }
  const RType *InputMean() const { return input_mean_; }
@@ -1784,13 +1648,7 @@ class FusionConvBNParam : public OpParam {
  const RType *NewBias() const { return new_bias_; }
 protected:
-  RType *input_;
  RType *output_y_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
  RType *input_bias_;
  RType *input_mean_;
  RType *input_scale_;
@@ -1814,49 +1672,32 @@ class FusionConvBNParam : public OpParam {
 #ifdef FUSION_CONVADDBN_OP
 template <typename Dtype>
-class FusionConvAddBNParam : public OpParam {
+class FusionConvAddBNParam : public ConvParam<Dtype> {
   typedef typename DtypeTensorTrait<Dtype>::gtype GType;
   typedef typename DtypeTensorTrait<Dtype>::rtype RType;

  public:
   FusionConvAddBNParam(const VariableNameMap &inputs,
                        const VariableNameMap &outputs,
-                       const AttributeMap &attrs, const Scope &scope) {
-    bias_ = InputYFrom<GType>(inputs, scope);
-    axis_ = GetAttr<int>("axis", attrs);
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_y_ = OutputYFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
-    input_bias_ = InputBiasFrom<GType>(inputs, scope);
-    input_mean_ = InputMeanFrom<GType>(inputs, scope);
-    input_scale_ = InputScaleFrom<GType>(inputs, scope);
-    input_variance_ = InputVarianceFrom<GType>(inputs, scope);
-    epsilon_ = GetAttr<float>("epsilon", attrs);
-    momentum_ = GetAttr<float>("momentum", attrs);
-    // is_test_ = GetAttr<bool>("is_test", attrs);
+                       const AttributeMap &attrs, const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    bias_ = OpParam::InputYFrom<GType>(inputs, scope);
+    axis_ = OpParam::GetAttr<int>("axis", attrs);
+    output_y_ = OpParam::OutputYFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    // is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
   }
   RType *Bias() const { return bias_; }
   const int &Axis() const { return axis_; }
-  const RType *Input() const { return input_; }
-  const RType *Filter() const { return filter_; }
   RType *Output() const { return output_y_; }
-  const vector<int> &Strides() const { return strides_; }
-  const vector<int> &Paddings() const { return paddings_; }
-  const vector<int> &Dilations() const { return dilations_; }
-  const int &Groups() const { return groups; }
   const RType *InputBias() const { return input_bias_; }
   const RType *InputMean() const { return input_mean_; }
@@ -1882,13 +1723,7 @@ class FusionConvAddBNParam : public OpParam {
  protected:
   RType *bias_;
   int axis_;
-  RType *input_;
   RType *output_y_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
   RType *input_bias_;
   RType *input_mean_;
   RType *input_scale_;
@@ -1912,44 +1747,26 @@ class FusionConvAddBNParam : public OpParam {
 #ifdef FUSION_DWCONVBNRELU_OP
 template <typename Dtype>
-class FusionDWConvBNReluParam : public OpParam {
+class FusionDWConvBNReluParam : public ConvParam<Dtype> {
   typedef typename DtypeTensorTrait<Dtype>::gtype GType;
   typedef typename DtypeTensorTrait<Dtype>::rtype RType;

  public:
   FusionDWConvBNReluParam(const VariableNameMap &inputs,
                           const VariableNameMap &outputs,
-                          const AttributeMap &attrs, const Scope &scope) {
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutFrom<GType>(outputs, scope);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
-    input_bias_ = InputBiasFrom<GType>(inputs, scope);
-    input_mean_ = InputMeanFrom<GType>(inputs, scope);
-    input_scale_ = InputScaleFrom<GType>(inputs, scope);
-    input_variance_ = InputVarianceFrom<GType>(inputs, scope);
-    epsilon_ = GetAttr<float>("epsilon", attrs);
-    momentum_ = GetAttr<float>("momentum", attrs);
-    // is_test_ = GetAttr<bool>("is_test", attrs);
+                          const AttributeMap &attrs, const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    // is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
   }
-  const RType *Input() const { return input_; }
-  const RType *Filter() const { return filter_; }
   RType *Output() const { return output_; }
-  const vector<int> &Strides() const { return strides_; }
-  const vector<int> &Paddings() const { return paddings_; }
-  const vector<int> &Dilations() const { return dilations_; }
-  const int &Groups() const { return groups; }
   const RType *InputBias() const { return input_bias_; }
   const RType *InputMean() const { return input_mean_; }
@@ -1973,13 +1790,7 @@ class FusionDWConvBNReluParam : public OpParam {
   const RType *NewBias() const { return new_bias_; }

  protected:
-  RType *input_;
   RType *output_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
   RType *input_bias_;
   RType *input_mean_;
   RType *input_scale_;
@@ -1995,45 +1806,26 @@ class FusionDWConvBNReluParam : public OpParam {
 #ifdef FUSION_CONVBNRELU_OP
 template <typename Dtype>
-class FusionConvBNReluParam : public OpParam {
+class FusionConvBNReluParam : public ConvParam<Dtype> {
   typedef typename DtypeTensorTrait<Dtype>::gtype GType;
   typedef typename DtypeTensorTrait<Dtype>::rtype RType;

  public:
   FusionConvBNReluParam(const VariableNameMap &inputs,
                         const VariableNameMap &outputs,
-                        const AttributeMap &attrs, const Scope &scope) {
-    filter_ = FilterFrom<GType>(inputs, scope);
-    input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutFrom<GType>(outputs, scope);
-
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    dilations_ = GetAttr<vector<int>>("dilations", attrs);
-    groups = GetAttr<int>("groups", attrs);
-    input_bias_ = InputBiasFrom<GType>(inputs, scope);
-    input_mean_ = InputMeanFrom<GType>(inputs, scope);
-    input_scale_ = InputScaleFrom<GType>(inputs, scope);
-    input_variance_ = InputVarianceFrom<GType>(inputs, scope);
-    epsilon_ = GetAttr<float>("epsilon", attrs);
-    momentum_ = GetAttr<float>("momentum", attrs);
-    // is_test_ = GetAttr<bool>("is_test", attrs);
+                        const AttributeMap &attrs, const Scope &scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    // is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
   }
-  const RType *Input() const { return input_; }
-  const RType *Filter() const { return filter_; }
   RType *Output() const { return output_; }
-  const vector<int> &Strides() const { return strides_; }
-  const vector<int> &Paddings() const { return paddings_; }
-  const vector<int> &Dilations() const { return dilations_; }
-  const int &Groups() const { return groups; }
   const RType *InputBias() const { return input_bias_; }
   const RType *InputMean() const { return input_mean_; }
@@ -2057,13 +1849,7 @@ class FusionConvBNReluParam : public OpParam {
   const RType *NewBias() const { return new_bias_; }

  protected:
-  RType *input_;
   RType *output_;
-  RType *filter_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  vector<int> dilations_;
-  int groups;
   RType *input_bias_;
   RType *input_mean_;
   RType *input_scale_;
...
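Every hunk above applies the same refactor: the fusion parameter classes stop carrying their own copies of the convolution fields (input, filter, strides, paddings, dilations, groups) and instead derive from ConvParam<Dtype>, forwarding inputs, outputs, attrs and scope to the base constructor. The remaining helper calls are now qualified with OpParam:: because, once the base is the dependent template ConvParam<Dtype>, unqualified names inherited through it are no longer found at template definition time. A minimal sketch of the base these classes are assumed to rely on, reconstructed from the removed members and accessors (an illustration, not the actual header):

template <typename Dtype>
class ConvParam : public OpParam {
  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
  typedef typename DtypeTensorTrait<Dtype>::rtype RType;

 public:
  // Reads the shared convolution inputs and attributes once, so derived
  // fusion params only read what is specific to them (BN inputs, bias, axis).
  ConvParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
            const AttributeMap &attrs, const Scope &scope) {
    filter_ = FilterFrom<GType>(inputs, scope);
    input_ = InputFrom<GType>(inputs, scope);
    strides_ = GetAttr<vector<int>>("strides", attrs);
    paddings_ = GetAttr<vector<int>>("paddings", attrs);
    dilations_ = GetAttr<vector<int>>("dilations", attrs);
    groups = GetAttr<int>("groups", attrs);
  }

  const RType *Input() const { return input_; }
  const RType *Filter() const { return filter_; }
  const vector<int> &Strides() const { return strides_; }
  const vector<int> &Paddings() const { return paddings_; }
  const vector<int> &Dilations() const { return dilations_; }
  const int &Groups() const { return groups; }

 private:
  RType *input_;
  RType *filter_;
  vector<int> strides_;
  vector<int> paddings_;
  vector<int> dilations_;
  int groups;
};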
@@ -34,11 +34,9 @@ void PReluOp<Dtype, T>::InferShape() const {
  * */
 namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
-USE_OP_CPU(prelu);
 REGISTER_OPERATOR_CPU(prelu, ops::PReluOp);
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
-USE_OP_MALI_GPU(prelu);
 REGISTER_OPERATOR_MALI_GPU(prelu, ops::PReluOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
...
@@ -50,4 +50,14 @@ class PReluOp : public framework::OperatorWithKernel<
 } // namespace operators
 } // namespace paddle_mobile

+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(prelu);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+USE_OP_MALI_GPU(prelu);
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(prelu);
+#endif
+
 #endif
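Moving the USE_OP_* declarations from prelu_op.cpp into prelu_op.h means every translation unit that includes the header also emits a reference to the operator's registration, so the linker cannot drop the object file that registers prelu from a static library. A self-contained sketch of the force-link pattern such USE_OP macros typically expand to (illustrative only; not paddle-mobile's actual macro definitions):

#include <cstdio>

// What REGISTER_OPERATOR_CPU(prelu, ...) conceptually leaves behind:
// a "touch" function defined next to the real registration code.
int TouchPreluOpCPU() {
  std::puts("prelu CPU operator registered");
  return 0;
}

// What USE_OP_CPU(prelu) conceptually expands to in every includer:
// a variable whose initializer references the touch function, forcing the
// linker to keep the object file that performs the registration.
extern int TouchPreluOpCPU();
static int use_prelu_cpu = TouchPreluOpCPU();

int main() { return 0; }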
@@ -33,6 +33,27 @@ elseif("FPGAnets" IN_LIST NET)
     ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-resnet paddle-mobile)

+    ADD_EXECUTABLE(test-resnet50 fpga/test_resnet50.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-resnet50 paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-EW fpga/test_fpga_EW.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-EW paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-conv fpga/test_fpga_conv.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-conv paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-pooling fpga/test_fpga_pooling.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-pooling paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-bypass fpga/test_fpga_bypass.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-bypass paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-softmax fpga/test_fpga_softmax.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-softmax paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-concat fpga/test_fpga_concat.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-fpga-concat paddle-mobile)
+
     ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-tensor-quant paddle-mobile)
@@ -242,13 +263,4 @@ else ()
     #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
 endif()
-
-# if(FPGA)
-# ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
-# target_link_libraries(test-tensor-quant paddle-mobile)
-# endif()
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_include.h"
static const char *g_resnet_combine = "../models/resnet50";
int main() {
DLOG << paddle_mobile::fpga::open_device();
paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
if (paddle_mobile.Load(std::string(g_resnet_combine) + "/model",
std::string(g_resnet_combine) + "/params", true)) {
std::vector<int64_t> dims{1, 3, 224, 224};
Tensor input_tensor;
SetupTensor<float>(&input_tensor, {1, 3, 224, 224}, static_cast<float>(0),
static_cast<float>(1));
std::vector<float> input(input_tensor.data<float>(),
input_tensor.data<float>() + input_tensor.numel());
paddle_mobile.FeedData(input_tensor);
paddle_mobile.Predict_To(-1);
// paddle_mobile.Predict_From(73);
// paddle_mobile.Predict_From_To(72, 73);
DLOG << "Computation done";
return 0;
}
}