提交 83c54c9c 编写于 作者: R Ray Liu 提交者: GitHub

Merge pull request #1419 from codeWorm2015/metal_oc_pb

Metal oc pb
......@@ -85,18 +85,18 @@ struct Print {
private:
void print(LogLevel level) {
buffer_ << std::endl;
// buffer_ << std::endl;
if (level == kLOG_ERROR) {
std::cerr << buffer_.str();
std::cerr << buffer_.str() << std::endl;
} else {
std::cout << buffer_.str();
std::cout << buffer_.str() << std::endl;
}
}
std::ostringstream buffer_;
};
struct ToLog {
ToLog(LogLevel level = kLOG_DEBUG, const std::string &info = "")
explicit ToLog(LogLevel level = kLOG_DEBUG, const std::string &info = "")
: level_(level) {
unsigned blanks =
(unsigned)(level > kLOG_DEBUG ? (level - kLOG_DEBUG) * 4 : 1);
......@@ -175,11 +175,8 @@ struct Print {
friend struct ToLog;
template <typename T>
Print &operator<<(T const &value) {
Print p = Print();
return p;
return *this;
}
private:
};
struct ToLog {
......
......@@ -77,6 +77,12 @@ const char *G_OP_TYPE_CAST = "cast";
const char *G_OP_TYPE_LOG = "log";
const char *G_OP_TYPE_LOD_RESET = "lod_reset";
const char *G_OP_TYPE_LESS_THAN = "less_than";
const char *G_OP_TYPE_LOGICAL_AND = "logical_and";
const char *G_OP_TYPE_LOGICAL_OR = "logical_or";
const char *G_OP_TYPE_LOGICAL_NOT = "logical_not";
const char *G_OP_TYPE_LOGICAL_XOR = "logical_xor";
const char *G_OP_TYPE_WRITE_TO_ARRAY = "write_to_array";
const char *G_OP_TYPE_READ_FROM_ARRAY = "read_from_array";
const char *G_OP_TYPE_QUANTIZE = "quantize";
const char *G_OP_TYPE_DEQUANTIZE = "dequantize";
......@@ -181,5 +187,11 @@ std::unordered_map<
{G_OP_TYPE_NORM, {{"X"}, {"Out", "Norm"}}},
{G_OP_TYPE_LOG, {{"X"}, {"Out"}}},
{G_OP_TYPE_LOD_RESET, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_LESS_THAN, {{"X", "Y"}, {"Out"}}}};
{G_OP_TYPE_LESS_THAN, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_LOGICAL_AND, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_LOGICAL_OR, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_LOGICAL_XOR, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_LOGICAL_NOT, {{"X"}, {"Out"}}},
{G_OP_TYPE_WRITE_TO_ARRAY, {{"X", "I"}, {"Out"}}},
{G_OP_TYPE_READ_FROM_ARRAY, {{"X", "I"}, {"Out"}}}};
} // namespace paddle_mobile
......@@ -131,9 +131,12 @@ extern const char *G_OP_TYPE_FUSION_CONV_BN_ADD_RELU;
extern const char *G_OP_TYPE_FUSION_DWCONV_BN_RELU;
extern const char *G_OP_TYPE_FUSION_CONV_BN_RELU;
extern const char *G_OP_TYPE_GRU;
extern const char *G_OP_TYPE_GRU_UNIT;
extern const char *G_OP_TYPE_LRN;
extern const char *G_OP_TYPE_MUL;
extern const char *G_OP_TYPE_MULTICLASS_NMS;
extern const char *G_OP_TYPE_NORM;
extern const char *G_OP_TYPE_POOL2D;
extern const char *G_OP_TYPE_PRIOR_BOX;
extern const char *G_OP_TYPE_RELU;
......@@ -163,6 +166,12 @@ extern const char *G_OP_TYPE_CAST;
extern const char *G_OP_TYPE_LOG;
extern const char *G_OP_TYPE_LOD_RESET;
extern const char *G_OP_TYPE_LESS_THAN;
extern const char *G_OP_TYPE_LOGICAL_AND;
extern const char *G_OP_TYPE_LOGICAL_OR;
extern const char *G_OP_TYPE_LOGICAL_NOT;
extern const char *G_OP_TYPE_LOGICAL_XOR;
extern const char *G_OP_TYPE_WRITE_TO_ARRAY;
extern const char *G_OP_TYPE_READ_FROM_ARRAY;
extern const char *G_OP_TYPE_QUANTIZE;
extern const char *G_OP_TYPE_DEQUANTIZE;
......
......@@ -151,6 +151,30 @@ void format_dwconv_filter(framework::Tensor *filter_tensor, float *scale_ptr) {
filter_tensor->reset_data_ptr(new_data);
}
// Re-packs the filter of a depthwise deconvolution into a freshly allocated
// FPGA buffer and installs that buffer as the tensor's data pointer.
//
// filter_tensor: tensor whose float data is copied, converted and replaced.
// scale_ptr:     forwarded unchanged to deconv_filter::DWDconv_format_filter.
// stride:        forwarded unchanged to deconv_filter::DWDconv_format_filter.
//
// The tensor's dims are not resized here; only the data pointer is swapped.
void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr,
                           int stride) {
  auto shape = filter_tensor->dims();
  auto dim0 = shape[0];
  auto rows = shape[2];
  auto cols = shape[3];

  // Work on a private copy so the conversion helpers may reallocate freely.
  auto source = filter_tensor->data<float>();
  size_t byte_count = dim0 * rows * cols * sizeof(float);
  auto packed = reinterpret_cast<float *>(fpga_malloc(byte_count));
  fpga_copy(packed, source, byte_count);

  // First pass treats the data as dim0 kernels of one channel each.
  int plane = rows * cols;
  deconv_filter::deconv_NC_convert(&packed, dim0, 1, plane);

  // Second pass: dims[1] is passed as `num` and dims[0] as `channel`.
  auto format_num = shape[1];
  int format_channel = shape[0];
  deconv_filter::DWDconv_format_filter(&packed, format_num, format_channel,
                                       rows, cols, scale_ptr, stride);

  filter_tensor->reset_data_ptr(packed);
}
void format_fc_filter(framework::Tensor *filter_tensor, float max_value) {
filter_tensor->scale[0] = float(max_value / 127.0); // NOLINT
filter_tensor->scale[1] = float(127.0 / max_value); // NOLINT
......@@ -243,6 +267,17 @@ void format_dwconv_data(framework::Tensor *filter_tensor,
format_bias_array(bias_ptr, channel);
format_fp16_ofm(ofm_tensor);
}
// Formats the filter, the bias/scale array and the output feature map of a
// depthwise deconvolution.
//
// filter_tensor: filter to re-pack (see format_DWDconv_filter).
// ofm_tensor:    output tensor; dims()[1] is used as the channel count.
// bs_ptr:        pointer to the bias/scale array; the scale values handed to
//                the filter formatter start sub_conv_n * channel floats in.
// group:         not read by this function.
// sub_conv_n:    passed to format_DWDconv_filter as its stride argument.
void format_DWDeconv_data(framework::Tensor *filter_tensor,
                          framework::Tensor *ofm_tensor, float **bs_ptr,
                          int group, int sub_conv_n) {
  int out_channels = ofm_tensor->dims()[1];

  // dw-deconv: the scale section sits after sub_conv_n * channel entries.
  float *scale_section = *bs_ptr + sub_conv_n * out_channels;
  format_DWDconv_filter(filter_tensor, scale_section, sub_conv_n);

  format_bias_array(bs_ptr, out_channels);
  format_fp16_ofm(ofm_tensor);
}
void expand_conv_arg(ConvArgs *arg) {
ConvArgs args = *arg;
......@@ -770,6 +805,7 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
auto filter_ptr = filter->data<float>();
auto input_ptr = input->data<float>();
auto output_ptr = out->mutable_data<float>();
arg->sub_conv_num = 1;
arg->relu_enabled = relu_enabled;
arg->bias_address = bias_ptr;
arg->filter_address = filter_ptr;
......@@ -788,5 +824,109 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
arg->output.scale_address = out->scale;
} // end dwconv arg fill
// Populates `arg` for a depthwise deconvolution that is executed as
// `stride_w` separate depthwise convolutions (one DWconvArgs per sub conv).
//
// arg:          structure to fill; its dw_conv_args and vector_dw_conv_space
//               vectors are appended to.
// input:        input tensor; dims()[1..3] are read as channels/height/width.
// out:          output tensor; it is re-formatted to the final output dims.
// filter:       filter tensor; dims()[0] and dims()[3] are read.
// relu_enabled: copied into every sub conv.
// stride_h, padding_h: accepted but never read; the width values below are
//               used for both axes.
// stride_w, padding_w: stride and padding used for all derived sizes.
// bias_ptr:     stored as bias_address of every sub conv.
void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter,
bool relu_enabled, int stride_h, int stride_w,
int padding_h, int padding_w, float *bias_ptr) {
auto filter_ptr = filter->data<float>();
auto input_ptr = input->data<float>();
// The returned pointer is not read again in this function.
auto output_ptr = out->mutable_data<float>();
// Deleter used by the shared_ptr entries that own the buffers allocated below.
auto deleter = [](void *p) { fpga_free(p); };
arg->group_num = (uint32_t)filter->dims()[0];
arg->sub_conv_num = (uint32_t)stride_w;
arg->filter_num = (uint32_t)filter->dims()[0];
int sub_conv_num = stride_w;
int sub_pad =
deconv_filter::deconv_calc_sub_pad((int)filter->dims()[3], // NOLINT
padding_w, stride_w);
auto sub_filter_width = (uint32_t)deconv_filter::deconv_get_sub_filter_axis(
(int)filter->dims()[3], stride_w); // NOLINT
auto sub_output_width = (uint32_t)deconv_filter::deconv_get_sub_out_axis(
(int)input->dims()[3], sub_pad, sub_filter_width); // NOLINT
// The height uses the same sub_pad and sub_filter_width as the width.
auto sub_output_height = (uint32_t)deconv_filter::deconv_get_sub_out_axis(
(int)input->dims()[2], sub_pad, sub_filter_width); // NOLINT
arg->sub_output_width = (uint32_t)sub_output_width;
arg->sub_output_height = (uint32_t)sub_output_height;
arg->omit_size = (uint32_t)deconv_filter::deconv_get_omit(
stride_w, (int)filter->dims()[3], padding_w); // NOLINT
auto sub_channels = (int)input->dims()[1]; // NOLINT
uint32_t omit_size = arg->omit_size;
// Final output size: the interleaved sub outputs minus omit_size on each side.
int real_out_width = sub_output_width * sub_conv_num - 2 * omit_size;
int real_out_height = sub_output_height * sub_conv_num - 2 * omit_size;
// sub_filter_num is computed but not read afterwards.
int sub_filter_num = sub_conv_num * (arg->filter_num);
framework::DDim dims_out_new = framework::make_ddim(
{1, arg->filter_num, real_out_height, real_out_width});
fpga::format_fp16_ofm(out, dims_out_new);
// Re-read the data pointer after the output tensor has been re-formatted.
auto out_ptr = out->data<float>();
/*====For Addition
arg->output.address =
(half *)out_ptr + // NOLINT
omit_size * sizeof(half) *
(align_to_x(real_out_width * arg->filter_num, IMAGE_ALIGNMENT));
*/
arg->output.address = out_ptr;
arg->output.scale_address = out->scale;
// Number of 16-bit filter elements belonging to one sub conv.
int filter_offset = sub_filter_width * sub_filter_width *
align_to_x(sub_channels, FILTER_ELEMENT_ALIGNMENT) *
arg->sub_conv_num;
// NOTE(review): indexing dw_conv_args[i] right after push_back assumes the
// vector is empty on entry -- confirm callers never fill the same arg twice.
for (int i = 0; i < sub_conv_num; ++i) {
arg->dw_conv_args.push_back(std::make_shared<DWconvArgs>());
arg->dw_conv_args[i]->sub_conv_num = sub_conv_num;
arg->dw_conv_args[i]->relu_enabled = relu_enabled;
arg->dw_conv_args[i]->bias_address = bias_ptr;
// Each sub conv gets its own copy of slice i of the formatted filter.
arg->dw_conv_args[i]->filter_address =
fpga_malloc(filter_offset * sizeof(int16_t));
memcpy(arg->dw_conv_args[i]->filter_address,
(reinterpret_cast<half *>(filter_ptr) + i * filter_offset),
filter_offset * sizeof(int16_t));
// Ownership of the filter copy is recorded so fpga_free runs on release.
arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(
reinterpret_cast<char *>(arg->dw_conv_args[i]->filter_address),
deleter));
// Square sub kernel with unit stride.
arg->dw_conv_args[i]->kernel.height = (uint32_t)sub_filter_width;
arg->dw_conv_args[i]->kernel.width = (uint32_t)sub_filter_width;
arg->dw_conv_args[i]->kernel.stride_h = (uint32_t)1;
arg->dw_conv_args[i]->kernel.stride_w = (uint32_t)1;
// Every sub conv reads the same input image.
arg->dw_conv_args[i]->image.address = input_ptr;
arg->dw_conv_args[i]->image.channels = (uint32_t)input->dims()[1];
arg->dw_conv_args[i]->image.height = (uint32_t)input->dims()[2];
arg->dw_conv_args[i]->image.width = (uint32_t)input->dims()[3];
arg->dw_conv_args[i]->image.pad_height = sub_pad;
arg->dw_conv_args[i]->image.pad_width = sub_pad;
arg->dw_conv_args[i]->image.scale_address = input->scale;
// Private 16-bit output buffer: sub_output_height rows of aligned length.
arg->dw_conv_args[i]->output.address =
fpga_malloc(sub_output_height *
align_to_x(sub_output_width * sub_channels * sub_conv_num,
IMAGE_ALIGNMENT) *
sizeof(int16_t));
// Private two-float scale buffer for this sub conv.
arg->dw_conv_args[i]->output.scale_address =
static_cast<float *>(fpga_malloc(2 * sizeof(float)));
arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(
reinterpret_cast<char *>(arg->dw_conv_args[i]->output.address),
deleter));
arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(
reinterpret_cast<char *>(arg->dw_conv_args[i]->output.scale_address),
deleter));
}
// arg->output.scale_address = out->scale;
} // end DWDeconv arg fill
} // namespace fpga
} // namespace paddle_mobile
......@@ -57,6 +57,10 @@ void fill_dwconv_arg(struct DWconvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int stride_h, int stride_w,
int padding_h, int padding_w, float* bias_ptr);
void fill_DWDeconv_arg(struct DWDeconvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int stride_h, int stride_w,
int padding_h, int padding_w, float* bs_ptr);
void format_deconv_filter(framework::Tensor* filter_tensor, float max_value,
int group_num, int stride);
......@@ -69,6 +73,10 @@ void format_deconv_data(framework::Tensor* filter_tensor,
void format_dwconv_data(framework::Tensor* filter_tensor,
framework::Tensor* ofm_tensor, float* scale_ptr,
float** bias_ptr);
void format_DWDeconv_data(framework::Tensor* filter_tensor,
framework::Tensor* ofm_tensor, float** bs_ptr,
int group, int sub_conv_n);
template <typename Dtype>
void savefile(std::string filename, void* buffer, int dataSize, Dtype tmp) {
float data;
......
......@@ -21,15 +21,6 @@ limitations under the License. */
#include "fpga/V1/api.h"
// #include "fpga_api.h"
// just for test
//#include <string>
//#include "deconv.h"
//#include "deconv_api.h"
// using namespace std;
// using namespace paddle_mobile::fpga;
// using namespace baidu::fpga::deconv::api;
// namespace api = baidu::fpga::deconv::api;
namespace paddle_mobile {
namespace fpga {
namespace deconv_filter {
......@@ -42,7 +33,8 @@ void deconv_inverse_filter(float** data_in, int num, int channel, int width,
float* tmp = *data_in;
int data_size = num * channel * width * height;
int hw_len = height * width;
auto tmp_data = (float*)fpga_malloc(data_size * sizeof(float));
auto tmp_data =
reinterpret_cast<float*>(fpga_malloc(data_size * sizeof(float)));
for (int i = 0; i < num; ++i) {
for (int j = 0; j < channel; ++j) {
for (int k = 0; k < hw_len; ++k) {
......@@ -97,9 +89,10 @@ int deconv_get_omit(int stride, int filter_width, int pad) {
return (stride - idx);
}
void deconv_get_sub_filter(char** data_in, int height, int width,
int sub_conv_n, int kernel_num, int channel) {
char* ptr_tmp = *data_in;
template <typename T>
void deconv_get_sub_filter(T** data_in, int height, int width, int sub_conv_n,
int kernel_num, int channel) {
T* ptr_tmp = *data_in;
int sub_num = kernel_num * sub_conv_n;
int sub_h = height / sub_conv_n;
int sub_w = width / sub_conv_n;
......@@ -107,7 +100,8 @@ void deconv_get_sub_filter(char** data_in, int height, int width,
int sub_filter_size =
kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n;
char* ptr_sub_filter = (char*)fpga_malloc(sub_filter_size * sizeof(char));
T* ptr_sub_filter =
reinterpret_cast<T*>(fpga_malloc(sub_filter_size * sizeof(T)));
for (int idx = 0; idx < sub_conv_n; ++idx) {
for (int nn = 0; nn < sub_num; ++nn) {
int ni = nn % kernel_num;
......@@ -124,7 +118,7 @@ void deconv_get_sub_filter(char** data_in, int height, int width,
fpga_copy(
ptr_sub_filter + idx * sub_h * sub_w * channel * sub_num + sidx,
(*data_in) + kidx, channel * sizeof(char));
(*data_in) + kidx, channel * sizeof(T));
// for (int cc =0; cc < channel; ++cc) {
// ptr_sub_filter[idx*sub_h*sub_w*channel*sub_num + sidx + cc] =
// (*data_in)[kidx + cc];
......@@ -140,7 +134,7 @@ void deconv_get_sub_filter(char** data_in, int height, int width,
void deconv_NC_convert(float** filter_in, int kernel_num, int channels,
int hw) {
float* tmp = *filter_in;
float* ptr_filter = (float*)(paddle_mobile::fpga::fpga_malloc(
float* ptr_filter = reinterpret_cast<float*>(paddle_mobile::fpga::fpga_malloc(
hw * kernel_num * channels * sizeof(float)));
for (int c = 0; c < channels; ++c) {
......@@ -188,7 +182,8 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
result2);
}*/
deconv_get_sub_filter(quantize_data, height, width, stride, num, channel);
deconv_get_sub_filter<char>(quantize_data, height, width, stride, num,
channel);
/*{
char result2 = (char)0;
string filename = "sub_filter_filter_data";
......@@ -212,10 +207,12 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
((residual == 0) ? div_num : (div_num - 1)) +
align_to_x(residual, FILTER_NUM_ALIGNMENT);
char** ptr_ptr_data = (char**)fpga_malloc(sub_conv_n * sizeof(char*));
char** ptr_ptr_data =
reinterpret_cast<char**>(fpga_malloc(sub_conv_n * sizeof(char*)));
int origin_offset = sub_chw * sub_num;
for (int i = 0; i < sub_conv_n; ++i) {
(ptr_ptr_data)[i] = (char*)fpga_malloc(origin_offset * sizeof(char));
(ptr_ptr_data)[i] =
reinterpret_cast<char*>(fpga_malloc(origin_offset * sizeof(char)));
fpga_copy((ptr_ptr_data)[i], (*quantize_data) + origin_offset * i,
origin_offset * sizeof(char));
......@@ -233,8 +230,8 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
int align_offset =
align_to_x(sub_chw, FILTER_ELEMENT_ALIGNMENT) * num_after_alignment;
char* ptr_space = (char*)fpga_malloc(sub_conv_n * align_offset *
sizeof(char)); // continuous space
char* ptr_space = reinterpret_cast<char*>(fpga_malloc(
sub_conv_n * align_offset * sizeof(char))); // continuous space
for (int i = 0; i < sub_conv_n; ++i) {
char* ptr_tmp = (ptr_ptr_data)[i];
......@@ -251,7 +248,7 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
fpga_copy(ptr_space + i * align_offset, ptr_tmp, align_offset);
fpga_free(ptr_tmp);
}
*data_in = (float*)ptr_space;
*data_in = reinterpret_cast<float*>(ptr_space);
/* {
char result2 = (char)0;
......@@ -262,6 +259,22 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
fpga_flush(ptr_space, sub_conv_n * align_offset * sizeof(char));
}
// Runs the filter-formatting pipeline for a depthwise deconvolution on the
// buffer referenced by *data_in. The helpers may replace *data_in, so the
// buffer is always accessed through the double pointer.
//
// data_in:   in/out pointer to the filter buffer (float on entry, handled as
//            16-bit data after quantize_to_fp16).
// num:       forwarded to deconv_inverse_filter and deconv_get_sub_filter.
// channel, height, width: filter geometry forwarded to every step.
// scale_ptr: forwarded to filter::quantize_to_fp16.
// stride:    passed to deconv_get_sub_filter as its sub_conv_n argument.
void DWDconv_format_filter(float** data_in, int num, int channel, int height,
                           int width, float* scale_ptr, int stride) {
  deconv_inverse_filter(data_in, num, channel, width, height);
  filter::quantize_to_fp16(data_in, channel, height, width, scale_ptr);

  // From here on the same storage is viewed as 16-bit elements.
  auto fp16_view = reinterpret_cast<int16_t**>(data_in);
  filter::convert_to_hwn(fp16_view, channel, height, width);
  deconv_get_sub_filter<int16_t>(fp16_view, height, width, stride, num,
                                 channel);
  filter::align_element_n(fp16_view, channel, height, width);

  // Flush the formatted buffer: aligned channel count times the kernel area.
  const auto flush_bytes = align_to_x(channel, FILTER_ELEMENT_ALIGNMENT) *
                           height * width * sizeof(int16_t);
  fpga_flush(*fp16_view, flush_bytes);
}
} // namespace deconv_filter
} // namespace fpga
} // namespace paddle_mobile
......@@ -24,11 +24,15 @@ int deconv_calc_sub_pad(int filter_axis, int pad, int stride);
int deconv_get_sub_filter_axis(int filter_axis, int stride);
int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis);
int deconv_get_omit(int stride, int filter_width, int pad);
void deconv_get_sub_filter(char** data_in, int height, int width,
int sub_conv_n, int kernel_num, int channel);
template <typename T>
void deconv_get_sub_filter(T** data_in, int height, int width, int sub_conv_n,
int kernel_num, int channel);
void deconv_format_filter(float** data_in, int num, int channel, int height,
int width, int group_num, float max, int stride);
void deconv_NC_convert(float** filter_in, int kernel_num, int channels, int hw);
void DWDconv_format_filter(float** data_in, int num, int channel, int height,
int width, float* scale_ptr, int stride);
} // namespace deconv_filter
} // namespace fpga
......
......@@ -346,6 +346,16 @@ void format_dwconv_filter(float **data_in, int num, int height, int width,
fpga_flush(*quantize_data, align_to_x(num, FILTER_ELEMENT_ALIGNMENT) *
height * width * sizeof(int16_t));
}
// Formats a depthwise-deconvolution filter in place: quantize, reorder via
// convert_to_hwn, align the element count, then flush the result.
//
// data_in:   in/out pointer to the filter buffer; the helpers may replace it.
// num, height, width: filter geometry forwarded to every step.
// scale_ptr: forwarded to quantize_to_fp16.
void format_DWDeconv_filter(float **data_in, int num, int height, int width,
                            float *scale_ptr) {
  quantize_to_fp16(data_in, num, height, width, scale_ptr);

  // After quantization the storage is handled as 16-bit elements.
  auto fp16_view = reinterpret_cast<int16_t **>(data_in);
  convert_to_hwn(fp16_view, num, height, width);
  align_element_n(fp16_view, num, height, width);

  const auto flush_bytes = align_to_x(num, FILTER_ELEMENT_ALIGNMENT) * height *
                           width * sizeof(int16_t);
  fpga_flush(*fp16_view, flush_bytes);
}
} // namespace filter
} // namespace fpga
} // namespace paddle_mobile
......@@ -18,7 +18,6 @@ limitations under the License. */
#include "fpga/V1/image.h"
#include "fpga/common/config.h"
#include "fpga/common/driver.h"
#ifdef COST_TIME_PRINT
#include <sys/time.h>
#include <time.h>
......@@ -163,6 +162,7 @@ using namespace std; // NOLINT
#define REG_DWCONV_FILTER_BASE_ADDR 0xe08
#define REG_DWCONV_FILTER_SHAPE 0xe10
#define REG_DWCONV_FILTER_N_ALIGN 0xe18
#define REG_DWCONV_FILTER_SUBNUMBER 0xe20
#define REG_DWCONV_CMD 0xe00
int ComputeFpgaConv(const struct SplitConvArgs &args) {
......@@ -591,6 +591,20 @@ int PerformBypass(const struct BypassArgs &args) {
return 0;
} // PerformBypass
// Returns the value of the REG_HARDWARE_STATUS register when built with
// PADDLE_MOBILE_ZU5, and 0 otherwise. The register read is done while holding
// the PE mutex.
uint64_t FPGAVersion() {
#ifdef FPGA_PRINT_MODE
  // Banner previously named a different function (copy-paste).
  DLOG << "=============FPGAVersion===========";
#endif
#ifdef PADDLE_MOBILE_ZU5
  pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
  const uint64_t fpga_ver = reg_readq(REG_HARDWARE_STATUS);
  pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
  return fpga_ver;
#else
  // No hardware backend compiled in.
  return 0;
#endif
}  // FPGAVersion
int ComputeFPGAConcat(const struct ConcatArgs &args) {
#ifdef FPGA_PRINT_MODE
DLOG << "=============ComputeFpgaConcat===========";
......@@ -655,6 +669,45 @@ void deconv_post_process(const struct DeconvArgs &args) {
fpga_flush(args.output.address,
num * align_deconv_row_len * deconv_h * sizeof(int16_t));
}
// Assembles the final depthwise-deconvolution output from the per-sub-conv
// output buffers.
//
// Output row `row` is taken from sub conv (sub_conv_num - row % sub_conv_num
// - 1) at its row `row / sub_conv_num`. The first and last omit_size rows are
// skipped, and omit_size * channel elements are skipped at the start of each
// copied row. Destination rows are written at an IMAGE_ALIGNMENT-aligned
// pitch, and the assembled output is flushed at the end.
//
// NOTE(review): each buffer is invalidated for origin_h rows, while
// fill_DWDeconv_arg allocates sub_output_height rows per sub conv -- confirm
// the invalidate length is intended.
void DWDeconv_post_process(const struct DWDeconvArgs &args) {
  const int split_n = args.sub_conv_num;
  const int part_h = args.sub_output_height;
  const int part_w = args.sub_output_width;
  const int trim = args.omit_size;
  const int channels = args.filter_num;
  const int batch = 1;

  const int full_h = part_h * split_n;
  const int full_w = part_w * split_n;
  const int src_pitch = align_to_x(full_w * channels, IMAGE_ALIGNMENT);
  const int out_h = full_h - 2 * trim;
  const int out_w = full_w - 2 * trim;
  const int row_len = out_w * channels;
  const int dst_pitch = align_to_x(row_len, IMAGE_ALIGNMENT);

  // Make the hardware-written sub outputs visible to the CPU.
  for (int k = 0; k < split_n; ++k) {
    paddle_mobile::fpga::fpga_invalidate(
        args.dw_conv_args[k]->output.address,
        src_pitch * full_h * sizeof(int16_t));
  }

  int dst_off = 0;
  for (int b = 0; b < batch; ++b) {
    for (int row = 0; row < full_h; ++row) {
      const int phase = row % split_n;
      int16_t *const src_base = static_cast<int16_t *>(
          args.dw_conv_args[split_n - phase - 1]->output.address);
      const int src_row = row / split_n;

      const bool in_trimmed_band = (row < trim) || (row >= (full_h - trim));
      if (!in_trimmed_band) {
        const int src_off =
            (b * full_h * src_pitch + src_row * src_pitch + trim * channels);
        fpga_copy(static_cast<int16_t *>(args.output.address) + dst_off,
                  src_base + src_off, sizeof(int16_t) * row_len);
        dst_off += dst_pitch;
      }
    }
  }

  fpga_flush(args.output.address,
             batch * dst_pitch * out_h * sizeof(int16_t));
}
int ComputeFpgaDeconv(const struct DeconvArgs &args) {
#ifdef FPGA_PRINT_MODE
......@@ -792,17 +845,21 @@ int ComputeDWConv(const struct DWconvArgs &args) {
align_to_x((uint64_t)args.image.channels, IMAGE_ALIGNMENT);
uint64_t filter_amount_per_row_align =
filter_N_align * (uint64_t)args.kernel.width;
uint64_t filter_amount_align = filter_N_align * (uint64_t)args.kernel.width *
(uint64_t)args.kernel.height;
uint64_t sub_filter_amount_align = filter_N_align *
(uint64_t)args.kernel.width *
(uint64_t)args.kernel.height;
uint64_t filter_amount_align =
sub_filter_amount_align * (uint64_t)args.sub_conv_num;
uint32_t output_height = (uint32_t)(
(args.image.height + args.image.pad_height * 2 - args.kernel.height) /
args.kernel.stride_h +
1);
uint32_t output_width = (uint32_t)(
(args.image.width + args.image.pad_width * 2 - args.kernel.width) /
args.kernel.stride_w +
1);
((args.image.width + args.image.pad_width * 2 - args.kernel.width) /
args.kernel.stride_w +
1) *
args.sub_conv_num);
uint64_t image_amount_per_row =
align_to_x((uint64_t)args.image.width * (uint64_t)args.image.channels,
......@@ -845,12 +902,15 @@ int ComputeDWConv(const struct DWconvArgs &args) {
/*restart scale*/
reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR);
reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR);
reg_writeq((bias_physical_address << 32 | filter_physical_address),
REG_DWCONV_FILTER_BASE_ADDR);
reg_writeq(filter_amount_per_row_align | (filter_amount_align << 32),
REG_DWCONV_FILTER_SHAPE);
reg_writeq(sub_filter_amount_align | (((uint64_t)args.sub_conv_num) << 32),
REG_DWCONV_FILTER_SUBNUMBER);
reg_writeq(filter_N_align, REG_DWCONV_FILTER_N_ALIGN);
reg_writeq(
......@@ -904,10 +964,88 @@ int ComputeDWConv(const struct DWconvArgs &args) {
output_scale = reg_readq(REG_SCALE_PARAMETER);
output_scale = (output_scale << 32) | (output_scale >> 32);
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
DLOG << "output_scale:" << output_scale;
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
#endif
return 0;
}
// Runs a depthwise deconvolution as args.sub_conv_num depthwise convolutions
// and then assembles the final output.
//
// Steps:
//   1. ComputeDWConv is called for every entry of args.dw_conv_args.
//   2. When there is more than one sub conv, the output scale pair of the sub
//      conv with the largest scale[0] is copied to args.output.scale_address.
//   3. DWDeconv_post_process gathers the sub outputs into args.output.
//
// Always returns 0; the return values of ComputeDWConv are not inspected.
//
// NOTE(review): with a single sub conv the scale is not copied to
// args.output.scale_address at all -- confirm that is intended.
int ComputeDWDeconv(const struct DWDeconvArgs &args) {
#ifdef FPGA_PRINT_MODE
  DLOG << "=============ComputeDWDeconv===========";
  DLOG << " filter_num:" << args.filter_num
       << " group_num:" << args.group_num << "omit_size:" << args.omit_size
       << "sub_output_width: " << args.sub_output_width
       << "sub_output_height: " << args.sub_output_height
       << " sub_conv_num:" << args.sub_conv_num;
  DLOG << "args.output.address: " << args.output.address
       << "args.output.scale_address: " << args.output.scale_address;
#endif

  int sub_conv_num = args.sub_conv_num;

#ifdef COST_TIME_PRINT
  timeval start, end;
  long dif_sec, dif_usec;  // NOLINT
#endif

  for (int i = 0; i < sub_conv_num; i++) {
#ifdef COST_TIME_PRINT
    gettimeofday(&start, nullptr);
#endif

    ComputeDWConv(*args.dw_conv_args[i]);
#ifdef COST_TIME_PRINT
    gettimeofday(&end, nullptr);
    dif_sec = end.tv_sec - start.tv_sec;
    dif_usec = end.tv_usec - start.tv_usec;
    std::cout << "deconv basic_conv: " << i << " times:  "
              << "    cost time: " << (dif_sec * 1000000 + dif_usec) << "us"
              << std::endl;
#endif
  }

  if (sub_conv_num > 1) {
    float max_scale = -1.0f;
#ifdef COST_TIME_PRINT
    gettimeofday(&start, nullptr);
#endif
    for (int i = 0; i < sub_conv_num; i++) {
      // The scale pair was written by hardware; refresh the CPU view first.
      paddle_mobile::fpga::fpga_invalidate(
          args.dw_conv_args[i]->output.scale_address, 2 * sizeof(float));
      float ptr_scale = (args.dw_conv_args[i]->output.scale_address)[0];
      if (ptr_scale > max_scale) {
        // Track the running maximum; without this update every positive
        // scale passed the test and the last sub conv always won.
        max_scale = ptr_scale;
        args.output.scale_address[0] = ptr_scale;
        args.output.scale_address[1] =
            (args.dw_conv_args[i]->output.scale_address)[1];
      }
    }

#ifdef COST_TIME_PRINT
    gettimeofday(&end, nullptr);
    dif_sec = end.tv_sec - start.tv_sec;
    dif_usec = end.tv_usec - start.tv_usec;
    std::cout << "deconv scale  "
              << "    cost time: " << (dif_sec * 1000000 + dif_usec) << "us"
              << std::endl;
#endif
  }

#ifdef COST_TIME_PRINT
  gettimeofday(&start, nullptr);
#endif
  DWDeconv_post_process(args);
#ifdef COST_TIME_PRINT
  gettimeofday(&end, nullptr);
  dif_sec = end.tv_sec - start.tv_sec;
  dif_usec = end.tv_usec - start.tv_usec;
  std::cout << "deconv_post_process "
            << "    cost time: " << (dif_sec * 1000000 + dif_usec) << "us"
            << std::endl;
#endif
  return 0;
}  // ComputeDWDeconv
} // namespace fpga
} // namespace paddle_mobile
......@@ -76,7 +76,7 @@ int32_t convertmantissa(int32_t i) {
}
float fp16_2_fp32(int16_t fp16_num) {
int16_t se_fp16 = fp16_num >> 10;
int16_t se_fp16 = (fp16_num >> 10) & 0x3f;
int16_t m_fp16 = fp16_num & 0x3ff;
int32_t e_fp32 = 0;
int16_t offset = 0;
......@@ -94,7 +94,7 @@ float fp16_2_fp32(int16_t fp16_num) {
e_fp32 = 0x80000000;
offset = 0;
} else if (se_fp16 < 63) {
e_fp32 = 0x80000000 + (se_fp16 - 32) << 23;
e_fp32 = 0x80000000 + ((se_fp16 - 32) << 23);
offset = 1024;
} else { // se_fp16 == 63
e_fp32 = 0xC7800000;
......
......@@ -229,6 +229,7 @@ struct DeconvArgs {
std::vector<std::shared_ptr<SplitConvArgs>> split_conv_args;
};
struct DWconvArgs {
uint32_t sub_conv_num;
bool relu_enabled;
void* bias_address;
void* filter_address;
......@@ -236,6 +237,19 @@ struct DWconvArgs {
struct ImageInputArgs image;
struct ImageOutputArgs output;
};
// Arguments for a depthwise deconvolution that is executed as several
// depthwise convolutions whose outputs are merged afterwards.
struct DWDeconvArgs {
// Number of sub convolutions; fill_DWDeconv_arg sets it from stride_w.
uint32_t sub_conv_num;
// Set from filter->dims()[0] by fill_DWDeconv_arg.
uint32_t group_num;
// Set from filter->dims()[0] by fill_DWDeconv_arg; used as the channel count
// when the sub outputs are merged.
uint32_t filter_num;
// Rows/columns dropped at each border when the sub outputs are merged.
uint32_t omit_size;
// Output width and height of one sub convolution.
uint32_t sub_output_width;
uint32_t sub_output_height;
// Final, merged output buffer and its scale pair.
struct ImageOutputArgs output;
// One argument set per sub convolution.
std::vector<std::shared_ptr<DWconvArgs>> dw_conv_args;
// Owners of the buffers allocated for the sub convolutions.
std::vector<std::shared_ptr<char>> vector_dw_conv_space;
};
// static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x;
// }
static inline uint32_t align_to_x(int64_t num, int64_t x) {
......
......@@ -18,6 +18,7 @@ limitations under the License. */
namespace paddle_mobile {
namespace fpga {
uint64_t FPGAVersion();
int PerformBypass(const struct BypassArgs& args);
int ComputeBasicConv(const struct ConvArgs& args);
int ComputeFpgaPool(const struct PoolingArgs& args);
......@@ -28,5 +29,7 @@ int ComputeFPGAConcat(const struct ConcatArgs& args);
int ComputeFPGASplit(const struct SplitArgs& args);
int ComputeFpgaDeconv(const struct DeconvArgs& args);
int ComputeDWConv(const struct DWconvArgs& args);
int ComputeDWDeconv(const struct DWDeconvArgs& args);
} // namespace fpga
} // namespace paddle_mobile
......@@ -90,6 +90,10 @@ class Attribute {
attr.Set<int64_t>(attr_desc->l);
break;
}
case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK: {
attr.Set<int>(attr_desc->block_idx);
break;
}
default:
PADDLE_MOBILE_THROW_EXCEPTION("attr type not support");
}
......
......@@ -65,6 +65,7 @@ Executor<Device, T>::Executor(const Program<Device> &program,
for (int j = 0; j < ops.size(); ++j) {
std::shared_ptr<OpDesc> op_desc = ops[j];
DLOG << "create op: " << op_desc->Type();
auto op_handler = OpRegistry<Device>::CreateOp(
op_desc->Type(), op_desc->GetInputs(), op_desc->GetOutputs(),
op_desc->GetAttrMap(), program_.scope);
......
......@@ -168,6 +168,9 @@ LOAD_FUSION_MATCHER(fusion_conv_bn_relu);
#ifdef GRU_OP
LOAD_OP1(gru, CPU);
#endif
#ifdef GRU_UNIT_OP
LOAD_OP1(gru_unit, CPU);
#endif
#ifdef FUSION_CONVADDBN_OP
LOAD_OP2(fusion_conv_add_bn, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_bn);
......@@ -189,6 +192,9 @@ LOAD_OP1(crf_decoding, CPU);
#ifdef MUL_OP
LOAD_OP2(mul, CPU, MALI_GPU);
#endif
#ifdef NORM_OP
LOAD_OP1(norm, CPU);
#endif
#ifdef RELU_OP
LOAD_OP2(relu, CPU, MALI_GPU);
LOAD_OP1(relu6, CPU);
......@@ -279,3 +285,24 @@ LOAD_OP1(lod_reset, CPU);
#ifdef LESS_THAN_OP
LOAD_OP1(less_than, CPU);
#endif
#ifdef LOGICAL_AND_OP
LOAD_OP1(logical_and, CPU);
#endif
#ifdef LOGICAL_OR_OP
LOAD_OP1(logical_or, CPU);
#endif
#ifdef LOGICAL_NOT_OP
LOAD_OP1(logical_not, CPU);
#endif
#ifdef LOGICAL_XOR_OP
LOAD_OP1(logical_xor, CPU);
#endif
#ifdef WHILE_OP
LOAD_OP1(while, CPU);
#endif
#ifdef WRITE_TO_ARRAY_OP
LOAD_OP1(write_to_array, CPU);
#endif
#ifdef READ_FROM_ARRAY_OP
LOAD_OP1(read_from_array, CPU);
#endif
......@@ -176,6 +176,8 @@ LoDTensor LodExpand(const LoDTensor &source, const LoD &lod, size_t level) {
return tensor;
}
using LoDTensorArray = std::vector<LoDTensor>;
// Get the absolute offset of a lod[start_level][start_idx:end_idx] and
// relative length of details for every levels(i.e., [start_level: ]).
//
......
......@@ -41,9 +41,7 @@ OpDesc::OpDesc(PaddleMobile__Framework__Proto__OpDesc *desc) {
for (int k = 0; k < desc->n_attrs; ++k) {
PaddleMobile__Framework__Proto__OpDesc__Attr *attr = desc->attrs[k];
std::string attr_name(attr->name);
if (attr->type != PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK) {
attrs_[attr_name] = Attribute::GetAttrValue(attr);
}
attrs_[attr_name] = Attribute::GetAttrValue(attr);
}
}
......
......@@ -44,6 +44,11 @@
*/
@property (assign, nonatomic) BOOL optimize;
/**
@b Whether to initialize memory at prediction time; used to handle variable-size input
*/
@property (assign, nonatomic) BOOL loadWhenPredict;
@end
@interface PaddleMobileCPU : NSObject
......
......@@ -73,6 +73,8 @@ static std::mutex shared_mutex;
- (instancetype)initWithConfig:(PaddleMobileCPUConfig *)config {
if (self = [super init]) {
paddle_mobile::PaddleMobileConfigInternal configInternal;
configInternal.load_when_predict = config.loadWhenPredict;
pam_ = new paddle_mobile::PaddleMobile<paddle_mobile::CPU, float>();
_config = config;
}
......
......@@ -7,15 +7,7 @@ public class PML {
* @param modelDir model dir
* @return isloadsuccess
*/
public static native boolean load(String modelDir);
/**
* load seperated model
*
* @param modelDir model dir
* @return isloadsuccess
*/
public static native boolean loadnlp(String modelDir);
public static native boolean load(String modelDir, Boolean lodMode);
/**
* load combined model
......@@ -24,7 +16,7 @@ public class PML {
* @param paramPath param file path
* @return isloadsuccess
*/
public static native boolean loadCombined(String modelPath, String paramPath);
public static native boolean loadCombined(String modelPath, String paramPath, Boolean lodMode);
/**
* load model and qualified params
......@@ -32,7 +24,7 @@ public class PML {
* @param modelDir qualified model dir
* @return isloadsuccess
*/
public static native boolean loadQualified(String modelDir);
public static native boolean loadQualified(String modelDir, Boolean lodMode);
/**
* load model and qualified combined params
......@@ -41,7 +33,7 @@ public class PML {
* @param paramPath qualified param path
* @return isloadsuccess
*/
public static native boolean loadCombinedQualified(String modelPath, String paramPath);
public static native boolean loadCombinedQualified(String modelPath, String paramPath, Boolean lodMode);
/**
* predict image
......@@ -52,9 +44,12 @@ public class PML {
*/
public static native float[] predictImage(float[] buf, int[] ddims);
public static native float[] predictYuv(byte[] buf, int imgWidth, int imgHeight, int[] ddims, float[] meanValues);
// predict with variable length input
// support only one input and one output currently
public static native float[] predictLod(float[] buf);
/**
* clear model data
*/
......@@ -66,6 +61,4 @@ public class PML {
* @param threadCount threadCount
*/
public static native void setThread(int threadCount);
}
......@@ -39,7 +39,7 @@ using framework::Tensor;
using paddle_mobile::CPU;
using std::string;
extern const char *ANDROID_LOG_TAG =
const char *ANDROID_LOG_TAG =
"paddle_mobile LOG built on " __DATE__ " " __TIME__;
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
static std::mutex shared_mutex;
......@@ -55,51 +55,31 @@ string jstring2cppstring(JNIEnv *env, jstring jstr) {
JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_load(JNIEnv *env,
jclass thiz,
jstring modelPath) {
jstring modelPath,
jboolean lodMode) {
std::lock_guard<std::mutex> lock(shared_mutex);
ANDROIDLOGI("load invoked");
bool optimize = true;
bool isLoadOk = false;
#ifdef ENABLE_EXCEPTION
try {
isLoadOk = getPaddleMobileInstance()->Load(
jstring2cppstring(env, modelPath), optimize);
jstring2cppstring(env, modelPath), optimize, false, 1,
static_cast<bool>(lodMode));
} catch (paddle_mobile::PaddleMobileException &e) {
ANDROIDLOGE("jni got an PaddleMobileException! ", e.what());
isLoadOk = false;
}
#else
isLoadOk = getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
optimize);
#endif
return static_cast<jboolean>(isLoadOk);
}
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_PML_loadnlp(JNIEnv *env, jclass thiz, jstring modelPath) {
std::lock_guard<std::mutex> lock(shared_mutex);
ANDROIDLOGI("load invoked");
bool optimize = true;
bool isLoadOk = false;
#ifdef ENABLE_EXCEPTION
try {
isLoadOk = getPaddleMobileInstance()->Load(
jstring2cppstring(env, modelPath), optimize, false, 1, true);
} catch (paddle_mobile::PaddleMobileException &e) {
ANDROIDLOGE("jni got an PaddleMobileException! ", e.what());
isLoadOk = false;
}
#else
isLoadOk = getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
optimize, false, 1, true);
optimize, false, 1,
static_cast<bool>(lodMode));
#endif
return static_cast<jboolean>(isLoadOk);
}
JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadQualified(
JNIEnv *env, jclass thiz, jstring modelPath) {
JNIEnv *env, jclass thiz, jstring modelPath, jboolean lodMode) {
std::lock_guard<std::mutex> lock(shared_mutex);
ANDROIDLOGI("loadQualified invoked");
......@@ -110,21 +90,24 @@ JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadQualified(
#ifdef ENABLE_EXCEPTION
try {
isLoadOk = getPaddleMobileInstance()->Load(
jstring2cppstring(env, modelPath), optimize, qualified);
jstring2cppstring(env, modelPath), optimize, qualified, 1,
static_cast<bool>(lodMode));
} catch (paddle_mobile::PaddleMobileException &e) {
ANDROIDLOGE("jni got an PaddleMobileException! ", e.what());
isLoadOk = false;
}
#else
isLoadOk = getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
optimize, qualified);
optimize, qualified, 1,
static_cast<bool>(lodMode));
#endif
return static_cast<jboolean>(isLoadOk);
}
JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadCombined(
JNIEnv *env, jclass thiz, jstring modelPath, jstring paramPath) {
JNIEnv *env, jclass thiz, jstring modelPath, jstring paramPath,
jboolean lodMode) {
std::lock_guard<std::mutex> lock(shared_mutex);
ANDROIDLOGI("loadCombined invoked");
bool optimize = true;
......@@ -134,21 +117,22 @@ JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadCombined(
try {
isLoadOk = getPaddleMobileInstance()->Load(
jstring2cppstring(env, modelPath), jstring2cppstring(env, paramPath),
optimize);
optimize, false, 1, static_cast<bool>(lodMode));
} catch (paddle_mobile::PaddleMobileException &e) {
ANDROIDLOGE("jni got an PaddleMobileException! ", e.what());
isLoadOk = false;
}
#else
isLoadOk = getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
jstring2cppstring(env, paramPath),
optimize);
isLoadOk = getPaddleMobileInstance()->Load(
jstring2cppstring(env, modelPath), jstring2cppstring(env, paramPath),
optimize, false, 1, static_cast<bool>(lodMode));
#endif
return static_cast<jboolean>(isLoadOk);
}
JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadCombinedQualified(
JNIEnv *env, jclass thiz, jstring modelPath, jstring paramPath) {
JNIEnv *env, jclass thiz, jstring modelPath, jstring paramPath,
jboolean lodMode) {
std::lock_guard<std::mutex> lock(shared_mutex);
ANDROIDLOGI("loadCombinedQualified invoked");
bool optimize = true;
......@@ -159,15 +143,15 @@ JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadCombinedQualified(
try {
isLoadOk = getPaddleMobileInstance()->Load(
jstring2cppstring(env, modelPath), jstring2cppstring(env, paramPath),
optimize, qualified);
optimize, qualified, 1, static_cast<bool>(lodMode));
} catch (paddle_mobile::PaddleMobileException &e) {
ANDROIDLOGE("jni got an PaddleMobileException! ", e.what());
isLoadOk = false;
}
#else
isLoadOk = getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
jstring2cppstring(env, paramPath),
optimize, qualified);
isLoadOk = getPaddleMobileInstance()->Load(
jstring2cppstring(env, modelPath), jstring2cppstring(env, paramPath),
optimize, qualified, 1, static_cast<bool>(lodMode));
#endif
return static_cast<jboolean>(isLoadOk);
}
......
......@@ -26,24 +26,27 @@ namespace jni {
*/
JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_load(JNIEnv *env,
jclass thiz,
jstring modelPath);
jstring modelPath,
jboolean lodMode);
/**
* load separated qualified model for android
*/
JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadQualified(
JNIEnv *env, jclass thiz, jstring modelPath);
JNIEnv *env, jclass thiz, jstring modelPath, jboolean lodMode);
/**
* load combined model for android
*/
JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadCombined(
JNIEnv *env, jclass thiz, jstring modelPath, jstring paramPath);
JNIEnv *env, jclass thiz, jstring modelPath, jstring paramPath,
jboolean lodMode);
/**
* load combined qualified model for android
*/
JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadCombinedQualified(
JNIEnv *env, jclass thiz, jstring modelPath, jstring paramPath);
JNIEnv *env, jclass thiz, jstring modelPath, jstring paramPath,
jboolean lodMode);
/**
* object detection for android
......@@ -61,8 +64,8 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv(
/**
* object detection for android
*/
JNIEXPORT jfloatArray JNICALL
Java_com_baidu_paddle_PML_predict(JNIEnv *env, jclass thiz, jfloatArray buf);
JNIEXPORT jlongArray JNICALL
Java_com_baidu_paddle_PML_predictLod(JNIEnv *env, jclass thiz, jlongArray buf);
/**
* setThreadCount for multithread
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/controlflow/tensor_array_read_write_op.h"
namespace paddle_mobile {
namespace operators {
#ifdef WRITE_TO_ARRAY_OP
// Shape inference for write_to_array is a no-op: the body is empty, so no
// output dimensions are set by this operator before its kernel runs.
template <typename Dtype, typename T>
void WriteToArrayOp<Dtype, T>::InferShape() const {}
#endif // WRITE_TO_ARRAY_OP
#ifdef READ_FROM_ARRAY_OP
// Shape inference for read_from_array is a no-op: the body is empty, so no
// output dimensions are set by this operator before its kernel runs.
template <typename Dtype, typename T>
void ReadFromArrayOp<Dtype, T>::InferShape() const {}
#endif // READ_FROM_ARRAY_OP
} // namespace operators
} // namespace paddle_mobile
// Short alias used by the registration macros below.
namespace ops = paddle_mobile::operators;
// The registrations are emitted only when the CPU backend is enabled
// (PADDLE_MOBILE_CPU) and the corresponding op is compiled in.
// REGISTER_OPERATOR_CPU is defined elsewhere; presumably it binds the op type
// name ("write_to_array" / "read_from_array") to the operator class.
#ifdef PADDLE_MOBILE_CPU
#ifdef WRITE_TO_ARRAY_OP
REGISTER_OPERATOR_CPU(write_to_array, ops::WriteToArrayOp);
#endif // WRITE_TO_ARRAY_OP
#ifdef READ_FROM_ARRAY_OP
REGISTER_OPERATOR_CPU(read_from_array, ops::ReadFromArrayOp);
#endif // READ_FROM_ARRAY_OP
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/tensor_array_read_write_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
#ifdef WRITE_TO_ARRAY_OP
// Declares the write_to_array operator from its parameter and kernel types.
// DECLARE_OPERATOR is defined elsewhere; presumably it generates WriteToArrayOp.
DECLARE_OPERATOR(WriteToArray, WriteToArrayParam, WriteToArrayKernel);
#endif // WRITE_TO_ARRAY_OP
#ifdef READ_FROM_ARRAY_OP
// Declares the read_from_array operator from its parameter and kernel types.
// DECLARE_OPERATOR is defined elsewhere; presumably it generates ReadFromArrayOp.
DECLARE_OPERATOR(ReadFromArray, ReadFromArrayParam, ReadFromArrayKernel);
#endif // READ_FROM_ARRAY_OP
} // namespace operators
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/controlflow/while_op.h"
namespace paddle_mobile {
namespace operators {
#ifdef WHILE_OP
// Shape inference for the while operator is not implemented yet: the body
// contains only a TODO, so calling it has no effect.
template <typename Dtype, typename T>
void WhileOp<Dtype, T>::InferShape() const {
// TODO(hjchen2)
}
#endif // WHILE_OP
} // namespace operators
} // namespace paddle_mobile
// Short alias used by the registration macro below.
namespace ops = paddle_mobile::operators;
// The registration is emitted only when both the CPU backend
// (PADDLE_MOBILE_CPU) and WHILE_OP are enabled at compile time.
// REGISTER_OPERATOR_CPU is defined elsewhere; presumably it binds the op type
// name "while" to ops::WhileOp.
#ifdef PADDLE_MOBILE_CPU
#ifdef WHILE_OP
REGISTER_OPERATOR_CPU(while, ops::WhileOp);
#endif // WHILE_OP
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/while_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
#ifdef WHILE_OP
// Declares the while operator from its parameter and kernel types.
// DECLARE_OPERATOR is defined elsewhere; presumably it generates WhileOp.
DECLARE_OPERATOR(While, WhileParam, WhileKernel);
#endif // WHILE_OP
} // namespace operators
} // namespace paddle_mobile
......@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADDBNRELU_OP
#include "operators/kernel/conv_add_bn_relu_kernel.h"
#include <math.h>
#include <cmath>
#include "operators/kernel/central-arm-func/conv_add_bn_relu_arm_func.h"
namespace paddle_mobile {
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVBNADDRELU_OP
#include "operators/kernel/conv_bn_add_relu_kernel.h"
#include <cmath>
#include "operators/kernel/central-arm-func/conv_bn_add_relu_arm_func.h"
namespace paddle_mobile {
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVBNRELU_OP
#include "operators/kernel/conv_bn_relu_kernel.h"
#include <cmath>
#include "operators/kernel/central-arm-func/conv_bn_relu_arm_func.h"
namespace paddle_mobile {
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef FUSION_DWCONVBNRELU_OP
#include "operators/kernel/dwconv_bn_relu_kernel.h"
#include <cmath>
#include "operators/kernel/central-arm-func/dwconv_bn_relu_arm_func.h"
namespace paddle_mobile {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/logical_kernel.h"
namespace paddle_mobile {
namespace operators {
/// Binary functor returning the logical conjunction of two values, each
/// interpreted through its conversion to bool.
template <typename T>
struct LogicalAndFunctor {
  bool operator()(const T& lhs, const T& rhs) const {
    // A false left operand decides the result; the right one is not inspected.
    if (!lhs) {
      return false;
    }
    return static_cast<bool>(rhs);
  }
};
/// Binary functor returning the logical disjunction of two values, each
/// interpreted through its conversion to bool.
template <typename T>
struct LogicalOrFunctor {
  bool operator()(const T& lhs, const T& rhs) const {
    // A true left operand decides the result; the right one is not inspected.
    if (lhs) {
      return true;
    }
    return static_cast<bool>(rhs);
  }
};
/// Unary functor returning the logical negation of a value interpreted
/// through its conversion to bool.
template <typename T>
struct LogicalNotFunctor {
  bool operator()(const T& operand) const { return operand ? false : true; }
};
/// Binary functor returning true when exactly one of the two values is
/// truthy (logical exclusive-or on their conversions to bool).
template <typename T>
struct LogicalXorFunctor {
  bool operator()(const T& lhs, const T& rhs) const {
    const bool left = static_cast<bool>(lhs);
    const bool right = static_cast<bool>(rhs);
    return left != right;
  }
};
/// Applies the unary `Functor` to every element of `inputX` (numel() values
/// read as T) and stores each result at the same position in `output`.
/// The output buffer is obtained with data<T>(), so it is expected to have
/// been allocated by the caller beforehand.
template <typename T, typename Functor>
void UnaryLogicalCompute(const Tensor* inputX, Tensor* output) {
  auto first = inputX->data<T>();
  auto last = first + inputX->numel();
  auto dest = output->data<T>();
  std::transform(first, last, dest, Functor());
}
/// Applies the binary `Functor` element-wise to `inputX` and `inputY` and
/// stores each result at the same position in `output`.
/// The element count is taken from `inputX` only, and the output buffer is
/// obtained with data<T>(), so it is expected to be allocated by the caller.
// NOTE(review): inputY is read for inputX->numel() elements without a size
// check here; presumably callers guarantee matching shapes - confirm.
template <typename T, typename Functor>
void BinaryLogicalCompute(const Tensor* inputX, const Tensor* inputY,
                          Tensor* output) {
  auto lhs_first = inputX->data<T>();
  auto lhs_last = lhs_first + inputX->numel();
  auto rhs_first = inputY->data<T>();
  auto dest = output->data<T>();
  std::transform(lhs_first, lhs_last, rhs_first, dest, Functor());
}
#ifdef LOGICAL_AND_OP
/// The CPU logical_and kernel needs no one-time setup; always succeeds.
template <>
bool LogicalAndKernel<CPU, float>::Init(LogicalBinaryParam<CPU>* param) {
  return true;
}

/// Computes Out = X && Y element-wise over bool data.
template <>
void LogicalAndKernel<CPU, float>::Compute(
    const LogicalBinaryParam<CPU>& param) {
  using AndFn = LogicalAndFunctor<bool>;
  auto* lhs = param.InputX();
  auto* rhs = param.InputY();
  auto* result = param.Out();
  // Make sure the output owns a bool buffer before it is written.
  result->mutable_data<bool>();
  BinaryLogicalCompute<bool, AndFn>(lhs, rhs, result);
}
#endif
#ifdef LOGICAL_OR_OP
/// The CPU logical_or kernel needs no one-time setup; always succeeds.
template <>
bool LogicalOrKernel<CPU, float>::Init(LogicalBinaryParam<CPU>* param) {
  return true;
}

/// Computes Out = X || Y element-wise over bool data.
template <>
void LogicalOrKernel<CPU, float>::Compute(
    const LogicalBinaryParam<CPU>& param) {
  using OrFn = LogicalOrFunctor<bool>;
  auto* lhs = param.InputX();
  auto* rhs = param.InputY();
  auto* result = param.Out();
  // Make sure the output owns a bool buffer before it is written.
  result->mutable_data<bool>();
  BinaryLogicalCompute<bool, OrFn>(lhs, rhs, result);
}
#endif
#ifdef LOGICAL_NOT_OP
/// The CPU logical_not kernel needs no one-time setup; always succeeds.
template <>
bool LogicalNotKernel<CPU, float>::Init(LogicalUnaryParam<CPU>* param) {
  return true;
}

/// Computes Out = !X element-wise over bool data.
template <>
void LogicalNotKernel<CPU, float>::Compute(
    const LogicalUnaryParam<CPU>& param) {
  using NotFn = LogicalNotFunctor<bool>;
  auto* operand = param.InputX();
  auto* result = param.Out();
  // Make sure the output owns a bool buffer before it is written.
  result->mutable_data<bool>();
  UnaryLogicalCompute<bool, NotFn>(operand, result);
}
#endif
#ifdef LOGICAL_XOR_OP
/// The CPU logical_xor kernel needs no one-time setup; always succeeds.
template <>
bool LogicalXorKernel<CPU, float>::Init(LogicalBinaryParam<CPU>* param) {
  return true;
}

/// Computes Out = X xor Y element-wise over bool data.
template <>
void LogicalXorKernel<CPU, float>::Compute(
    const LogicalBinaryParam<CPU>& param) {
  using XorFn = LogicalXorFunctor<bool>;
  auto* lhs = param.InputX();
  auto* rhs = param.InputY();
  auto* result = param.Out();
  // Make sure the output owns a bool buffer before it is written.
  result->mutable_data<bool>();
  BinaryLogicalCompute<bool, XorFn>(lhs, rhs, result);
}
#endif
} // namespace operators
} // namespace paddle_mobile
......@@ -167,7 +167,7 @@ float find_abs_max(const Tensor *input) {
max_abs = vmaxvq_f32(__max);
#endif
for (size_t i = 0; i < remain; ++i) {
max_abs = std::max(max_abs, std::abs(x[i]));
max_abs = std::max(max_abs, fabs(x[i]));
}
return max_abs;
}
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/tensor_array_read_write_kernel.h"
namespace paddle_mobile {
namespace operators {
#ifdef WRITE_TO_ARRAY_OP
/// The CPU write_to_array kernel needs no one-time setup; always succeeds.
template <>
bool WriteToArrayKernel<CPU, float>::Init(WriteToArrayParam<CPU> *param) {
  return true;
}

/// Stores the input tensor in slot `index` of the output tensor array.
///
/// The slot index is read from the first int64 element of `param.index_`.
/// The array is grown when the slot does not exist yet, the input's LoD is
/// copied to the slot, and the tensor data is copied only when the input
/// actually holds memory.
// NOTE(review): a negative index is not rejected here; presumably the model
// never produces one - confirm.
template <>
void WriteToArrayKernel<CPU, float>::Compute(
    const WriteToArrayParam<CPU> &param) {
  const int64_t offset = param.index_->data<int64_t>()[0];
  if (offset >= param.output_->size()) {
    // Slot `offset` must be addressable afterwards, which requires
    // offset + 1 elements; resizing to `offset` would leave at(offset)
    // one past the end.
    param.output_->resize(offset + 1);
  }
  framework::LoDTensor *out_tensor = &(param.output_->at(offset));
  out_tensor->set_lod(param.input_->lod());
  if (param.input_->memory_size() > 0) {
    TensorCopy(*(param.input_), out_tensor);
  }
}
#endif // WRITE_TO_ARRAY_OP
#ifdef READ_FROM_ARRAY_OP
/// The CPU read_from_array kernel needs no one-time setup; always succeeds.
template <>
bool ReadFromArrayKernel<CPU, float>::Init(ReadFromArrayParam<CPU> *param) {
  return true;
}

/// Copies slot `index` of the input tensor array into the output tensor.
///
/// The slot index is read from the first int64 element of `param.index_`.
/// When the index is not below the array size nothing is copied and the
/// output is left untouched.
template <>
void ReadFromArrayKernel<CPU, float>::Compute(
    const ReadFromArrayParam<CPU> &param) {
  const int64_t offset = param.index_->data<int64_t>()[0];
  if (offset < param.input_->size()) {
    TensorCopy(param.input_->at(offset), param.output_);
  }
}
#endif // READ_FROM_ARRAY_OP
} // namespace operators
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/while_kernel.h"
namespace paddle_mobile {
namespace operators {
#ifdef WHILE_OP
/// The CPU while kernel performs no initialisation; always succeeds.
template <>
bool WhileKernel<CPU, float>::Init(WhileParam<CPU> *param) {
  return true;
}

/// Not implemented yet: the body is empty, so running this kernel is a no-op.
template <>
void WhileKernel<CPU, float>::Compute(const WhileParam<CPU> &param) {
  // TODO(hjchen2)
}
#endif // WHILE_OP
} // namespace operators
} // namespace paddle_mobile
......@@ -21,6 +21,7 @@ limitations under the License. */
#include "operators/math/gemm.h"
#include "operators/math/math_function.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef BATCHNORM_OP
#include "operators/kernel/batchnorm_kernel.h"
#include <cmath>
namespace paddle_mobile {
namespace operators {
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADDBNRELU_OP
#include "operators/kernel/conv_add_bn_relu_kernel.h"
#include <cmath>
#include "framework/cl/cl_image.h"
#include "framework/cl/cl_tool.h"
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVBNADDRELU_OP
#include "operators/kernel/conv_bn_add_relu_kernel.h"
#include <cmath>
namespace paddle_mobile {
namespace operators {
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVBNRELU_OP
#include "operators/kernel/conv_bn_relu_kernel.h"
#include <cmath>
namespace paddle_mobile {
namespace operators {
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef FUSION_DWCONVBNRELU_OP
#include "operators/kernel/dwconv_bn_relu_kernel.h"
#include <cmath>
namespace paddle_mobile {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RELU_OP
#include "operators/kernel/relu_kernel.h"
#include "operators/kernel/activation_kernel.h"
namespace paddle_mobile {
namespace operators {
......
......@@ -15,7 +15,8 @@ limitations under the License. */
#ifdef FUSION_CONVADDBN_OP
#include "operators/kernel/conv_add_bn_kernel.h"
#include <math.h>
#include <cmath>
namespace paddle_mobile {
namespace operators {
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADDBNRELU_OP
#include "operators/kernel/conv_add_bn_relu_kernel.h"
#include <math.h>
#include <cmath>
namespace paddle_mobile {
namespace operators {
......
文件模式从 100755 更改为 100644
文件模式从 100755 更改为 100644
......@@ -49,13 +49,23 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
"filter width should be equal to filter height ");
PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
"filter axis should be the multiple of stride axis ");
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(deconv_arg);
if (param->Groups() == channel) {
fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
sub_conv_n);
fpga::DWDeconvArgs DWDeconv_arg = {0};
fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, relu_enabled,
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(DWDeconv_arg);
} else {
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(deconv_arg);
}
return true;
}
......@@ -63,7 +73,11 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
template <>
void DeconvAddKernel<FPGA, float>::Compute(
const FusionDeconvAddParam<FPGA> &param) {
fpga::ComputeFpgaDeconv(param.FpgaArgs());
if (param.Groups() == param.Output()->dims()[1]) {
fpga::ComputeDWDeconv(param.FpgaDWDconvArgs());
} else {
fpga::ComputeFpgaDeconv(param.FpgaArgs());
}
}
} // namespace operators
......
......@@ -50,20 +50,35 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
"filter width should be equal to filter height ");
PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
"filter axis should be the multiple of stride axis ");
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(deconv_arg);
if (param->Groups() == channel) {
fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
sub_conv_n);
fpga::DWDeconvArgs DWDeconv_arg = {0};
fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, relu_enabled,
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(DWDeconv_arg);
} else {
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(deconv_arg);
}
return true;
}
template <>
void DeconvAddReluKernel<FPGA, float>::Compute(
const FusionDeconvAddReluParam<FPGA> &param) {
fpga::ComputeFpgaDeconv(param.FpgaArgs());
// fpga::ComputeFpgaDeconv(param.FpgaArgs());
if (param.Groups() == param.Output()->dims()[1]) {
fpga::ComputeDWDeconv(param.FpgaDWDconvArgs());
} else {
fpga::ComputeFpgaDeconv(param.FpgaArgs());
}
}
} // namespace operators
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SIGMOID_OP
#include "operators/kernel/activation_kernel.h"
namespace paddle_mobile {
namespace operators {
using framework::DDim;
using framework::Tensor;
/// Prepares the FPGA bypass transfer used by the sigmoid kernel.
///
/// Formats the output as an fp32 feature map, allocates an fp32 staging
/// tensor shaped from the input dimensions, and fills a BypassArgs
/// descriptor that converts the input from FP16/HWC to FP32/CHW into that
/// staging tensor. The staging tensor and descriptor are stored on `param`.
///
/// Only rank-2 and rank-4 inputs are supported; any other rank is logged
/// and rejected, because the code below indexes dims()[1] and needs an
/// allocated staging buffer.
///
/// @param param operator parameters; receives the staging tensor and args.
/// @return true on success, false when the input rank is unsupported.
template <>
bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) {
  auto input = const_cast<Tensor *>(param->InputX());
  auto input_ptr = input->data<float>();
  auto out = param->Out();
  fpga::format_fp32_ofm(out);

  const auto &in_dims = input->dims();
  const bool is_2d = (in_dims.size() == 2);
  const bool is_4d = (in_dims.size() == 4);
  if (!is_2d && !is_4d) {
    // Bail out before allocating: continuing would use an unallocated
    // staging tensor.
    DLOG << "wrong dimension of sigmoid input";
    return false;
  }

  // Handed to param via SetFloatInput below; presumably param takes
  // ownership of the pointer - confirm against SigmoidParam.
  auto float_input = new Tensor;
  if (is_2d) {
    float_input->mutable_data<float>({1, in_dims[1]});
  } else {
    float_input->mutable_data<float>(
        {1, in_dims[2], in_dims[3], in_dims[1]});
  }
  fpga::format_fp32_ofm(float_input);

  fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
  args.input_layout_type = fpga::LAYOUT_HWC;
  args.output_layout_type = fpga::LAYOUT_CHW;
  args.input_data_type = fpga::DATA_TYPE_FP16;
  args.output_data_type = fpga::DATA_TYPE_FP32;
  args.image.address = input_ptr;
  // Rank-2 inputs are treated as a 1x1 image with dims()[1] channels.
  args.image.height = is_4d ? static_cast<uint32_t>(in_dims[2]) : 1;
  args.image.width = is_4d ? static_cast<uint32_t>(in_dims[3]) : 1;
  args.image.channels = static_cast<uint32_t>(in_dims[1]);
  args.output.address = float_input->data<float>();
  args.output.scale_address = float_input->scale;
  param->SetFloatInput(float_input);
  param->SetFpgaArgs(args);
  return true;
}
/// Logistic function: returns 1 / (1 + exp(-a)), converted back to T.
template <typename T>
T Sigmoid(const T a) {
  const T negated = -1.0f * a;
  const auto denominator = 1.0 + exp(negated);
  return 1.0 / denominator;
}
/// Applies Sigmoid<T> to each of the numel() elements of `input` and writes
/// the results to the same positions in `output`, whose buffer is obtained
/// through mutable_data<T>().
template <typename T>
void sigmoidFuntor(Tensor *input, Tensor *output) {
  auto *src = input->data<T>();
  auto *dst = output->mutable_data<T>();
  for (int idx = 0; idx < input->numel(); ++idx) {
    dst[idx] = Sigmoid<T>(src[idx]);
  }
}
/// Runs sigmoid for the FPGA backend.
///
/// Steps: perform the bypass transfer configured in Init, invalidate the
/// cache over the fp32 staging tensor so the CPU reads the fresh data,
/// evaluate the sigmoid on the CPU into the output, then flush the output
/// buffer.
template <>
void SigmoidKernel<FPGA, float>::Compute(const SigmoidParam<FPGA> &param) {
  Tensor *in_x = param.FloatInput();
  Tensor *out = param.Out();
  fpga::PerformBypass(param.FpgaArgs());
  fpga::fpga_invalidate((void *)in_x->data<float>(),  // NOLINT
                        in_x->numel() * sizeof(float));
  // TODO: In general case, 0 should be squeezed before sigmoid input // NOLINT
  sigmoidFuntor<float>(in_x, out);
  fpga::fpga_flush(out->data<float>(), out->memory_size());
}
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -22,7 +22,7 @@ namespace operators {
template <>
bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
auto input = const_cast<Tensor *>(param->InputX());
auto input = const_cast<LoDTensor *>(param->InputX());
auto input_ptr = input->data<float>();
auto out = param->Out();
fpga::format_fp32_ofm(out);
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADDBN_OP
#include "operators/kernel/conv_add_bn_kernel.h"
#include <cmath>
namespace paddle_mobile {
namespace operators {
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADDBNRELU_OP
#include "operators/kernel/conv_add_bn_relu_kernel.h"
#include <cmath>
namespace paddle_mobile {
namespace operators {
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVBN_OP
#include "operators/kernel/conv_bn_kernel.h"
#include <cmath>
namespace paddle_mobile {
namespace operators {
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVBNRELU_OP
#include "operators/kernel/conv_bn_relu_kernel.h"
#include <cmath>
#include "fpga/V2/filter.h"
namespace paddle_mobile {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
// Kernel declarations for the logical operators; each one is compiled in only
// when its op macro is defined. The binary ops (and/or/xor) share
// LogicalBinaryParam, the unary op (not) uses LogicalUnaryParam.
// DECLARE_KERNEL is defined elsewhere; presumably it generates <Name>Kernel.
#ifdef LOGICAL_AND_OP
DECLARE_KERNEL(LogicalAnd, LogicalBinaryParam);
#endif
#ifdef LOGICAL_OR_OP
DECLARE_KERNEL(LogicalOr, LogicalBinaryParam);
#endif
#ifdef LOGICAL_NOT_OP
DECLARE_KERNEL(LogicalNot, LogicalUnaryParam);
#endif
#ifdef LOGICAL_XOR_OP
DECLARE_KERNEL(LogicalXor, LogicalBinaryParam);
#endif
} // namespace operators
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
// Kernel declarations for the tensor-array ops; each one is compiled in only
// when its op macro is defined.
// DECLARE_KERNEL is defined elsewhere; presumably it generates <Name>Kernel.
#ifdef WRITE_TO_ARRAY_OP
DECLARE_KERNEL(WriteToArray, WriteToArrayParam);
#endif // WRITE_TO_ARRAY_OP
#ifdef READ_FROM_ARRAY_OP
DECLARE_KERNEL(ReadFromArray, ReadFromArrayParam);
#endif // READ_FROM_ARRAY_OP
} // namespace operators
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
#ifdef WHILE_OP
template <typename Dtype>
class WhileParam : public OpParam {
public:
WhileParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope)
: inputs_(inputs), outputs_(outputs), scope_(scope) {
cond_ =
OpParam::GetVarValue<framework::LoDTensor>("Condition", inputs, scope);
sub_block_ = OpParam::GetAttr<int>("sub_block", attrs);
}
public:
framework::LoDTensor *cond_;
int sub_block_;
const VariableNameMap inputs_;
const VariableNameMap outputs_;
const Scope scope_;
};
DECLARE_KERNEL(While, WhileParam);
#endif // WHILE_OP
} // namespace operators
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/logical_op.h"
namespace paddle_mobile {
namespace operators {
// Generates <OpName>Op<Dtype, T>::InferShape(): the output tensor is resized
// to the dimensions of input X, so Out always has X's shape.
#define DEFINE_LOGICAL_INFERSHAPE(OpName) \
template <typename Dtype, typename T> \
void OpName##Op<Dtype, T>::InferShape() const { \
const auto &input_dims = this->param_.InputX()->dims(); \
this->param_.Out()->Resize(input_dims); \
}
// One InferShape definition per logical op that is compiled in.
#ifdef LOGICAL_AND_OP
DEFINE_LOGICAL_INFERSHAPE(LogicalAnd);
#endif // LOGICAL_AND_OP
#ifdef LOGICAL_OR_OP
DEFINE_LOGICAL_INFERSHAPE(LogicalOr);
#endif // LOGICAL_OR_OP
#ifdef LOGICAL_NOT_OP
DEFINE_LOGICAL_INFERSHAPE(LogicalNot);
#endif // LOGICAL_NOT_OP
#ifdef LOGICAL_XOR_OP
DEFINE_LOGICAL_INFERSHAPE(LogicalXor);
#endif // LOGICAL_XOR_OP
} // namespace operators
} // namespace paddle_mobile
// Short alias used by the registration macros below.
namespace ops = paddle_mobile::operators;
// Each logical op is registered only when its op macro and the CPU backend
// (PADDLE_MOBILE_CPU) are both enabled at compile time.
// REGISTER_OPERATOR_CPU is defined elsewhere; presumably it binds the op type
// name (first argument) to the operator class (second argument).
#ifdef LOGICAL_AND_OP
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(logical_and, ops::LogicalAndOp);
#endif
#endif // LOGICAL_AND_OP
#ifdef LOGICAL_OR_OP
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(logical_or, ops::LogicalOrOp);
#endif
#endif // LOGICAL_OR_OP
#ifdef LOGICAL_NOT_OP
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(logical_not, ops::LogicalNotOp);
#endif
#endif // LOGICAL_NOT_OP
#ifdef LOGICAL_XOR_OP
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(logical_xor, ops::LogicalXorOp);
#endif
#endif // LOGICAL_XOR_OP
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/logical_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
// Operator declarations for the logical ops. Each DECLARE_OPERATOR line pairs
// an operator name with its parameter class and kernel class; the binary ops
// (and/or/xor) share LogicalBinaryParam while logical_not uses
// LogicalUnaryParam.
// NOTE(review): DECLARE_OPERATOR is defined elsewhere; it presumably expands
// to the `<Name>Op<Dtype, T>` class template - confirm against
// framework/operator.h.
#ifdef LOGICAL_AND_OP
DECLARE_OPERATOR(LogicalAnd, LogicalBinaryParam, LogicalAndKernel);
#endif // LOGICAL_AND_OP
#ifdef LOGICAL_OR_OP
DECLARE_OPERATOR(LogicalOr, LogicalBinaryParam, LogicalOrKernel);
#endif // LOGICAL_OR_OP
#ifdef LOGICAL_NOT_OP
DECLARE_OPERATOR(LogicalNot, LogicalUnaryParam, LogicalNotKernel);
#endif // LOGICAL_NOT_OP
#ifdef LOGICAL_XOR_OP
DECLARE_OPERATOR(LogicalXor, LogicalBinaryParam, LogicalXorKernel);
#endif // LOGICAL_XOR_OP
} // namespace operators
} // namespace paddle_mobile
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstring>
#include <string>
#include "common/log.h"
#include "memory/t_malloc.h"
......
......@@ -40,8 +40,8 @@ template <>
inline int8_t Round<ROUND_NEAREST_TO_EVEN>(const float &x) {
float v = std::round(x);
int32_t q = static_cast<int32_t>(v);
if (std::abs(std::abs(q - v) - 0.5) <= 0) {
if (std::abs(q) % 2 != 0) {
if (fabs(fabs(q - v) - 0.5) <= 0) {
if (abs(q) % 2 != 0) {
q = q + ((q > 0) ? -1 : 1);
}
}
......
......@@ -1078,6 +1078,20 @@ class SigmoidParam : public OpParam {
private:
RType *input_x_;
RType *out_;
#ifdef PADDLE_MOBILE_FPGA
private:
std::shared_ptr<RType> float_input_x_;
fpga::BypassArgs fpga_bypass_args;
public:
RType *FloatInput() const {
return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
}
void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }
const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
#endif
};
#endif
......@@ -2357,10 +2371,17 @@ class ConvTransposeParam : public OpParam {
private:
fpga::DeconvArgs fpga_conv_args;
fpga::DWDeconvArgs fpga_DWDeconv_args;
public:
const fpga::DeconvArgs &FpgaArgs() const { return fpga_conv_args; }
const fpga::DWDeconvArgs &FpgaDWDconvArgs() const {
return fpga_DWDeconv_args;
}
void SetFpgaArgs(const fpga::DeconvArgs &args) { fpga_conv_args = args; }
void SetFpgaArgs(const fpga::DWDeconvArgs &args) {
fpga_DWDeconv_args = args;
}
#endif
};
......@@ -2942,5 +2963,112 @@ class CompareParam : public OpParam {
};
#endif // LESS_THAN_OP
#if defined(LOGICAL_AND_OP) || defined(LOGICAL_OR_OP) || defined(LOGICAL_XOR_OP)
template <typename Dtype>
class LogicalBinaryParam : public OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
LogicalBinaryParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, scope);
input_y_ = InputYFrom<GType>(inputs, scope);
output_ = OutFrom<GType>(outputs, scope);
}
const GType *InputX() const { return input_x_; }
const GType *InputY() const { return input_y_; }
GType *Out() const { return output_; }
public:
GType *input_x_;
GType *input_y_;
GType *output_;
};
#endif // LOGICAL_AND_OP LOGICAL_OR_OP LOGICAL_XOR_OP
#ifdef LOGICAL_NOT_OP
template <typename Dtype>
class LogicalUnaryParam : public OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
LogicalUnaryParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, scope);
output_ = OutFrom<GType>(outputs, scope);
}
const GType *InputX() const { return input_x_; }
GType *Out() const { return output_; }
public:
GType *input_x_;
GType *output_;
};
#endif // LOGICAL_NOT_OP
// #ifdef WHILE_OP
// template <typename Dtype>
// class WhileParam : public OpParam {
// public:
// WhileParam(const VariableNameMap &inputs,
// const VariableNameMap &outputs, const AttributeMap &attrs,
// const Scope &scope) {
// cond_ = OpParam::GetVarValue<framework::LoDTensor>("Condition", inputs,
// scope); block_desc_ = OpParam::GetAttr<framework::BlockDesc
// *>("sub_block", attrs);
// }
//
// public:
// framework::LoDTensor *cond_;
// const framework::BlockDesc *block_desc_;
// };
// #endif // WHILE_OP
#ifdef WRITE_TO_ARRAY_OP
template <typename Dtype>
class WriteToArrayParam : public OpParam {
public:
WriteToArrayParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
const Scope &scope) {
input_ = OpParam::GetVarValue<framework::LoDTensor>("X", inputs, scope);
index_ = OpParam::GetVarValue<framework::LoDTensor>("I", inputs, scope);
output_ =
OpParam::GetVarValue<framework::LoDTensorArray>("Out", outputs, scope);
}
public:
framework::LoDTensor *input_;
framework::LoDTensor *index_;
framework::LoDTensorArray *output_;
};
#endif
#ifdef READ_FROM_ARRAY_OP
template <typename Dtype>
class ReadFromArrayParam : public OpParam {
public:
ReadFromArrayParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
const Scope &scope) {
input_ =
OpParam::GetVarValue<framework::LoDTensorArray>("X", inputs, scope);
index_ = OpParam::GetVarValue<framework::LoDTensor>("I", inputs, scope);
output_ = OpParam::GetVarValue<framework::LoDTensor>("Out", outputs, scope);
}
public:
framework::LoDTensorArray *input_;
framework::LoDTensor *index_;
framework::LoDTensor *output_;
};
#endif
} // namespace operators
} // namespace paddle_mobile
文件模式从 100755 更改为 100644
......@@ -30,6 +30,10 @@ if (CON GREATER -1)
target_link_libraries(test-mobilenet-combine paddle-mobile)
set(FOUND_MATCH ON)
# gen test
ADD_EXECUTABLE(test-mobilenetgpu net/test_mobilenet_GPU.cpp test_helper.h test_include.h)
target_link_libraries(test-mobilenetgpu paddle-mobile)
endif ()
list(FIND NET "yolo" CON)
......@@ -417,4 +421,20 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE(test-vgg16ssd net/test_vgg16ssd.cpp test_helper.h test_include.h)
target_link_libraries(test-vgg16ssd paddle-mobile)
# gen test: logical_and operator unit test
ADD_EXECUTABLE(test-logical-and-op operators/test_logical_and_op.cpp test_helper.h test_include.h)
target_link_libraries(test-logical-and-op paddle-mobile)
# gen test: logical_or operator unit test
ADD_EXECUTABLE(test-logical-or-op operators/test_logical_or_op.cpp test_helper.h test_include.h)
target_link_libraries(test-logical-or-op paddle-mobile)
# gen test: logical_not operator unit test
ADD_EXECUTABLE(test-logical-not-op operators/test_logical_not_op.cpp test_helper.h test_include.h)
target_link_libraries(test-logical-not-op paddle-mobile)
# gen test: logical_xor operator unit test
ADD_EXECUTABLE(test-logical-xor-op operators/test_logical_xor_op.cpp test_helper.h test_include.h)
target_link_libraries(test-logical-xor-op paddle-mobile)
endif ()
......@@ -25,11 +25,11 @@ int main() {
paddle_mobile.SetCLPath("/data/local/tmp/bin");
#endif
auto isok =
paddle_mobile.Load(std::string(g_mobilenet_mul) + "/model",
std::string(g_mobilenet_mul) + "/params", true);
// auto isok =
// paddle_mobile.Load(std::string(g_mobilenet_mul) + "/model",
// std::string(g_mobilenet_mul) + "/params", true);
// auto isok = paddle_mobile.Load(std::string(g_mobilenet_mul), true);
auto isok = paddle_mobile.Load(std::string(g_mobilenet), true);
if (isok) {
auto time2 = paddle_mobile::time();
std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_include.h"
#include "operators/logical_op.h"
namespace paddle_mobile {
// Reference implementation used by the test: writes the element-wise logical
// AND of `inputX` and `inputY` into `output`. The element count is taken from
// `inputX`; `output` must already have its bool buffer allocated.
void LogicalAnd(const framework::Tensor *inputX,
                const framework::Tensor *inputY, framework::Tensor *output) {
  auto lhs = inputX->data<bool>();
  auto rhs = inputY->data<bool>();
  auto result = output->data<bool>();
  for (int idx = 0; idx < inputX->numel(); ++idx) {
    result[idx] = lhs[idx] && rhs[idx];
  }
}
// Runs the logical_and operator on two bool tensors of shape `input_shape`
// and checks every output element against the reference LogicalAnd().
//
// The inputs are filled by SetupTensor<bool>(..., 0, 1). On the first
// mismatch the offending values are logged and the process terminates with
// exit(1). Returns 0 when all elements match.
int TestLogicalAndOp(const std::vector<int> input_shape) {
  framework::DDim input_dims = framework::make_ddim(input_shape);
  VariableNameMap inputs;
  VariableNameMap outputs;
  auto scope = std::make_shared<framework::Scope>();
  inputs["X"] = std::vector<std::string>({"inputX"});
  inputs["Y"] = std::vector<std::string>({"inputY"});
  outputs["Out"] = std::vector<std::string>({"output"});

  auto x_var = scope.get()->Var("inputX");
  auto x = x_var->template GetMutable<framework::LoDTensor>();
  SetupTensor<bool>(x, input_dims, 0, 1);

  auto y_var = scope.get()->Var("inputY");
  auto y = y_var->template GetMutable<framework::LoDTensor>();
  SetupTensor<bool>(y, input_dims, 0, 1);

  auto output_var = scope.get()->Var("output");

  framework::AttributeMap attrs;
  auto *op = new operators::LogicalAndOp<CPU, float>("logical_and", inputs,
                                                     outputs, attrs, scope);
  op->InferShape();
  op->Init();
  op->Run();

  auto output = output_var->template Get<framework::LoDTensor>();

  // Compute the expected result into a separate tensor of the same shape.
  framework::Tensor output_cmp;
  bool *output_cmp_data = output_cmp.mutable_data<bool>(output->dims());
  LogicalAnd(x, y, &output_cmp);

  const bool *output_data = output->data<bool>();
  for (int i = 0; i < output->numel(); ++i) {
    if (output_data[i] != output_cmp_data[i]) {
      LOG(kLOG_INFO) << "output_data[" << i << "] = " << output_data[i]
                     << ", output_cmp_data[" << i
                     << "] = " << output_cmp_data[i];
      delete op;
      exit(1);
    }
  }
  // Release the operator on the success path too, and return a value: the
  // function is declared int, so falling off the end would be undefined.
  delete op;
  return 0;
}
} // namespace paddle_mobile
// Entry point: runs the logical_and test over three input shapes, in order,
// then logs the pass message.
int main() {
  const std::vector<std::vector<int>> shapes = {
      {1, 1, 2, 3}, {1, 3, 11, 12}, {1, 16, 32, 32}};
  for (const auto &shape : shapes) {
    paddle_mobile::TestLogicalAndOp(shape);
  }
  DLOG << "test logical_and op pass.";
  return 0;
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_include.h"
#include "operators/logical_op.h"
namespace paddle_mobile {
// Reference implementation used by the test: writes the element-wise logical
// negation of `inputX` into `output`. The element count is taken from
// `inputX`; `output` must already have its bool buffer allocated.
void LogicalNot(const framework::Tensor *inputX, framework::Tensor *output) {
  auto src = inputX->data<bool>();
  auto result = output->data<bool>();
  for (int idx = 0; idx < inputX->numel(); ++idx) {
    result[idx] = !src[idx];
  }
}
// Runs the logical_not operator on a bool tensor of shape `input_shape` and
// checks every output element against the reference LogicalNot().
//
// The input is filled by SetupTensor<bool>(..., 0, 1). On the first mismatch
// the offending values are logged and the process terminates with exit(1).
// Returns 0 when all elements match.
int TestLogicalNotOp(const std::vector<int> input_shape) {
  framework::DDim input_dims = framework::make_ddim(input_shape);
  VariableNameMap inputs;
  VariableNameMap outputs;
  auto scope = std::make_shared<framework::Scope>();
  inputs["X"] = std::vector<std::string>({"inputX"});
  outputs["Out"] = std::vector<std::string>({"output"});

  auto x_var = scope.get()->Var("inputX");
  auto x = x_var->template GetMutable<framework::LoDTensor>();
  SetupTensor<bool>(x, input_dims, 0, 1);

  auto output_var = scope.get()->Var("output");

  framework::AttributeMap attrs;
  auto *op = new operators::LogicalNotOp<CPU, float>("logical_not", inputs,
                                                     outputs, attrs, scope);
  op->InferShape();
  op->Init();
  op->Run();

  auto output = output_var->template Get<framework::LoDTensor>();

  // Compute the expected result into a separate tensor of the same shape.
  framework::Tensor output_cmp;
  bool *output_cmp_data = output_cmp.mutable_data<bool>(output->dims());
  LogicalNot(x, &output_cmp);

  const bool *output_data = output->data<bool>();
  for (int i = 0; i < output->numel(); ++i) {
    if (output_data[i] != output_cmp_data[i]) {
      LOG(kLOG_INFO) << "output_data[" << i << "] = " << output_data[i]
                     << ", output_cmp_data[" << i
                     << "] = " << output_cmp_data[i];
      delete op;
      exit(1);
    }
  }
  // Release the operator on the success path too, and return a value: the
  // function is declared int, so falling off the end would be undefined.
  delete op;
  return 0;
}
} // namespace paddle_mobile
// Entry point: runs the logical_not test over three input shapes, in order,
// then logs the pass message.
int main() {
  const std::vector<std::vector<int>> shapes = {
      {1, 1, 2, 3}, {1, 3, 11, 12}, {1, 16, 32, 32}};
  for (const auto &shape : shapes) {
    paddle_mobile::TestLogicalNotOp(shape);
  }
  DLOG << "test logical_not op pass.";
  return 0;
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_include.h"
#include "operators/logical_op.h"
namespace paddle_mobile {
// Reference implementation used by the test: writes the element-wise logical
// OR of `inputX` and `inputY` into `output`. The element count is taken from
// `inputX`; `output` must already have its bool buffer allocated.
void LogicalOr(const framework::Tensor *inputX, const framework::Tensor *inputY,
               framework::Tensor *output) {
  auto lhs = inputX->data<bool>();
  auto rhs = inputY->data<bool>();
  auto result = output->data<bool>();
  for (int idx = 0; idx < inputX->numel(); ++idx) {
    result[idx] = lhs[idx] || rhs[idx];
  }
}
// Runs the logical_or operator on two bool tensors of shape `input_shape`
// and checks every output element against the reference LogicalOr().
//
// The inputs are filled by SetupTensor<bool>(..., 0, 1). On the first
// mismatch the offending values are logged and the process terminates with
// exit(1). Returns 0 when all elements match.
int TestLogicalOrOp(const std::vector<int> input_shape) {
  framework::DDim input_dims = framework::make_ddim(input_shape);
  VariableNameMap inputs;
  VariableNameMap outputs;
  auto scope = std::make_shared<framework::Scope>();
  inputs["X"] = std::vector<std::string>({"inputX"});
  inputs["Y"] = std::vector<std::string>({"inputY"});
  outputs["Out"] = std::vector<std::string>({"output"});

  auto x_var = scope.get()->Var("inputX");
  auto x = x_var->template GetMutable<framework::LoDTensor>();
  SetupTensor<bool>(x, input_dims, 0, 1);

  auto y_var = scope.get()->Var("inputY");
  auto y = y_var->template GetMutable<framework::LoDTensor>();
  SetupTensor<bool>(y, input_dims, 0, 1);

  auto output_var = scope.get()->Var("output");

  framework::AttributeMap attrs;
  auto *op = new operators::LogicalOrOp<CPU, float>("logical_or", inputs,
                                                    outputs, attrs, scope);
  op->InferShape();
  op->Init();
  op->Run();

  auto output = output_var->template Get<framework::LoDTensor>();

  // Compute the expected result into a separate tensor of the same shape.
  framework::Tensor output_cmp;
  bool *output_cmp_data = output_cmp.mutable_data<bool>(output->dims());
  LogicalOr(x, y, &output_cmp);

  const bool *output_data = output->data<bool>();
  for (int i = 0; i < output->numel(); ++i) {
    if (output_data[i] != output_cmp_data[i]) {
      LOG(kLOG_INFO) << "output_data[" << i << "] = " << output_data[i]
                     << ", output_cmp_data[" << i
                     << "] = " << output_cmp_data[i];
      delete op;
      exit(1);
    }
  }
  // Release the operator on the success path too, and return a value: the
  // function is declared int, so falling off the end would be undefined.
  delete op;
  return 0;
}
} // namespace paddle_mobile
// Entry point: runs the logical_or test over three input shapes, in order,
// then logs the pass message.
int main() {
  const std::vector<std::vector<int>> shapes = {
      {1, 1, 2, 3}, {1, 3, 11, 12}, {1, 16, 32, 32}};
  for (const auto &shape : shapes) {
    paddle_mobile::TestLogicalOrOp(shape);
  }
  DLOG << "test logical_or op pass.";
  return 0;
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_include.h"
#include "operators/logical_op.h"
namespace paddle_mobile {
// Reference implementation used by the test: writes the element-wise logical
// XOR of `inputX` and `inputY` into `output`. The element count is taken from
// `inputX`; `output` must already have its bool buffer allocated.
void LogicalXor(const framework::Tensor *inputX,
                const framework::Tensor *inputY, framework::Tensor *output) {
  auto lhs = inputX->data<bool>();
  auto rhs = inputY->data<bool>();
  auto result = output->data<bool>();
  for (int idx = 0; idx < inputX->numel(); ++idx) {
    const bool a = lhs[idx];
    const bool b = rhs[idx];
    // For two bools, "exactly one is true" is the same as "they differ".
    result[idx] = (a != b);
  }
}
// Runs the logical_xor operator on two bool tensors of shape `input_shape`
// and checks every output element against the reference LogicalXor().
//
// The inputs are filled by SetupTensor<bool>(..., 0, 1). On the first
// mismatch the offending values are logged and the process terminates with
// exit(1). Returns 0 when all elements match.
int TestLogicalXorOp(const std::vector<int> input_shape) {
  framework::DDim input_dims = framework::make_ddim(input_shape);
  VariableNameMap inputs;
  VariableNameMap outputs;
  auto scope = std::make_shared<framework::Scope>();
  inputs["X"] = std::vector<std::string>({"inputX"});
  inputs["Y"] = std::vector<std::string>({"inputY"});
  outputs["Out"] = std::vector<std::string>({"output"});

  auto x_var = scope.get()->Var("inputX");
  auto x = x_var->template GetMutable<framework::LoDTensor>();
  SetupTensor<bool>(x, input_dims, 0, 1);

  auto y_var = scope.get()->Var("inputY");
  auto y = y_var->template GetMutable<framework::LoDTensor>();
  SetupTensor<bool>(y, input_dims, 0, 1);

  auto output_var = scope.get()->Var("output");

  framework::AttributeMap attrs;
  auto *op = new operators::LogicalXorOp<CPU, float>("logical_xor", inputs,
                                                     outputs, attrs, scope);
  op->InferShape();
  op->Init();
  op->Run();

  auto output = output_var->template Get<framework::LoDTensor>();

  // Compute the expected result into a separate tensor of the same shape.
  framework::Tensor output_cmp;
  bool *output_cmp_data = output_cmp.mutable_data<bool>(output->dims());
  LogicalXor(x, y, &output_cmp);

  const bool *output_data = output->data<bool>();
  for (int i = 0; i < output->numel(); ++i) {
    if (output_data[i] != output_cmp_data[i]) {
      LOG(kLOG_INFO) << "output_data[" << i << "] = " << output_data[i]
                     << ", output_cmp_data[" << i
                     << "] = " << output_cmp_data[i];
      delete op;
      exit(1);
    }
  }
  // Release the operator on the success path too, and return a value: the
  // function is declared int, so falling off the end would be undefined.
  delete op;
  return 0;
}
} // namespace paddle_mobile
// Entry point: runs the logical_xor test over three input shapes, in order,
// then logs the pass message.
int main() {
  const std::vector<std::vector<int>> shapes = {
      {1, 1, 2, 3}, {1, 3, 11, 12}, {1, 16, 32, 32}};
  for (const auto &shape : shapes) {
    paddle_mobile::TestLogicalXorOp(shape);
  }
  DLOG << "test logical_xor op pass.";
  return 0;
}
......@@ -83,6 +83,26 @@ void SetupTensor(paddle_mobile::framework::Tensor *input,
}
}
// bool specialization of SetupTensor: allocates `input` with shape `dims` and
// fills it with bool values.
//
// When `lower == upper` every element is set to that value; otherwise each
// element is drawn independently and is true when a uniform sample from
// [0, 1) exceeds 0.5. A function-local static seed is incremented on every
// call, so successive calls use different generator seeds while a whole run
// stays reproducible.
//
// Marked `inline` because a full explicit specialization is an ordinary
// function: defined in a header without `inline` it produces
// multiple-definition link errors once the header is included from more than
// one translation unit.
template <>
inline void SetupTensor<bool>(paddle_mobile::framework::Tensor *input,
                              paddle_mobile::framework::DDim dims, bool lower,
                              bool upper) {
  static unsigned int seed = 100;
  // The generator is created (and the seed advanced) on every call, including
  // the constant-fill case, to keep the seed sequence unchanged.
  std::mt19937 rng(seed++);
  std::uniform_real_distribution<double> uniform_dist(0, 1);
  bool *input_ptr = input->mutable_data<bool>(dims);
  if (lower == upper) {
    for (int i = 0; i < input->numel(); ++i) {
      input_ptr[i] = lower;
    }
  } else {
    for (int i = 0; i < input->numel(); ++i) {
      input_ptr[i] = uniform_dist(rng) > 0.5;
    }
  }
}
template <typename T>
T *CreateInput(Tensor *input, DDim dims, T low, T up) {
SetupTensor<T>(input, dims, static_cast<float>(low), static_cast<float>(up));
......
......@@ -26,6 +26,7 @@ function print_usage() {
${BLUE}ios${NONE}: run build for apple ios platform
${BLUE}linux_armv7${NONE}: run build for linux armv7 platform
${BLUE}linux_armv8${NONE}: run build for linux armv8 platform
${BLUE}fpga${NONE}: run build for fpga platform
"
echo "\n${RED}Network${NONE}: optional, for deep compressing the framework size
${BLUE}googlenet${NONE}: build only googlenet support
......@@ -146,6 +147,7 @@ function build_ios_armv8_cpu_only() {
-DIOS_PLATFORM=OS \
-DIOS_ARCH="${IOS_ARCH}" \
-DIS_IOS=true \
-DUSE_OPENMP=OFF \
-DGPU_MALI=OFF \
-DGPU_CL=OFF \
-DFPGA=OFF
......@@ -163,6 +165,7 @@ function build_ios_armv8_gpu() {
-DIOS_PLATFORM=OS \
-DIOS_ARCH="${IOS_ARCH}" \
-DIS_IOS=true \
-DUSE_OPENMP=OFF \
-DGPU_MALI=OFF \
-DGPU_CL=ON \
-DFPGA=OFF
......@@ -217,11 +220,19 @@ function build_ios() {
}
# Build for the linux armv7 platform: runs check_ndk, then the CPU-only
# build. The GPU build is currently disabled (commented out below).
function build_linux_armv7() {
check_ndk
build_linux_armv7_cpu_only
# build_linux_armv7_gpu
}
# Build for the fpga platform inside a docker container.
# Moves to the parent directory, builds the paddle-mobile:dev image from
# ./Dockerfile if `docker images` does not list it yet, then mounts the
# current directory at /workspace in a throw-away container and runs
# tools/docker_build_fpga.sh there.
function build_linux_fpga() {
    # Stop instead of building/mounting from the wrong directory.
    cd .. || exit 1
    image=$(docker images paddle-mobile:dev | grep 'paddle-mobile')
    if [[ -z "${image}" ]]; then
        docker build -t paddle-mobile:dev - < Dockerfile
    fi
    # Quote the mount source so checkout paths containing spaces still work.
    docker run --rm -v "$(pwd)":/workspace paddle-mobile:dev bash /workspace/tools/docker_build_fpga.sh
}
function main() {
local CMD=$1
init
......@@ -238,6 +249,9 @@ function main() {
linux_armv7)
build_linux_armv7
;;
fpga)
build_linux_fpga
;;
*)
print_usage
exit 0
......
# FPGA build steps; build_linux_fpga runs this script with bash inside the
# paddle-mobile:dev container, with the source tree mounted at /workspace.
# Installs the toolchain, then configures and builds with CPU=OFF / FPGA=ON.

# Abort on the first failing step so a broken install or configure is not
# followed by a misleading build attempt.
set -e

apt-get update
apt-get install -y gcc g++ cmake

# -p keeps re-runs working when the build directory already exists.
mkdir -p /workspace/build
cd /workspace/build
cmake .. -DCPU=OFF -DFPGA=ON
make -j4
......@@ -281,6 +281,13 @@ if(NOT FOUND_MATCH)
set(TANH_OP ON)
set(LOD_RESET_OP ON)
set(LESS_THAN_OP ON)
set(LOGICAL_AND_OP ON)
set(LOGICAL_OR_OP ON)
set(LOGICAL_NOT_OP ON)
set(LOGICAL_XOR_OP ON)
set(WHILE_OP ON)
set(WRITE_TO_ARRAY_OP ON)
set(READ_FROM_ARRAY_OP ON)
endif()
# option(BATCHNORM_OP "" ON)
......@@ -530,6 +537,18 @@ endif()
if (LESS_THAN_OP)
add_definitions(-DLESS_THAN_OP)
endif()
# Forward each enabled logical-operator switch to the compiler as a
# preprocessor definition of the same name.
if (LOGICAL_AND_OP)
add_definitions(-DLOGICAL_AND_OP)
endif()
if (LOGICAL_OR_OP)
add_definitions(-DLOGICAL_OR_OP)
endif()
if (LOGICAL_NOT_OP)
add_definitions(-DLOGICAL_NOT_OP)
endif()
if (LOGICAL_XOR_OP)
add_definitions(-DLOGICAL_XOR_OP)
endif()
if (TANH_OP)
add_definitions(-DTANH_OP)
......@@ -543,3 +562,13 @@ endif()
if (FUSION_DECONVADDRELU_OP)
add_definitions(-DFUSION_DECONVADDRELU_OP)
endif()
# Forward the while / write_to_array / read_from_array switches to the
# compiler as preprocessor definitions of the same name.
if (WHILE_OP)
add_definitions(-DWHILE_OP)
endif()
if (WRITE_TO_ARRAY_OP)
add_definitions(-DWRITE_TO_ARRAY_OP)
endif()
if (READ_FROM_ARRAY_OP)
add_definitions(-DREAD_FROM_ARRAY_OP)
endif()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册