Commit 3435149d authored by Ruilong Liu, committed by GitHub

Merge branch 'develop' into develop

@@ -14,36 +14,35 @@ limitations under the License. */
#pragma once
#include <stdint.h>
#include <cstddef>
#include <iostream>
#include <limits>
// Memory management.
namespace paddle {
namespace mobile {
namespace paddle_mobile {
namespace fpga {
namespace api {
int open_device();
int close_device();
void *fpga_malloc(size_t size);
void fpga_free(void *ptr);
void fpga_copy(void *dst, const void *src, size_t num);
void* fpga_malloc(size_t size);
void fpga_free(void* ptr);
void fpga_copy(void* dst, const void* src, size_t num);
struct FpgaVersionArgs {
void *buf;
void* buf;
};
struct MemoryToPhysicalArgs {
const void *src;
const void* src;
uint64_t physical;
};
struct MemoryCopyArgs {
void *src;
void *dst;
void* src;
void* dst;
size_t size;
};
@@ -51,38 +50,71 @@ struct FpgaQuantArgs {
float scale;
};
struct FpgaBNArgs {};
struct FpgaBNArgs {
bool enabled = false;
void* bias_addr;
void* scale_addr;
};
struct FpgaKernelArgs {
uint32_t width;
uint32_t height;
uint32_t stride_h;
uint32_t stride_w;
};
struct FpgaImageArgs {
uint32_t width;
uint32_t height;
uint32_t channels;
uint32_t pad_h;
uint32_t pad_w;
};
struct FpgaConvArgs {
bool enable_BN = false;
bool enable_Relu = false;
struct FpgaBNParam bn_parm;
bool relu_enabled;
struct FpgaBNArgs BNargs;
void* image_addr;
void* filter_addr;
void* bias_addr;
void* output_addr;
float quant_scale;
struct FpgaImageArgs image;
uint32_t filter_num;
uint32_t group_num;
struct FpgaKernelArgs kernel;
};
struct FpgaPoolArgs {
bool enable_BN = false;
struct FpgaBNParam bn_parm;
void* image_addr;
void* output_addr;
struct FpgaImageArgs image;
struct FpgaKernelArgs kernel;
};
struct FpgaEWAddArgs { // only supports X + Y
bool enable_Relu = false;
struct FpgaEWAddArgs {
bool relu_enabled;
void* image0_addr;
void* image1_addr;
void* result_addr;
uint32_t const0;
uint32_t const1;
uint32_t data_len; // aligned element count
};
int ComputeFpgaConv(struct FpgaConvArgs);
int ComputeFpgaPool(struct FpgaPoolArgs);
int ComputeFpgaEWAdd(struct FpgaEWAddArgs);
int ComputeFpgaConv(struct FpgaConvArgs args);
int ComputeFpgaPool(struct FpgaPoolArgs args);
int ComputeFpgaEWAdd(struct FpgaEWAddArgs args);
#define IOCTL_FPGA_MAGIC 'FPGA'
#define IOCTL_FPGA_MAGIC 'CNN'
#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
#define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaArgs)
#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs)
#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
#define IOCTL_MEM_TOPHY _IOW(IOCTL_FPGA_MAGIC, 12, struct MemoryToPhysicalArgs)
#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs)
#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)
} // namespace api
} // namespace fpga
} // namespace mobile
} // namespace paddle
} // namespace paddle_mobile
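
For orientation, here is a minimal user-space sketch of how open_device / close_device / fpga_copy could be implemented against the ioctl interface declared above. The device node path /dev/fpgadrv0, the fallback to a CPU memcpy, and the error handling are illustrative assumptions, not taken from this commit.

// fpga_api_sketch.cpp -- illustrative only; not part of this commit.
#include <fcntl.h>      // open
#include <sys/ioctl.h>  // ioctl
#include <unistd.h>     // close
#include <cstring>      // std::memcpy
#include "fpga/api/fpga_api.h"

namespace paddle_mobile {
namespace fpga {
namespace api {

namespace {
int g_fd = -1;  // file descriptor for the FPGA driver
}  // namespace

int open_device() {
  if (g_fd < 0) {
    g_fd = open("/dev/fpgadrv0", O_RDWR);  // device path is an assumption
  }
  return g_fd < 0 ? -1 : 0;
}

int close_device() {
  if (g_fd >= 0) {
    close(g_fd);
    g_fd = -1;
  }
  return 0;
}

void fpga_copy(void* dst, const void* src, size_t num) {
  MemoryCopyArgs args;
  args.src = const_cast<void*>(src);
  args.dst = dst;
  args.size = num;
  // If the driver is unavailable or the ioctl fails, fall back to a CPU copy.
  if (g_fd < 0 || ioctl(g_fd, IOCTL_MEM_COPY, &args) < 0) {
    std::memcpy(dst, src, num);
  }
}

}  // namespace api
}  // namespace fpga
}  // namespace paddle_mobile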
@@ -22,6 +22,9 @@ limitations under the License. */
#include "framework/scope.h"
#include "framework/tensor.h"
#include "framework/variable.h"
#ifdef PADDLE_MOBILE_FPGA
#include "fpga/api/fpga_api.h"
#endif
namespace paddle_mobile {
namespace operators {
@@ -256,6 +259,15 @@ class ElementwiseAddParam : OpParam {
Tensor *input_y_;
Tensor *out_;
int axis_;
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::FpgaEWAddArgs fpga_EW_add_args;
public:
const fpga::FpgaEWAddArgs &FpgaArgs() const { return fpga_EW_add_args; }
void SetFpgaArgs(const fpga::FpgaEWAddArgs &args) { fpga_EW_add_args = args; }
#endif
};
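
A usage sketch for the accessor pair above (hypothetical kernel-side code; the tensor accessors InputX/InputY/Out/numel are assumptions about this class, and the const0/const1 values are illustrative):

// Illustrative only: populate the FPGA elementwise-add args once, during Init.
bool InitFpgaEWAdd(ElementwiseAddParam *param) {
  fpga::FpgaEWAddArgs args;
  args.relu_enabled = false;
  args.image0_addr = const_cast<float *>(param->InputX()->data<float>());
  args.image1_addr = const_cast<float *>(param->InputY()->data<float>());
  args.result_addr = param->Out()->mutable_data<float>();
  args.const0 = 1;  // coefficients assumed to weight the two inputs
  args.const1 = 1;
  // Element count; alignment handling is omitted in this sketch.
  args.data_len = static_cast<uint32_t>(param->Out()->numel());
  param->SetFpgaArgs(args);
  return true;
}

// At compute time, the stored args would be handed to the driver:
//   fpga::ComputeFpgaEWAdd(param->FpgaArgs());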
#ifdef FUSION_ELEMENTWISEADDRELU_OP
@@ -450,80 +462,15 @@ class PoolParam : public OpParam {
vector<int> paddings_;
bool ceil_mode_;
bool global_pooling_ = false;
};
#endif
#ifdef FUSION_POOLBN_OP
class FusionPoolBNParam : OpParam {
public:
FusionPoolBNParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
const Scope &scope) {
input_ = InputXFrom<LoDTensor>(inputs, scope);
pooling_type_ = GetAttr<string>("pooling_type", attrs);
ksize_ = GetAttr<vector<int>>("ksize", attrs);
strides_ = GetAttr<vector<int>>("strides", attrs);
paddings_ = GetAttr<vector<int>>("paddings", attrs);
ceil_mode_ = GetAttr<bool>("ceil_mode", attrs);
global_pooling_ = GetAttr<bool>("global_pooling", attrs);
output_y_ = OutputYFrom<LoDTensor>(outputs, scope);
input_bias_ = InputBiasFrom<LoDTensor>(inputs, scope);
input_mean_ = InputMeanFrom<LoDTensor>(inputs, scope);
input_scale_ = InputScaleFrom<LoDTensor>(inputs, scope);
input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
epsilon_ = GetAttr<float>("epsilon", attrs);
momentum_ = GetAttr<float>("momentum", attrs);
// is_test_ = GetAttr<bool>("is_test", attrs);
}
const Tensor *Input() const { return input_; }
const string &PoolingType() const { return pooling_type_; }
const vector<int> &Ksize() const { return ksize_; }
const vector<int> &Strides() const { return strides_; }
const vector<int> &Paddings() const { return paddings_; }
bool isCeilMode() const { return ceil_mode_; }
bool isGlobalPooling() const { return global_pooling_; }
Tensor *OutputY() const { return output_y_; }
const Tensor *InputBias() const { return input_bias_; }
const Tensor *InputMean() const { return input_mean_; }
const Tensor *InputScale() const { return input_scale_; }
const Tensor *InputVariance() const { return input_variance_; }
const float &Epsilon() const { return epsilon_; }
const float &Momentum() const { return momentum_; }
const bool &IsTest() const { return is_test_; }
const string &DataFormat() const { return data_format_; }
#ifdef PADDLE_MOBILE_FPGA
private:
Tensor *input_;
string pooling_type_;
vector<int> ksize_;
vector<int> strides_;
vector<int> paddings_;
bool ceil_mode_;
bool global_pooling_ = false;
Tensor *output_y_;
Tensor *input_bias_;
Tensor *input_mean_;
Tensor *input_scale_;
Tensor *input_variance_;
float epsilon_;
float momentum_;
bool is_test_;
string data_format_;
fpga::FpgaPoolArgs fpga_pool_args;
public:
const fpga::FpgaPoolArgs &FpgaArgs() const { return fpga_pool_args; }
void SetFpgaArgs(const fpga::FpgaPoolArgs &args) { fpga_pool_args = args; }
#endif
};
#endif
@@ -704,7 +651,7 @@ class MultiClassNMSParam : public OpParam {
class FeedParam : public OpParam {
public:
FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, Scope &scope) {
const AttributeMap &attrs, Scope const &scope) {
input_x_ = InputXFrom<LoDTensor>(inputs, scope);
out_ = OutFrom<LoDTensor>(outputs, scope);
auto var = scope.Var("batch_size");
@@ -983,6 +930,15 @@ class FusionFcParam : public OpParam {
int x_num_col_dims_;
int y_num_col_dims_;
int axis_;
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::FpgaConvArgs fpga_conv_args;
public:
const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
#endif
};
#ifdef FUSION_FCRELU_OP
@@ -1032,6 +988,15 @@ class FusionConvAddParam : public OpParam {
vector<int> paddings_;
vector<int> dilations_;
int groups;
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::FpgaConvArgs fpga_conv_args;
public:
const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
#endif
};
Print &operator<<(Print &printer, const FusionConvAddParam &conv_param);
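
Similarly, a hedged sketch of filling FpgaConvArgs for the fused conv-add case. The accessor names (Input/Filter/Bias/Output/Strides/Paddings), the NCHW image layout, and the [out, in, kh, kw] filter layout are assumptions, not confirmed by this commit.

// Illustrative only: one way a kernel's Init might populate FpgaConvArgs.
bool InitFpgaConvAdd(FusionConvAddParam *param) {
  fpga::FpgaConvArgs args;
  args.relu_enabled = false;
  args.BNargs.enabled = false;  // no batch norm fused in this variant
  args.image_addr = const_cast<float *>(param->Input()->data<float>());
  args.filter_addr = const_cast<float *>(param->Filter()->data<float>());
  args.bias_addr = const_cast<float *>(param->Bias()->data<float>());
  args.output_addr = param->Output()->mutable_data<float>();
  auto in_dims = param->Input()->dims();  // NCHW assumed
  args.image.channels = static_cast<uint32_t>(in_dims[1]);
  args.image.height = static_cast<uint32_t>(in_dims[2]);
  args.image.width = static_cast<uint32_t>(in_dims[3]);
  args.image.pad_h = static_cast<uint32_t>(param->Paddings()[0]);
  args.image.pad_w = static_cast<uint32_t>(param->Paddings()[1]);
  auto filter_dims = param->Filter()->dims();  // [out, in, kh, kw] assumed
  args.filter_num = static_cast<uint32_t>(filter_dims[0]);
  args.kernel.height = static_cast<uint32_t>(filter_dims[2]);
  args.kernel.width = static_cast<uint32_t>(filter_dims[3]);
  args.kernel.stride_h = static_cast<uint32_t>(param->Strides()[0]);
  args.kernel.stride_w = static_cast<uint32_t>(param->Strides()[1]);
  args.group_num = 1;       // grouping ignored in this sketch
  args.quant_scale = 1.0f;  // quantization scale left at identity
  param->SetFpgaArgs(args);
  return true;
}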
@@ -1128,6 +1093,15 @@ class FusionConvAddBNReluParam : public OpParam {
bool is_test_;
Tensor *new_bias_;
Tensor *new_scale_;
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::FpgaConvArgs fpga_conv_args;
public:
const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
@@ -1213,6 +1187,15 @@ class FusionConvAddBNParam : public OpParam {
bool is_test_;
Tensor *new_bias_;
Tensor *new_scale_;
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::FpgaConvArgs fpga_conv_args;
public:
const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
@@ -1426,9 +1409,5 @@ class DropoutParam : public OpParam {
};
#endif
#ifdef REGION_OP
class RegionParam : public OpParam {};
#endif
} // namespace operators
} // namespace paddle_mobile
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cstdlib>
#include <ctime>
#include <iostream>
#include "../test_helper.h"
#include "common/log.h"
@@ -20,13 +22,21 @@ limitations under the License. */
#define a(i, j) a[(i)*lda + (j)]
#define b(i, j) b[(i)*ldb + (j)]
#define c(i, j) c[(i)*ldc + (j)]
#define c1(i, j) c1[(i)*ldc + (j)]
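// Row-major accessors: x(i, j) expands to x[(i) * ld + (j)].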
#define m 62
#define n 63
#define k 74
void print_matirx(int m, int n, int ldc, float *c) {
for (int i = 0; i < m; ++i) {
std::cout << c(i, 0);
for (int j = 1; j < n; ++j) {
std::cout << " | " << c(i, j);
}
std::cout << std::endl;
}
std::cout << std::endl;
}
int main() {
int do_sgemm(int m, int n, int k, bool relu, int t1, int t2, int pr) {
int lda = k;
int ldb = n;
int ldc = n;
@@ -39,44 +49,88 @@ int main() {
static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m * n));
float *c1 =
static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m * n));
float *scale =
static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m));
float *bias =
static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m));
srand(unsigned(time(0)));
for (int i = 0; i < m * k; ++i) {
a[i] = 2;
a[i] = t1 + rand() % t2;
}
for (int i = 0; i < k * n; ++i) {
b[i] = 2;
b[i] = t1 + rand() % t2;
}
for (int i = 0; i < m * n; ++i) {
c[i] = 2;
c1[i] = 2;
for (int i = 0; i < m; ++i) {
scale[i] = t1 + rand() % t2;
}
auto time1 = time();
// paddle_mobile::operators::math::Sgemm(m, n, k, 0.9, a, lda, b, ldb, 0.3,
// c,
// ldc);
auto time2 = time();
DLOG << "gemm cost :" << time_diff(time1, time2) << "ms\n";
for (int i = 0; i < m * n; ++i) {
std::cout << c[i] << " | ";
if (i % n == (n - 1)) {
std::cout << std::endl;
}
for (int i = 0; i < m; ++i) {
bias[i] = t1 + rand() % t2;
}
for (int j = 0; j < n; ++j) {
for (int i = 0; i < m; ++i) {
c1(i, j) *= 0.3;
for (int p = 0; p < k; ++p) {
c1(i, j) += 0.9 * a(i, p) * b(p, j);
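// Naive reference: c1(i, j) = scale[i] * sum_p a(i, p) * b(p, j) + bias[i],
// clamped at zero afterwards when relu is set.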
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
float r = 0;
for (int p = 0; p < k; p++) {
r += a(i, p) * b(p, j);
}
r *= scale[i];
r += bias[i];
if (relu && (r < 0)) {
r = 0;
}
c1(i, j) = r;
}
}
std::cout << "正确结果对比:" << std::endl;
paddle_mobile::operators::math::SgemmWithBn(m, n, k, 0.9, a, lda, b, ldb, 0.3,
c, ldc, relu, scale, bias);
int eq = 0;
int neq = 0;
for (int i = 0; i < m * n; ++i) {
std::cout << c1[i] << " | ";
if (i % n == (n - 1)) {
std::cout << std::endl;
if (static_cast<int>(c[i]) == static_cast<int>(c1[i])) {
++eq;
} else {
++neq;
}
}
if (pr > 0) {
std::cout << "A:" << std::endl;
print_matirx(m, k, lda, a);
std::cout << "B:" << std::endl;
print_matirx(k, n, ldb, b);
std::cout << "C:" << std::endl;
print_matirx(m, n, ldc, c);
std::cout << "C1:" << std::endl;
print_matirx(m, n, ldc, c1);
}
std::cout << "mnk=" << m << " " << n << " " << k << " relu=" << relu
<< " eq=" << eq << " neq=" << neq << std::endl;
paddle_mobile::memory::Free(a);
paddle_mobile::memory::Free(b);
paddle_mobile::memory::Free(c);
paddle_mobile::memory::Free(c1);
paddle_mobile::memory::Free(scale);
paddle_mobile::memory::Free(bias);
return 0;
}
int main() {
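// do_sgemm(m, n, k, relu, t1, t2, pr): matrix entries are random integers in
// [t1, t1 + t2 - 1]; pr > 0 dumps the matrices. The t1 = -4 cases below feed
// negative values through so the relu path is actually exercised.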
do_sgemm(9, 9, 9, true, 10, 10, 10);
do_sgemm(10, 6, 12, false, 10, 10, 0);
do_sgemm(512, 256, 384, false, 10, 10, 0);
do_sgemm(1366, 768, 256, false, 10, 10, 0);
do_sgemm(1255, 755, 333, false, 10, 10, 0);
do_sgemm(555, 777, 999, false, 10, 10, 0);
do_sgemm(10, 6, 12, true, -4, 10, 0);
do_sgemm(512, 256, 384, true, -4, 10, 0);
do_sgemm(1366, 768, 256, true, -4, 10, 0);
do_sgemm(1255, 755, 333, true, -4, 10, 0);
do_sgemm(555, 777, 999, true, -4, 10, 0);
return 0;
}
@@ -75,11 +75,9 @@ if ("FPGAnets" IN_LIST NET)
set(FUSION_CONVADDRELU_OP ON)
set(FUSION_CONVADDBNRELU_OP ON)
set(FUSION_CONVADDBN_OP ON)
set(FUSION_POOLBN_OP ON)
set(FUSION_ELEMENTWISEADDRELU_OP ON)
set(FUSION_FC_OP ON)
set(FUSION_FCRELU_OP ON)
set(REGION_OP ON)
set(POOL_OP ON)
set(CONCAT_OP ON)
set(SOFTMAX_OP ON)