Commit f14da1e1 authored by qnqinan

Merge remote-tracking branch 'origin/develop' into develop

@@ -20,7 +20,9 @@ limitations under the License. */
 #include <vector>
 namespace paddle_mobile {
-enum class Precision : int { FP32 = 0 };
+enum class Precision : int { FP32 = 0, FP16 = 1 };
+
+typedef int16_t half;
 template <Precision p>
 struct PrecisionTrait {
@@ -31,6 +33,10 @@ template <>
 struct PrecisionTrait<Precision::FP32> {
   typedef float ptype;
 };
+template <>
+struct PrecisionTrait<Precision::FP16> {
+  typedef half ptype;
+};
 //! device type
 enum DeviceTypeEnum { kINVALID = -1, kCPU = 0, kFPGA = 1, kGPU_MALI = 2 };
......
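For context, PrecisionTrait maps each Precision value to its storage type at compile time, so templated code can stay precision-generic; note that FP16 values are carried as raw int16_t bits via the half typedef rather than a true floating-point type. A minimal standalone sketch of how the trait is meant to be used (the BufferBytes helper is illustrative, not part of the patch):

#include <cstddef>
#include <cstdint>
#include <iostream>

enum class Precision : int { FP32 = 0, FP16 = 1 };
typedef int16_t half;  // FP16 payload stored as raw 16-bit integers

template <Precision p>
struct PrecisionTrait {};
template <>
struct PrecisionTrait<Precision::FP32> {
  typedef float ptype;
};
template <>
struct PrecisionTrait<Precision::FP16> {
  typedef half ptype;
};

// Hypothetical helper: compute a buffer size without hard-coding the type.
template <Precision p>
size_t BufferBytes(size_t element_count) {
  return element_count * sizeof(typename PrecisionTrait<p>::ptype);
}

int main() {
  std::cout << BufferBytes<Precision::FP32>(64) << std::endl;  // 256
  std::cout << BufferBytes<Precision::FP16>(64) << std::endl;  // 128
}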
@@ -14,36 +14,35 @@ limitations under the License. */
 #pragma once
+#include <stdint.h>
 #include <cstddef>
 #include <iostream>
 #include <limits>
 // memory management;
-namespace paddle {
-namespace mobile {
+namespace paddle_mobile {
 namespace fpga {
-namespace api {
 int open_device();
 int close_device();
-void *fpga_malloc(size_t size);
-void fpga_free(void *ptr);
-void fpga_copy(void *dst, const void *src, size_t num);
+void* fpga_malloc(size_t size);
+void fpga_free(void* ptr);
+void fpga_copy(void* dst, const void* src, size_t num);
 struct FpgaVersionArgs {
-  void *buf;
+  void* buf;
 };
 struct MemoryToPhysicalArgs {
-  const void *src;
+  const void* src;
   uint64_t physical;
 };
 struct MemoryCopyArgs {
-  void *src;
-  void *dst;
+  void* src;
+  void* dst;
   size_t size;
 };
@@ -51,38 +50,71 @@ struct FpgaQuantArgs {
   float scale;
 };
-struct FpgaBNArgs {};
+struct FpgaBNArgs {
+  bool enabled = false;
+  void* bias_addr;
+  void* scale_addr;
+};
+
+struct FpgaKernelArgs {
+  uint32_t width;
+  uint32_t height;
+  uint32_t stride_h;
+  uint32_t stride_w;
+};
+
+struct FpgaImageArgs {
+  uint32_t width;
+  uint32_t height;
+  uint32_t channels;
+  uint32_t pad_h;
+  uint32_t pad_w;
+};
+
 struct FpgaConvArgs {
-  bool enable_BN = false;
-  bool enable_Relu = false;
-  struct FpgaBNParam bn_parm;
+  bool relu_enabled;
+  struct FpgaBNArgs BNargs;
+  void* image_addr;
+  void* filter_addr;
+  void* bias_addr;
+  void* output_addr;
+  float quant_scale;
+  struct FpgaImageArgs image;
+  uint32_t filter_num;
+  uint32_t group_num;
+  struct FpgaKernelArgs kernel;
 };
 struct FpgaPoolArgs {
-  bool enable_BN = false;
-  struct FpgaBNParam bn_parm;
+  void* image_addr;
+  void* output_addr;
+  struct FpgaImageArgs image;
+  struct FpgaKernelArgs kernel;
 };
-struct FpgaEWAddArgs {  // only support X + Y
-  bool enable_Relu = false;
+struct FpgaEWAddArgs {
+  bool relu_enabled;
+  void* image0_addr;
+  void* image1_addr;
+  void* result_addr;
+  uint32_t const0;
+  uint32_t const1;
+  uint32_t data_len;  // aligned element count
 };
-int ComputeFpgaConv(struct FpgaConvArgs);
-int ComputeFpgaPool(struct FpgaPoolArgs);
-int ComputeFpgaEWAdd(struct FpgaEWAddArgs);
+int ComputeFpgaConv(struct FpgaConvArgs args);
+int ComputeFpgaPool(struct FpgaPoolArgs args);
+int ComputeFpgaEWAdd(struct FpgaEWAddArgs args);
-#define IOCTL_FPGA_MAGIC 'FPGA'
+#define IOCTL_FPGA_MAGIC 'CNN'
 #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
 #define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
-#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaArgs)
+#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs)
 #define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
+#define IOCTL_MEM_TOPHY _IOW(IOCTL_FPGA_MAGIC, 12, struct MemoryToPhysicalArgs)
 #define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
 #define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs)
 #define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)
-}  // namespace api
 }  // namespace fpga
-}  // namespace mobile
-}  // namespace paddle
+}  // namespace paddle_mobile
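The IOCTL_* macros above define the user/kernel ABI: each code transports one of the argument structs to the FPGA driver, so the Compute* entry points can be thin ioctl wrappers. A plausible user-space implementation against this header might look like the sketch below (the /dev/fpgadrv0 node name, the shared file descriptor, and the error handling are assumptions; the patch only declares the interface):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include "fpga/api/fpga_api.h"

namespace paddle_mobile {
namespace fpga {

static int fd = -1;  // assumed: one device node shared by all calls

int open_device() {
  if (fd < 0) fd = open("/dev/fpgadrv0", O_RDWR);  // hypothetical node path
  return fd < 0 ? -1 : 0;
}

int close_device() {
  if (fd >= 0) close(fd);
  fd = -1;
  return 0;
}

int ComputeFpgaConv(struct FpgaConvArgs args) {
  // Hand the fully populated descriptor to the driver in a single call.
  return ioctl(fd, IOCTL_CONFIG_CONV, &args);
}

}  // namespace fpga
}  // namespace paddle_mobile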
@@ -14,11 +14,13 @@ limitations under the License. */
 #pragma once
+#include <algorithm>
 #include <map>
 #include <string>
+#include <vector>
 #include "framework/operator.h"
-#include "node.h"
+#include "framework/program/program-optimize/node.h"
 namespace paddle_mobile {
 namespace framework {
......
@@ -16,14 +16,15 @@ limitations under the License. */
 #include <cstdint>
 #include <cstring>
+#include <fstream>
 #include <memory>
+#include <string>
 #include <type_traits>
 #include <typeindex>
 #include <vector>
-#include "common/enforce.h"
-#include <fstream>
 #include "common/enforce.h"
+#include "common/types.h"
 #include "framework/data_layout.h"
 #include "framework/ddim.h"
 #include "memory/t_malloc.h"
@@ -63,7 +64,8 @@ struct SizeOfTypeFunctor<HEAD, TAIL...> {
 };
 static inline size_t SizeOfType(std::type_index type) {
-  SizeOfTypeFunctor<int, float, double, int16_t, int64_t, bool, size_t> functor;
+  SizeOfTypeFunctor<int, half, float, double, int16_t, int64_t, bool, size_t>
+      functor;
   size_t size = functor(type);
   PADDLE_MOBILE_ENFORCE(size != 0UL, "Cannot get size of type %s", type.name());
......
@@ -187,7 +187,7 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
   memcpy(&max_value, *data + sizeof(float), sizeof(float));
   *data += 2 * sizeof(float);
   const float factor = (max_value - min_value) / 255.0;
-  uint8_t *uint8_data = (uint8_t *)(*data);
+  uint8_t *uint8_data = reinterpret_cast<uint8_t *>(*data);
   for (int k = 0; k < memory_size; ++k) {
     static_cast<float *>(memory)[k] = uint8_data[k] * factor + min_value;
   }
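The rewritten cast sits inside the loader's dequantization path: quantized weights are stored as a (min_value, max_value) float pair followed by one byte per element, and each byte k is mapped back to uint8_data[k] * (max_value - min_value) / 255 + min_value. A standalone sketch of the round trip (the quantize direction is inferred; only dequantize appears in the patch):

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const float min_value = -1.5f, max_value = 2.0f;
  const float factor = (max_value - min_value) / 255.0f;

  // Quantize: the assumed inverse of what LoadMemory undoes.
  std::vector<float> weights = {-1.5f, 0.0f, 0.25f, 2.0f};
  std::vector<uint8_t> quantized;
  for (float w : weights) {
    quantized.push_back(static_cast<uint8_t>((w - min_value) / factor + 0.5f));
  }

  // Dequantize exactly as the executor does: byte * factor + min_value.
  for (uint8_t q : quantized) {
    std::cout << q * factor + min_value << std::endl;  // within factor/2 of input
  }
}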
@@ -419,7 +419,7 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
 }
 template class Executor<CPU, Precision::FP32>;
-template class Executor<FPGA, Precision::FP32>;
 template class Executor<GPU_MALI, Precision::FP32>;
+template class Executor<FPGA, Precision::FP16>;
 }  // namespace paddle_mobile
@@ -14,7 +14,9 @@ limitations under the License. */
 #ifdef CONCAT_OP
-#include "concat_op.h"
+#include <vector>
+
+#include "operators/concat_op.h"
 namespace paddle_mobile {
 namespace operators {
@@ -68,6 +70,7 @@ REGISTER_OPERATOR_CPU(concat, ops::ConcatOp);
 REGISTER_OPERATOR_MALI_GPU(concat, ops::ConcatOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(concat, ops::ConcatOp);
 #endif
 #endif
@@ -53,6 +53,7 @@ USE_OP_CPU(concat);
 USE_OP_MALI_GPU(concat);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(concat);
 #endif
 #endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONCAT_OP
#include "operators/kernel/concat_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConcatKernel<FPGA, half>::Init(ConcatParam *param) {
return true;
}
template <>
void ConcatKernel<FPGA, half>::Compute(const ConcatParam &param) const {
auto inputs = param.Inputs();
auto *out = param.Out();
int64_t axis = param.Axis();
out->mutable_data<half>();
DDim out_dim = out->dims();
int pixels = out_dim[1] * out_dim[2];
auto out_channel = out_dim[3];
auto out_offset = 0;
  for (size_t i = 0; i < inputs.size(); ++i) {
    auto input = inputs[i];
    auto channels = input->dims()[3];
    auto src = input->data<half>();
    // For each flattened pixel, copy this input's channel block into the
    // output row at the running channel offset.
    for (int j = 0; j < pixels; ++j) {
      auto dst = out->data<half>() + j * out_channel + out_offset;
      memory::Copy(dst, src + j * channels, sizeof(half) * channels);
    }
    out_offset += channels;
  }
}
} // namespace operators
} // namespace paddle_mobile
#endif
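The kernel concatenates NHWC tensors along the channel axis: for every flattened pixel, each input contributes a contiguous run of channels values at a fixed channel offset within the output row. A minimal host-side sketch of that offset arithmetic, using plain float buffers instead of half tensors (sizes chosen for illustration):

#include <iostream>
#include <vector>

int main() {
  const int pixels = 2;  // flattened N * H * W
  const std::vector<std::vector<float>> inputs = {
      {1, 2, 3, 4},  // 2 pixels x 2 channels
      {7, 8},        // 2 pixels x 1 channel
  };

  int out_channel = 0;
  for (const auto &in : inputs) out_channel += static_cast<int>(in.size()) / pixels;

  std::vector<float> out(pixels * out_channel);
  int out_offset = 0;
  for (const auto &in : inputs) {
    const int channels = static_cast<int>(in.size()) / pixels;
    for (int j = 0; j < pixels; ++j) {
      for (int c = 0; c < channels; ++c) {
        // Same indexing as the kernel: out + j * out_channel + out_offset.
        out[j * out_channel + out_offset + c] = in[j * channels + c];
      }
    }
    out_offset += channels;
  }

  for (float v : out) std::cout << v << " ";  // prints: 1 2 7 3 4 8
  std::cout << std::endl;
}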
@@ -22,6 +22,9 @@ limitations under the License. */
 #include "framework/scope.h"
 #include "framework/tensor.h"
 #include "framework/variable.h"
+#ifdef PADDLE_MOBILE_FPGA
+#include "fpga/api/fpga_api.h"
+#endif
 namespace paddle_mobile {
 namespace operators {
@@ -256,6 +259,15 @@ class ElementwiseAddParam : OpParam {
   Tensor *input_y_;
   Tensor *out_;
   int axis_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::FpgaEWAddArgs fpga_EW_add_args;
+
+ public:
+  const fpga::FpgaEWAddArgs &FpgaArgs() const { return fpga_EW_add_args; }
+  void SetFpgaArgs(const fpga::FpgaEWAddArgs &args) { fpga_EW_add_args = args; }
+#endif
 };
 #ifdef FUSION_ELEMENTWISEADDRELU_OP
@@ -450,80 +462,15 @@ class PoolParam : public OpParam {
   vector<int> paddings_;
   bool ceil_mode_;
   bool global_pooling_ = false;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::FpgaPoolArgs fpga_pool_args;
+
+ public:
+  const fpga::FpgaPoolArgs &FpgaArgs() const { return fpga_pool_args; }
+  void SetFpgaArgs(const fpga::FpgaPoolArgs &args) { fpga_pool_args = args; }
+#endif
 };
 #endif
-
-#ifdef FUSION_POOLBN_OP
-class FusionPoolBNParam : OpParam {
- public:
-  FusionPoolBNParam(const VariableNameMap &inputs,
-                    const VariableNameMap &outputs, const AttributeMap &attrs,
-                    const Scope &scope) {
-    input_ = InputXFrom<LoDTensor>(inputs, scope);
-    pooling_type_ = GetAttr<string>("pooling_type", attrs);
-    ksize_ = GetAttr<vector<int>>("ksize", attrs);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    ceil_mode_ = GetAttr<bool>("ceil_mode", attrs);
-    global_pooling_ = GetAttr<bool>("global_pooling", attrs);
-    output_y_ = OutputYFrom<LoDTensor>(outputs, scope);
-    input_bias_ = InputBiasFrom<LoDTensor>(inputs, scope);
-    input_mean_ = InputMeanFrom<LoDTensor>(inputs, scope);
-    input_scale_ = InputScaleFrom<LoDTensor>(inputs, scope);
-    input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
-    epsilon_ = GetAttr<float>("epsilon", attrs);
-    momentum_ = GetAttr<float>("momentum", attrs);
-    // is_test_ = GetAttr<bool>("is_test", attrs);
-  }
-
-  const Tensor *Input() const { return input_; }
-  const string &PoolingType() const { return pooling_type_; }
-  const vector<int> &Ksize() const { return ksize_; }
-  const vector<int> &Strides() const { return strides_; }
-  const vector<int> &Paddings() const { return paddings_; }
-  bool isCeilMode() const { return ceil_mode_; }
-  bool isGlobalPooling() const { return global_pooling_; }
-  Tensor *OutputY() const { return output_y_; }
-  const Tensor *InputBias() const { return input_bias_; }
-  const Tensor *InputMean() const { return input_mean_; }
-  const Tensor *InputScale() const { return input_scale_; }
-  const Tensor *InputVariance() const { return input_variance_; }
-  const float &Epsilon() const { return epsilon_; }
-  const float &Momentum() const { return momentum_; }
-  const bool &IsTest() const { return is_test_; }
-  const string &DataFormat() const { return data_format_; }
-
- private:
-  Tensor *input_;
-  string pooling_type_;
-  vector<int> ksize_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  bool ceil_mode_;
-  bool global_pooling_ = false;
-  Tensor *output_y_;
-  Tensor *input_bias_;
-  Tensor *input_mean_;
-  Tensor *input_scale_;
-  Tensor *input_variance_;
-  float epsilon_;
-  float momentum_;
-  bool is_test_;
-  string data_format_;
-};
-#endif
@@ -704,7 +651,7 @@ class MultiClassNMSParam : public OpParam {
 class FeedParam : public OpParam {
  public:
   FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
-            const AttributeMap &attrs, Scope &scope) {
+            const AttributeMap &attrs, Scope const &scope) {
     input_x_ = InputXFrom<LoDTensor>(inputs, scope);
     out_ = OutFrom<LoDTensor>(outputs, scope);
     auto var = scope.Var("batch_size");
@@ -983,6 +930,15 @@ class FusionFcParam : public OpParam {
   int x_num_col_dims_;
   int y_num_col_dims_;
   int axis_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::FpgaConvArgs fpga_conv_args;
+
+ public:
+  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+#endif
 };
 #ifdef FUSION_FCRELU_OP
@@ -1032,6 +988,15 @@ class FusionConvAddParam : public OpParam {
   vector<int> paddings_;
   vector<int> dilations_;
   int groups;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::FpgaConvArgs fpga_conv_args;
+
+ public:
+  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+#endif
 };
 Print &operator<<(Print &printer, const FusionConvAddParam &conv_param);
@@ -1128,6 +1093,15 @@ class FusionConvAddBNReluParam : public OpParam {
   bool is_test_;
   Tensor *new_bias_;
   Tensor *new_scale_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::FpgaConvArgs fpga_conv_args;
+
+ public:
+  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+#endif
 };
 #endif
@@ -1213,6 +1187,15 @@ class FusionConvAddBNParam : public OpParam {
   bool is_test_;
   Tensor *new_bias_;
   Tensor *new_scale_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::FpgaConvArgs fpga_conv_args;
+
+ public:
+  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+#endif
 };
 #endif
@@ -1426,9 +1409,5 @@ class DropoutParam : public OpParam {
 };
 #endif
-
-#ifdef REGION_OP
-class RegionParam : public OpParam {};
-#endif
 }  // namespace operators
 }  // namespace paddle_mobile
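Every param touched here follows the same PADDLE_MOBILE_FPGA pattern: the CPU-side fields stay untouched, and a prebuilt fpga::Fpga*Args descriptor is cached on the param via SetFpgaArgs so the kernel's Compute only has to dispatch it. A hedged sketch of how an FPGA pool kernel's Init could populate the descriptor (the Input/Output/Ksize/Strides accessors on PoolParam are assumptions mirroring the CPU kernels; only the FpgaArgs/SetFpgaArgs pair is introduced by this patch):

template <>
bool PoolKernel<FPGA, half>::Init(PoolParam *param) {
  fpga::FpgaPoolArgs args = {};
  // Assumed accessors: Input()/Output() mirror the CPU pool param.
  args.image_addr = const_cast<half *>(param->Input()->data<half>());
  args.output_addr = param->Output()->mutable_data<half>();
  args.image.height = param->Input()->dims()[1];   // NHWC layout
  args.image.width = param->Input()->dims()[2];
  args.image.channels = param->Input()->dims()[3];
  args.kernel.height = param->Ksize()[0];
  args.kernel.width = param->Ksize()[1];
  args.kernel.stride_h = param->Strides()[0];
  args.kernel.stride_w = param->Strides()[1];
  param->SetFpgaArgs(args);  // Compute() can later just run
                             // fpga::ComputeFpgaPool(param.FpgaArgs());
  return true;
}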
@@ -33,7 +33,7 @@ class ResizeOp
       DeviceType, ResizeParam, operators::ResizeKernel<DeviceType, T>> {
  public:
   ResizeOp(const std::string &type, const VariableNameMap &inputs,
-           const VariableNameMap &outputs, const framework::AttributeMap attrs,
+           const VariableNameMap &outputs, const framework::AttributeMap &attrs,
            std::shared_ptr<framework::Scope> scope)
       : framework::OperatorWithKernel<DeviceType, ResizeParam,
                                       operators::ResizeKernel<DeviceType, T>>(
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include <cstdlib>
#include <ctime>
#include "../test_helper.h"
#include "common/log.h"
#include "memory/t_malloc.h"
#include "operators/math/gemm.h"
#define a(i, j) a[(i)*lda + (j)]
#define b(i, j) b[(i)*ldb + (j)]
#define c(i, j) c[(i)*ldc + (j)]
#define c1(i, j) c1[(i)*ldc + (j)]
void print_matirx(int m, int n, int ldc, float *c) {
for (int i = 0; i < m; ++i) {
std::cout << c(i, 0);
for (int j = 1; j < n; ++j) {
std::cout << " | " << c(i, j);
}
std::cout << std::endl;
}
std::cout << std::endl;
}
int do_sgemm(int m, int n, int k, bool relu, int t1, int t2, int pr) {
int lda = k;
int ldb = n;
int ldc = n;
  float *a =
      static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m * k));
  float *b =
      static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * k * n));
  float *c =
      static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m * n));
  float *c1 =
      static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m * n));
  float *scale =
      static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m));
  float *bias =
      static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m));
srand(unsigned(time(0)));
for (int i = 0; i < m * k; ++i) {
a[i] = t1 + rand() % t2;
}
for (int i = 0; i < k * n; ++i) {
b[i] = t1 + rand() % t2;
}
for (int i = 0; i < m; ++i) {
scale[i] = t1 + rand() % t2;
}
for (int i = 0; i < m; ++i) {
bias[i] = t1 + rand() % t2;
}
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
float r = 0;
for (int p = 0; p < k; p++) {
r += a(i, p) * b(p, j);
}
r *= scale[i];
r += bias[i];
if (relu && (r < 0)) {
r = 0;
}
c1(i, j) = r;
}
}
paddle_mobile::operators::math::SgemmWithBn(m, n, k, 0.9, a, lda,
b, ldb, 0.3, c, ldc, relu, scale, bias);
int eq = 0;
int neq = 0;
for (int i = 0; i < m * n; ++i) {
if (static_cast<int>(c[i]) == static_cast<int>(c1[i])) {
++eq;
} else {
++neq;
}
}
if (pr > 0) {
std::cout << "A:" << std::endl;
print_matirx(m, k, lda, a);
std::cout << "B:" << std::endl;
print_matirx(k, n, ldb, b);
std::cout << "C:" << std::endl;
print_matirx(m, n, ldc, c);
std::cout << "C1:" << std::endl;
print_matirx(m, n, ldc, c1);
}
std::cout << "mnk=" << m << " " << n << " " << k <<
" relu=" << relu <<
" eq=" << eq << " neq=" << neq << std::endl;
paddle_mobile::memory::Free(a);
paddle_mobile::memory::Free(b);
paddle_mobile::memory::Free(c);
paddle_mobile::memory::Free(c1);
paddle_mobile::memory::Free(scale);
paddle_mobile::memory::Free(bias);
return 0;
}
int main() {
do_sgemm(9, 9, 9, true, 10, 10, 10);
do_sgemm(10, 6, 12, false, 10, 10, 0);
do_sgemm(512, 256, 384, false, 10, 10, 0);
do_sgemm(1366, 768, 256, false, 10, 10, 0);
do_sgemm(1255, 755, 333, false, 10, 10, 0);
do_sgemm(555, 777, 999, false, 10, 10, 0);
do_sgemm(10, 6, 12, true, -4, 10, 0);
do_sgemm(512, 256, 384, true, -4, 10, 0);
do_sgemm(1366, 768, 256, true, -4, 10, 0);
do_sgemm(1255, 755, 333, true, -4, 10, 0);
do_sgemm(555, 777, 999, true, -4, 10, 0);
return 0;
}
@@ -65,6 +65,8 @@ endif()
 file(TO_CMAKE_PATH "${ANDROID_NDK}" ANDROID_NDK)
 # Android NDK revision
+message("${ANDROID_NDK}")
 file(READ "${ANDROID_NDK}/source.properties" ANDROID_NDK_SOURCE_PROPERTIES)
 set(ANDROID_NDK_SOURCE_PROPERTIES_REGEX
     "^Pkg\\.Desc = Android NDK\nPkg\\.Revision = ([0-9]+)\\.")
@@ -159,7 +161,7 @@ endif()
 # Default values for configurable variables.
 if(NOT ANDROID_TOOLCHAIN)
-  set(ANDROID_TOOLCHAIN clang)
+  set(ANDROID_TOOLCHAIN gcc)
 endif()
 if(NOT ANDROID_ABI)
   set(ANDROID_ABI armeabi-v7a)
......
@@ -40,8 +40,8 @@ build_for_android() {
     fi
     if [ -z "$PLATFORM" ]; then
-        PLATFORM="arm-v7a"  # Users could choose "arm-v8a" platform.
-        # PLATFORM="arm-v8a"
+        # PLATFORM="arm-v7a"  # Users could choose "arm-v8a" platform.
+        PLATFORM="arm-v8a"
     fi
     if [ "${PLATFORM}" = "arm-v7a" ]; then
@@ -63,7 +63,7 @@ build_for_android() {
     TOOLCHAIN_FILE="./tools/android-cmake/android.toolchain.cmake"
     ANDROID_ARM_MODE="arm"
-    if [ "${#NETS}" > 1 ]; then
+    if [ "${#NETS}" -gt 1 ]; then
         cmake .. \
             -B"../build/release/${PLATFORM}" \
             -DANDROID_ABI="${ABI}" \
@@ -99,7 +99,7 @@ build_for_ios() {
     BUILD_DIR=../build/release/"${PLATFORM}"/
     TOOLCHAIN_FILE="./tools/ios-cmake/ios.toolchain.cmake"
     mkdir -p "${BUILD_DIR}"
-    if [ "${#NETS}" > 1 ]; then
+    if [ "${#NETS}" -gt 1 ]; then
         cmake .. \
             -B"${BUILD_DIR}" \
             -DCMAKE_BUILD_TYPE="${MODE}" \
......
@@ -75,11 +75,9 @@ if ("FPGAnets" IN_LIST NET)
   set(FUSION_CONVADDRELU_OP ON)
   set(FUSION_CONVADDBNRELU_OP ON)
   set(FUSION_CONVADDBN_OP ON)
-  set(FUSION_POOLBN_OP ON)
   set(FUSION_ELEMENTWISEADDRELU_OP ON)
   set(FUSION_FC_OP ON)
   set(FUSION_FCRELU_OP ON)
-  set(REGION_OP ON)
   set(POOL_OP ON)
   set(CONCAT_OP ON)
   set(SOFTMAX_OP ON)
......