提交 afa73663 编写于 作者: qnqinan's avatar qnqinan

Merge remote-tracking branch 'origin/develop' into develop

...@@ -29,15 +29,15 @@ limitations under the License. */ ...@@ -29,15 +29,15 @@ limitations under the License. */
#include "fpga/api/fpga_api.h" #include "fpga/api/fpga_api.h"
namespace paddle { namespace paddle_mobile {
namespace mobile {
namespace fpga { namespace fpga {
namespace api {
static int fd = -1; static int fd = -1;
static const char *device_path = "/dev/fpgadrv0"; static const char *device_path = "/dev/fpgadrv0";
static inline int do_ioctl(int req, void *arg) { return ioctl(req, arg); } static inline int do_ioctl(int req, void *arg) {
return ioctl(req, (long unsigned int)arg);
}
int open_device() { int open_device() {
if (fd == -1) { if (fd == -1) {
...@@ -48,8 +48,8 @@ int open_device() { ...@@ -48,8 +48,8 @@ int open_device() {
// memory management; // memory management;
void *fpga_malloc(size_t size) { void *fpga_malloc(size_t size) {
return reinterpret_cast<(void *)> mmap64(NULL, size, PROT_READ | PROT_WRITE, return reinterpret_cast<void *>(
MAP_SHARED, fd, 0); mmap64(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0));
} }
void fpga_free(void *ptr) { munmap(ptr, 0); } void fpga_free(void *ptr) { munmap(ptr, 0); }
...@@ -58,11 +58,9 @@ void fpga_copy(void *dest, const void *src, size_t num) { ...@@ -58,11 +58,9 @@ void fpga_copy(void *dest, const void *src, size_t num) {
memcpy(dest, src, num); memcpy(dest, src, num);
} }
int ComputeFpgaConv(struct FpgaConvArgs) {} int ComputeFpgaConv(struct ConvArgs args) {}
int ComputeFpgaPool(struct FpgaPoolArgs) {} int ComputeFpgaPool(struct PoolingArgs args) {}
int ComputeFpgaEWAdd(struct FpgaEWAddArgs) {} int ComputeFpgaEWAdd(struct EWAddArgs args) {}
} // namespace api
} // namespace fpga } // namespace fpga
} // namespace mobile } // namespace paddle_mobile
} // namespace paddle
...@@ -31,90 +31,132 @@ void* fpga_malloc(size_t size); ...@@ -31,90 +31,132 @@ void* fpga_malloc(size_t size);
void fpga_free(void* ptr); void fpga_free(void* ptr);
void fpga_copy(void* dst, const void* src, size_t num); void fpga_copy(void* dst, const void* src, size_t num);
struct FpgaVersionArgs { struct VersionArgs {
void* buf; void* buffer;
};
struct MemoryToPhysicalArgs {
const void* src;
uint64_t physical;
}; };
struct MemoryCopyArgs { struct MemoryCopyArgs {
void* src; void* src;
void* dst; void* dest;
size_t size; size_t size;
}; };
struct FpgaQuantArgs { struct BNArgs {
float scale; bool enabled;
}; void* bias_address;
void* scale_address;
struct FpgaBNArgs {
bool enabled = false;
void* bias_addr;
void* scale_addr;
}; };
struct FpgaKernelArgs { /**
Conv and Pooling kernel
*/
struct KernelArgs {
uint32_t width; uint32_t width;
uint32_t height; uint32_t height;
uint32_t stride_h;
uint32_t stride_w; uint32_t stride_w;
uint32_t stride_h;
}; };
struct FpgaImageArgs { struct ImageInputArgs {
uint32_t width; void* address; // input featuremap virtual address
uint32_t height; float* scale_address; // input scale address;
uint32_t channels; uint32_t channels;
uint32_t pad_h; uint32_t width; // featuremap width
uint32_t pad_w; uint32_t height;
uint32_t pad_width; // padding width;
uint32_t pad_height;
};
struct ImageOutputArgs {
void* address; // output result address;
float* scale_address; // output scale address;
}; };
struct FpgaConvArgs { struct ConvArgs {
bool relu_enabled; bool relu_enabled;
struct FpgaBNArgs BNargs; void* bias_address;
void* image_addr; void* filter_address;
void* filter_addr;
void* bias_addr;
void* output_addr;
float quant_scale;
struct FpgaImageArgs image;
uint32_t filter_num; uint32_t filter_num;
uint32_t group_num; uint32_t group_num;
struct FpgaKernelArgs kernel; struct BNArgs bn;
struct KernelArgs kernel;
struct ImageInputArgs image; // input image;
struct ImageOutputArgs output;
}; };
struct FpgaPoolArgs { struct PoolingArgs {
void* image_addr; struct KernelArgs kernel;
void* output_addr; struct ImageInputArgs image; // input image;
struct FpgaImageArgs image; struct ImageOutputArgs output;
struct FpgaKernelArgs kernel;
}; };
struct FpgaEWAddArgs { // elementwise add arguments
struct EWAddArgs {
bool relu_enabled; bool relu_enabled;
void* image0_addr;
void* image1_addr; float const0; // output0 = const0 x input0 + const1 x input1;
void* result_addr; float const1;
uint32_t const0; struct ImageInputArgs image0;
uint32_t const1; struct ImageInputArgs image1;
uint32_t data_len; // aligned element count struct ImageOutputArgs output;
};
struct FpgaRegWriteArgs {
uint64_t address; //
uint64_t value;
}; };
int ComputeFpgaConv(struct FpgaConvArgs args); struct FpgaRegReadArgs {
int ComputeFpgaPool(struct FpgaPoolArgs args); uint64_t address;
int ComputeFpgaEWAdd(struct FpgaEWAddArgs args); uint64_t value;
};
#define IOCTL_FPGA_MAGIC 'FPGA'
#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 02, struct FpgaRegReadArgs)
#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 03, struct FpgaRegWriteArgs)
#define IOCTL_SEPARATOR_0 10
#define IOCTL_FPGA_MAGIC 'CNN'
#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
#define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs)
#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs) #define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs) #define IOCTL_SEPARATOR_1 20
#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)
#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs)
#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs)
#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs)
enum FPGA_ERR_TYPE {
ERR_IOCTL_CMD = -1,
ERR_TIMEOUT = -2,
ERR_COMPLETION_TIMEOUT = -3,
ERR_INVALID_FPGA_ADDR = -4,
ERR_NOMEM = -5,
ERR_NO_RESERVE_MEM = -6,
ERR_COPY_FROM_USER = -7,
ERR_COPY_TO_USER = -8,
ERR_DEL_TIMER = -9,
ERR_ENABLE_MSI = -10,
ERR_REGISTER_IRQ = -11,
ERR_PCIE_REGISTER = -12,
ERR_PCIE_PROBE = -13,
ERR_REGISTER_BLOCK = -14,
ERR_ALLOC_GENDISK = -15,
ERR_INIT_QUEUE = -16,
ERR_WAIT = -17,
ERR_ECC_ERROR = -31,
ERR_FPGA_FAIL_STOP = -64,
ERR_FPGA_DEBUG_STOP = -113,
DEV_TMP_UNAVAILABLE = -128
};
//============================== API =============================
int ComputeFpgaConv(struct ConvArgs args);
int ComputeFpgaPool(struct PoolingArgs args);
int ComputeFpgaEWAdd(struct EWAddArgs args);
} // namespace fpga } // namespace fpga
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -253,6 +253,18 @@ class Tensor { ...@@ -253,6 +253,18 @@ class Tensor {
"Tensor's dims_ is out of bound. "); "Tensor's dims_ is out of bound. ");
} }
#ifdef PADDLE_MOBILE_FPGA
struct FPGAArgs {
float scale;
inline float *scale_pointer() { return &scale; }
};
struct &fpga_args() const {
return fpgaArgs_;
}
#endif
private: private:
/** /**
* @note Placeholder hides type T, so it doesn't appear as a * @note Placeholder hides type T, so it doesn't appear as a
...@@ -319,6 +331,10 @@ class Tensor { ...@@ -319,6 +331,10 @@ class Tensor {
* begins. * begins.
*/ */
size_t offset_; size_t offset_;
#ifdef PADDLE_MOBILE_FPGA
FPGAArgs fpgaArgs_;
#endif
}; };
#ifdef PADDLE_MOBILE_DEBUG #ifdef PADDLE_MOBILE_DEBUG
......
...@@ -420,6 +420,6 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict( ...@@ -420,6 +420,6 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
template class Executor<CPU, Precision::FP32>; template class Executor<CPU, Precision::FP32>;
template class Executor<GPU_MALI, Precision::FP32>; template class Executor<GPU_MALI, Precision::FP32>;
template class Executor<FPGA, Precision::FP16>; template class Executor<FPGA, Precision::FP32>;
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -27,17 +27,17 @@ namespace memory { ...@@ -27,17 +27,17 @@ namespace memory {
const int MALLOC_ALIGN = 64; const int MALLOC_ALIGN = 64;
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
namespace api = paddle::mobile::fpga::api; namespace fpga = paddle_mobile::fpga;
void Copy(void *dst, const void *src, size_t num) { void Copy(void *dst, const void *src, size_t num) {
std::memcpy(dst, src, num); std::memcpy(dst, src, num);
} }
void *Alloc(size_t size) { return api::malloc(size); } void *Alloc(size_t size) { return fpga::fpga_malloc(size); }
void Free(void *ptr) { void Free(void *ptr) {
if (ptr) { if (ptr) {
api::fpga_free(ptr); fpga::fpga_free(ptr);
} }
} }
......
...@@ -29,7 +29,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> { ...@@ -29,7 +29,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
std::shared_ptr<framework::Scope> scope) std::shared_ptr<framework::Scope> scope)
: framework::OperatorBase<DeviceType>(type, inputs, outputs, attrs, : framework::OperatorBase<DeviceType>(type, inputs, outputs, attrs,
scope), scope),
param_(inputs, outputs, attrs, *scope) {} param_(inputs, outputs, attrs, scope.get()) {}
void RunImpl() const { param_.Out()->ShareDataWith(*param_.InputX()); } void RunImpl() const { param_.Out()->ShareDataWith(*param_.InputX()); }
void Init() {} void Init() {}
......
...@@ -14,8 +14,6 @@ limitations under the License. */ ...@@ -14,8 +14,6 @@ limitations under the License. */
#ifdef DROPOUT_OP #ifdef DROPOUT_OP
#pragma once
#include "operators/kernel/dropout_kernel.h" #include "operators/kernel/dropout_kernel.h"
#include <operators/math/transform.h> #include <operators/math/transform.h>
......
...@@ -17,7 +17,7 @@ limitations under the License. */ ...@@ -17,7 +17,7 @@ limitations under the License. */
#include "framework/operator.h" #include "framework/operator.h"
#include "operators/op_param.h" #include "operators/op_param.h"
#pragma once; #pragma once
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
......
...@@ -39,7 +39,7 @@ void ConcatKernel<FPGA, half>::Compute(const ConcatParam &param) const { ...@@ -39,7 +39,7 @@ void ConcatKernel<FPGA, half>::Compute(const ConcatParam &param) const {
for (int i = 0; i < inputs.size(); ++i) { for (int i = 0; i < inputs.size(); ++i) {
auto input = inputs[i]; auto input = inputs[i];
auto channels = input[3]; auto channels = input->dims()[3];
out_offset += channels; out_offset += channels;
auto src = input->data<half>(); auto src = input->data<half>();
for (int j = 0; j < pixels; ++j) { for (int j = 0; j < pixels; ++j) {
......
...@@ -20,13 +20,11 @@ limitations under the License. */ ...@@ -20,13 +20,11 @@ limitations under the License. */
#include "operators/math/vol2col.h" #include "operators/math/vol2col.h"
#include "operators/op_param.h" #include "operators/op_param.h"
#pragma once; #pragma once
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
using namespace framework;
template <typename DeviceType, typename T> template <typename DeviceType, typename T>
class Im2SequenceKernel class Im2SequenceKernel
: public framework::OpKernelBase<DeviceType, Im2SequenceParam> { : public framework::OpKernelBase<DeviceType, Im2SequenceParam> {
......
...@@ -14,8 +14,6 @@ limitations under the License. */ ...@@ -14,8 +14,6 @@ limitations under the License. */
#ifdef FUSION_FC_OP #ifdef FUSION_FC_OP
#pragma once
#include "operators/kernel/fusion_fc_kernel.h" #include "operators/kernel/fusion_fc_kernel.h"
namespace paddle_mobile { namespace paddle_mobile {
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#include "framework/operator.h" #include "framework/operator.h"
#include "operators/op_param.h" #include "operators/op_param.h"
#pragma once; #pragma once
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#include "framework/operator.h" #include "framework/operator.h"
#include "operators/op_param.h" #include "operators/op_param.h"
#pragma once; #pragma once
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#include "framework/operator.h" #include "framework/operator.h"
#include "operators/op_param.h" #include "operators/op_param.h"
#pragma once; #pragma once
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
......
...@@ -262,11 +262,11 @@ class ElementwiseAddParam : OpParam { ...@@ -262,11 +262,11 @@ class ElementwiseAddParam : OpParam {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
fpga::FpgaEWAddArgs fpga_EW_add_args; fpga::EWAddArgs fpga_EW_add_args;
public: public:
const fpga::FpgaEWAddArgs &FpgaArgs() const { return fpga_EW_add_args; } const fpga::EWAddArgs &FpgaArgs() const { return fpga_EW_add_args; }
void SetFpgaArgs(const fpga::FpgaEWAddArgs &args) { fpga_EW_add_args = args; } void SetFpgaArgs(const fpga::EWAddArgs &args) { fpga_EW_add_args = args; }
#endif #endif
}; };
...@@ -465,11 +465,11 @@ class PoolParam : public OpParam { ...@@ -465,11 +465,11 @@ class PoolParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
fpga::FpgaPoolArgs fpga_pool_args; fpga::PoolingArgs fpga_pool_args;
public: public:
const fpga::FpgaPoolArgs &FpgaArgs() const { return fpga_pool_args; } const fpga::PoolingArgs &FpgaArgs() const { return fpga_pool_args; }
void SetFpgaArgs(const fpga::FpgaPoolArgs &args) { fpga_pool_args = args; } void SetFpgaArgs(const fpga::PoolingArgs &args) { fpga_pool_args = args; }
#endif #endif
}; };
#endif #endif
...@@ -651,10 +651,10 @@ class MultiClassNMSParam : public OpParam { ...@@ -651,10 +651,10 @@ class MultiClassNMSParam : public OpParam {
class FeedParam : public OpParam { class FeedParam : public OpParam {
public: public:
FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs, FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, Scope const &scope) { const AttributeMap &attrs, Scope *scope) {
input_x_ = InputXFrom<LoDTensor>(inputs, scope); input_x_ = InputXFrom<LoDTensor>(inputs, *scope);
out_ = OutFrom<LoDTensor>(outputs, scope); out_ = OutFrom<LoDTensor>(outputs, *scope);
auto var = scope.Var("batch_size"); auto var = scope->Var("batch_size");
batch_size = var->GetValue<int>(); batch_size = var->GetValue<int>();
} }
const Tensor *InputX() const { return input_x_; } const Tensor *InputX() const { return input_x_; }
...@@ -933,11 +933,11 @@ class FusionFcParam : public OpParam { ...@@ -933,11 +933,11 @@ class FusionFcParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
fpga::FpgaConvArgs fpga_conv_args; fpga::ConvArgs fpga_conv_args;
public: public:
const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; } const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; } void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
#endif #endif
}; };
...@@ -991,11 +991,11 @@ class FusionConvAddParam : public OpParam { ...@@ -991,11 +991,11 @@ class FusionConvAddParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
fpga::FpgaConvArgs fpga_conv_args; fpga::ConvArgs fpga_conv_args;
public: public:
const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; } const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; } void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
#endif #endif
}; };
...@@ -1096,11 +1096,11 @@ class FusionConvAddBNReluParam : public OpParam { ...@@ -1096,11 +1096,11 @@ class FusionConvAddBNReluParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
fpga::FpgaConvArgs fpga_conv_args; fpga::ConvArgs fpga_conv_args;
public: public:
const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; } const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; } void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
#endif #endif
}; };
#endif #endif
...@@ -1190,11 +1190,11 @@ class FusionConvAddBNParam : public OpParam { ...@@ -1190,11 +1190,11 @@ class FusionConvAddBNParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
fpga::FpgaConvArgs fpga_conv_args; fpga::ConvArgs fpga_conv_args;
public: public:
const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; } const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; } void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
#endif #endif
}; };
#endif #endif
......
...@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <string>
#include "../test_helper.h" #include "../test_helper.h"
#include "io/loader.h" #include "io/loader.h"
...@@ -20,12 +22,10 @@ int main() { ...@@ -20,12 +22,10 @@ int main() {
// ../../../test/models/googlenet // ../../../test/models/googlenet
// ../../../test/models/mobilenet // ../../../test/models/mobilenet
// auto program = loader.Load(g_googlenet, true); // auto program = loader.Load(g_googlenet, true);
// auto program = loader.Load(g_mobilenet_ssd, true);
auto program = loader.Load(g_mobilenet_ssd, true); auto program = loader.Load(std::string(g_ocr) + "/model",
// auto program = loader.Load(g_googlenet_combine + "/model", std::string(g_ocr) + "/params", false);
// g_googlenet_combine +
// "/params", true);
// program.originProgram->Description("program desc: "); // program.originProgram->Description("program desc: ");
return 0; return 0;
} }
...@@ -24,6 +24,7 @@ limitations under the License. */ ...@@ -24,6 +24,7 @@ limitations under the License. */
#include "framework/ddim.h" #include "framework/ddim.h"
#include "framework/tensor.h" #include "framework/tensor.h"
static const char *g_ocr = "../models/ocr";
static const char *g_mobilenet_ssd = "../models/mobilenet+ssd"; static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture"; static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
static const char *g_squeezenet = "../models/squeezenet"; static const char *g_squeezenet = "../models/squeezenet";
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include <cstdlib>
#include <ctime>
#include "../test_helper.h"
#include "common/log.h"
#include "memory/t_malloc.h"
#include "operators/math/gemm.h"
// Element accessors for matrices stored as flat float arrays, indexed as
// row * leading_dimension + column. Each macro expands to a subscript on a
// pointer with the SAME name as the macro (a, b, c, c1) and reads the matching
// leading dimension (lda, ldb, ldc) by name, so all of those identifiers must
// be in scope wherever the macro is used. c and c1 both use ldc.
#define a(i, j) a[(i)*lda + (j)]
#define b(i, j) b[(i)*ldb + (j)]
#define c(i, j) c[(i)*ldc + (j)]
#define c1(i, j) c1[(i)*ldc + (j)]
// Writes an m-row matrix to stdout, one row per line.
//
// The first element of every row is printed unconditionally; the remaining
// columns 1..n-1 follow, each preceded by " | ". A blank line is emitted after
// the last row. Elements are fetched through the file-level c(i, j) macro,
// which indexes the buffer `c` using the leading dimension `ldc`.
void print_matirx(int m, int n, int ldc, float *c) {
  int row = 0;
  while (row < m) {
    std::cout << c(row, 0);
    int col = 1;
    while (col < n) {
      std::cout << " | " << c(row, col);
      ++col;
    }
    std::cout << std::endl;
    ++row;
  }
  std::cout << std::endl;
}
// Runs one randomized SgemmWithBn case and reports how many outputs agree
// with a naive reference computed in this function.
//
//   m, n, k  matrix sizes: A is m x k, B is k x n, C and the reference C1 are
//            m x n; scale and bias hold one value per row of C.
//   relu     when true, negative reference values are clamped to 0; the flag
//            is also forwarded to SgemmWithBn.
//   t1, t2   every random input value is generated as t1 + rand() % t2.
//   pr       when > 0, A, B, C and C1 are printed.
//
// Outputs are compared after truncation to int. The eq/neq counts are only
// written to stdout; the function always returns 0.
int do_sgemm(int m, int n, int k, bool relu, int t1, int t2, int pr) {
  // Leading dimensions; the a/b/c/c1 indexing macros read these by name.
  int lda = k;
  int ldb = n;
  int ldc = n;

  auto alloc_floats = [](size_t count) {
    return static_cast<float *>(
        paddle_mobile::memory::Alloc(sizeof(float) * count));
  };
  float *a = alloc_floats(static_cast<size_t>(m) * k);
  float *b = alloc_floats(static_cast<size_t>(k) * n);
  float *c = alloc_floats(static_cast<size_t>(m) * n);
  float *c1 = alloc_floats(static_cast<size_t>(m) * n);
  float *scale = alloc_floats(static_cast<size_t>(m));
  float *bias = alloc_floats(static_cast<size_t>(m));

  auto fill_random = [t1, t2](float *dst, int count) {
    for (int idx = 0; idx < count; ++idx) {
      dst[idx] = t1 + rand() % t2;
    }
  };

  // The fill order determines which rand() values land in which buffer.
  srand(unsigned(time(0)));
  fill_random(a, m * k);
  fill_random(b, k * n);
  fill_random(scale, m);
  fill_random(bias, m);

  // Naive reference: C1 = relu?(scale[row] * (A * B) + bias[row]).
  for (int row = 0; row < m; ++row) {
    for (int col = 0; col < n; ++col) {
      float acc = 0;
      for (int p = 0; p < k; ++p) {
        acc += a(row, p) * b(p, col);
      }
      acc *= scale[row];
      acc += bias[row];
      if (relu && acc < 0) {
        acc = 0;
      }
      c1(row, col) = acc;
    }
  }

  paddle_mobile::operators::math::SgemmWithBn(m, n, k, 0.9, a, lda, b, ldb,
                                              0.3, c, ldc, relu, scale, bias);

  // Count elements whose integer parts agree / disagree.
  int eq = 0;
  int neq = 0;
  const int total = m * n;
  for (int idx = 0; idx < total; ++idx) {
    const bool same = static_cast<int>(c[idx]) == static_cast<int>(c1[idx]);
    if (same) {
      ++eq;
    } else {
      ++neq;
    }
  }

  if (pr > 0) {
    std::cout << "A:" << std::endl;
    print_matirx(m, k, lda, a);
    std::cout << "B:" << std::endl;
    print_matirx(k, n, ldb, b);
    std::cout << "C:" << std::endl;
    print_matirx(m, n, ldc, c);
    std::cout << "C1:" << std::endl;
    print_matirx(m, n, ldc, c1);
  }

  std::cout << "mnk=" << m << " " << n << " " << k << " relu=" << relu
            << " eq=" << eq << " neq=" << neq << std::endl;

  float *buffers[] = {a, b, c, c1, scale, bias};
  for (float *buffer : buffers) {
    paddle_mobile::memory::Free(buffer);
  }

  return 0;
}
// Test driver: runs do_sgemm over a fixed list of shapes, first one small
// case with printing enabled, then cases without relu (inputs from
// 10 + rand() % 10) and with relu (inputs from -4 + rand() % 10).
int main() {
  struct SgemmCase {
    int m;
    int n;
    int k;
    bool relu;
    int t1;
    int t2;
    int pr;
  };

  static const SgemmCase kCases[] = {
      {9, 9, 9, true, 10, 10, 10},
      {10, 6, 12, false, 10, 10, 0},
      {512, 256, 384, false, 10, 10, 0},
      {1366, 768, 256, false, 10, 10, 0},
      {1255, 755, 333, false, 10, 10, 0},
      {555, 777, 999, false, 10, 10, 0},
      {10, 6, 12, true, -4, 10, 0},
      {512, 256, 384, true, -4, 10, 0},
      {1366, 768, 256, true, -4, 10, 0},
      {1255, 755, 333, true, -4, 10, 0},
      {555, 777, 999, true, -4, 10, 0},
  };

  for (const SgemmCase &tc : kCases) {
    do_sgemm(tc.m, tc.n, tc.k, tc.relu, tc.t1, tc.t2, tc.pr);
  }
  return 0;
}
...@@ -40,8 +40,8 @@ build_for_android() { ...@@ -40,8 +40,8 @@ build_for_android() {
fi fi
if [ -z "$PLATFORM" ]; then if [ -z "$PLATFORM" ]; then
# PLATFORM="arm-v7a" # Users could choose "arm-v8a" platform. PLATFORM="arm-v7a" # Users could choose "arm-v8a" platform.
PLATFORM="arm-v8a" # PLATFORM="arm-v8a"
fi fi
if [ "${PLATFORM}" = "arm-v7a" ]; then if [ "${PLATFORM}" = "arm-v7a" ]; then
......
...@@ -3,8 +3,8 @@ ...@@ -3,8 +3,8 @@
#include "src/enforce.h" #include "src/enforce.h"
#include "src/var_desc.h" #include "src/var_desc.h"
#include "src/program_desc.h" #include "src/program_desc.h"
#include <cstring>
#include <cstdlib> #include <cstdlib>
#include <string>
#include <cmath> #include <cmath>
#include <iostream> #include <iostream>
#include <utility> #include <utility>
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
#include "src/protobuf-c.h" #include "src/protobuf-c.h"
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <limits>
const size_t kSize64 = sizeof(uint64_t); const size_t kSize64 = sizeof(uint64_t);
const size_t kSize32 = sizeof(uint32_t); const size_t kSize32 = sizeof(uint32_t);
......
...@@ -19,6 +19,7 @@ limitations under the License. */ ...@@ -19,6 +19,7 @@ limitations under the License. */
#ifndef TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_ #ifndef TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_
#define TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_ #define TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_
#include <memory>
#include <vector> #include <vector>
#include "src/var_desc.h" #include "src/var_desc.h"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册