提交 33122506 编写于 作者: qnqinan's avatar qnqinan

Merge remote-tracking branch 'origin/develop' into develop

......@@ -29,15 +29,15 @@ limitations under the License. */
#include "fpga/api/fpga_api.h"
namespace paddle {
namespace mobile {
namespace paddle_mobile {
namespace fpga {
namespace api {
static int fd = -1;
static const char *device_path = "/dev/fpgadrv0";
static inline int do_ioctl(int req, void *arg) { return ioctl(req, arg); }
static inline int do_ioctl(int req, void *arg) {
return ioctl(req, (long unsigned int)arg);
}
int open_device() {
if (fd == -1) {
......@@ -48,8 +48,8 @@ int open_device() {
// memory management;
void *fpga_malloc(size_t size) {
return reinterpret_cast<(void *)> mmap64(NULL, size, PROT_READ | PROT_WRITE,
MAP_SHARED, fd, 0);
return reinterpret_cast<void *>(
mmap64(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0));
}
void fpga_free(void *ptr) { munmap(ptr, 0); }
......@@ -58,11 +58,9 @@ void fpga_copy(void *dest, const void *src, size_t num) {
memcpy(dest, src, num);
}
int ComputeFpgaConv(struct FpgaConvArgs) {}
int ComputeFpgaPool(struct FpgaPoolArgs) {}
int ComputeFpgaEWAdd(struct FpgaEWAddArgs) {}
int ComputeFpgaConv(struct ConvArgs args) {}
int ComputeFpgaPool(struct PoolingArgs args) {}
int ComputeFpgaEWAdd(struct EWAddArgs args) {}
} // namespace api
} // namespace fpga
} // namespace mobile
} // namespace paddle
} // namespace paddle_mobile
......@@ -31,90 +31,132 @@ void* fpga_malloc(size_t size);
void fpga_free(void* ptr);
void fpga_copy(void* dst, const void* src, size_t num);
struct FpgaVersionArgs {
void* buf;
};
struct MemoryToPhysicalArgs {
const void* src;
uint64_t physical;
struct VersionArgs {
void* buffer;
};
struct MemoryCopyArgs {
void* src;
void* dst;
void* dest;
size_t size;
};
struct FpgaQuantArgs {
float scale;
};
struct FpgaBNArgs {
bool enabled = false;
void* bias_addr;
void* scale_addr;
struct BNArgs {
bool enabled;
void* bias_address;
void* scale_address;
};
struct FpgaKernelArgs {
/**
Conv and Pooling kernel
*/
struct KernelArgs {
uint32_t width;
uint32_t height;
uint32_t stride_h;
uint32_t stride_w;
uint32_t stride_h;
};
struct FpgaImageArgs {
uint32_t width;
uint32_t height;
struct ImageInputArgs {
void* address; // input featuremap virtual address
float* scale_address; // input scale address;
uint32_t channels;
uint32_t pad_h;
uint32_t pad_w;
uint32_t width; // featuremap width
uint32_t height;
uint32_t pad_width; // padding width;
uint32_t pad_height;
};
struct ImageOutputArgs {
void* address; // output result address;
float* scale_address; // output scale address;
};
struct FpgaConvArgs {
struct ConvArgs {
bool relu_enabled;
struct FpgaBNArgs BNargs;
void* image_addr;
void* filter_addr;
void* bias_addr;
void* output_addr;
float quant_scale;
struct FpgaImageArgs image;
void* bias_address;
void* filter_address;
uint32_t filter_num;
uint32_t group_num;
struct FpgaKernelArgs kernel;
struct BNArgs bn;
struct KernelArgs kernel;
struct ImageInputArgs image; // input image;
struct ImageOutputArgs output;
};
struct FpgaPoolArgs {
void* image_addr;
void* output_addr;
struct FpgaImageArgs image;
struct FpgaKernelArgs kernel;
struct PoolingArgs {
struct KernelArgs kernel;
struct ImageInputArgs image; // input image;
struct ImageOutputArgs output;
};
struct FpgaEWAddArgs {
// elementwise add arguments
struct EWAddArgs {
bool relu_enabled;
void* image0_addr;
void* image1_addr;
void* result_addr;
uint32_t const0;
uint32_t const1;
uint32_t data_len; // aligned element count
float const0; // output0 = const0 x input0 + const1 x input1;
float const1;
struct ImageInputArgs image0;
struct ImageInputArgs image1;
struct ImageOutputArgs output;
};
struct FpgaRegWriteArgs {
uint64_t address; //
uint64_t value;
};
int ComputeFpgaConv(struct FpgaConvArgs args);
int ComputeFpgaPool(struct FpgaPoolArgs args);
int ComputeFpgaEWAdd(struct FpgaEWAddArgs args);
struct FpgaRegReadArgs {
uint64_t address;
uint64_t value;
};
#define IOCTL_FPGA_MAGIC 'FPGA'
#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 02, struct FpgaRegReadArgs)
#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 03, struct FpgaRegWriteArgs)
#define IOCTL_SEPARATOR_0 10
#define IOCTL_FPGA_MAGIC 'CNN'
#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
#define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs)
#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs)
#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)
#define IOCTL_SEPARATOR_1 20
#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs)
#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs)
#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs)
enum FPGA_ERR_TYPE {
ERR_IOCTL_CMD = -1,
ERR_TIMEOUT = -2,
ERR_COMPLETION_TIMEOUT = -3,
ERR_INVALID_FPGA_ADDR = -4,
ERR_NOMEM = -5,
ERR_NO_RESERVE_MEM = -6,
ERR_COPY_FROM_USER = -7,
ERR_COPY_TO_USER = -8,
ERR_DEL_TIMER = -9,
ERR_ENABLE_MSI = -10,
ERR_REGISTER_IRQ = -11,
ERR_PCIE_REGISTER = -12,
ERR_PCIE_PROBE = -13,
ERR_REGISTER_BLOCK = -14,
ERR_ALLOC_GENDISK = -15,
ERR_INIT_QUEUE = -16,
ERR_WAIT = -17,
ERR_ECC_ERROR = -31,
ERR_FPGA_FAIL_STOP = -64,
ERR_FPGA_DEBUG_STOP = -113,
DEV_TMP_UNAVAILABLE = -128
};
//============================== API =============================
int ComputeFpgaConv(struct ConvArgs args);
int ComputeFpgaPool(struct PoolingArgs args);
int ComputeFpgaEWAdd(struct EWAddArgs args);
} // namespace fpga
} // namespace paddle_mobile
......@@ -253,6 +253,18 @@ class Tensor {
"Tensor's dims_ is out of bound. ");
}
#ifdef PADDLE_MOBILE_FPGA
struct FPGAArgs {
float scale;
inline float *scale_pointer() { return &scale; }
};
struct &fpga_args() const {
return fpgaArgs_;
}
#endif
private:
/**
* @note Placeholder hides type T, so it doesn't appear as a
......@@ -319,6 +331,10 @@ class Tensor {
* begins.
*/
size_t offset_;
#ifdef PADDLE_MOBILE_FPGA
FPGAArgs fpgaArgs_;
#endif
};
#ifdef PADDLE_MOBILE_DEBUG
......
......@@ -420,6 +420,6 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
template class Executor<CPU, Precision::FP32>;
template class Executor<GPU_MALI, Precision::FP32>;
template class Executor<FPGA, Precision::FP16>;
template class Executor<FPGA, Precision::FP32>;
} // namespace paddle_mobile
......@@ -27,17 +27,17 @@ namespace memory {
const int MALLOC_ALIGN = 64;
#ifdef PADDLE_MOBILE_FPGA
namespace api = paddle::mobile::fpga::api;
namespace fpga = paddle_mobile::fpga;
void Copy(void *dst, const void *src, size_t num) {
std::memcpy(dst, src, num);
}
void *Alloc(size_t size) { return api::malloc(size); }
void *Alloc(size_t size) { return fpga::fpga_malloc(size); }
void Free(void *ptr) {
if (ptr) {
api::fpga_free(ptr);
fpga::fpga_free(ptr);
}
}
......
......@@ -29,7 +29,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
std::shared_ptr<framework::Scope> scope)
: framework::OperatorBase<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
param_(inputs, outputs, attrs, scope.get()) {}
void RunImpl() const { param_.Out()->ShareDataWith(*param_.InputX()); }
void Init() {}
......
......@@ -14,8 +14,6 @@ limitations under the License. */
#ifdef DROPOUT_OP
#pragma once
#include "operators/kernel/dropout_kernel.h"
#include <operators/math/transform.h>
......
......@@ -17,7 +17,7 @@ limitations under the License. */
#include "framework/operator.h"
#include "operators/op_param.h"
#pragma once;
#pragma once
namespace paddle_mobile {
namespace operators {
......
......@@ -39,7 +39,7 @@ void ConcatKernel<FPGA, half>::Compute(const ConcatParam &param) const {
for (int i = 0; i < inputs.size(); ++i) {
auto input = inputs[i];
auto channels = input[3];
auto channels = input->dims()[3];
out_offset += channels;
auto src = input->data<half>();
for (int j = 0; j < pixels; ++j) {
......
......@@ -20,13 +20,11 @@ limitations under the License. */
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
#pragma once;
#pragma once
namespace paddle_mobile {
namespace operators {
using namespace framework;
template <typename DeviceType, typename T>
class Im2SequenceKernel
: public framework::OpKernelBase<DeviceType, Im2SequenceParam> {
......
......@@ -14,8 +14,6 @@ limitations under the License. */
#ifdef FUSION_FC_OP
#pragma once
#include "operators/kernel/fusion_fc_kernel.h"
namespace paddle_mobile {
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#include "framework/operator.h"
#include "operators/op_param.h"
#pragma once;
#pragma once
namespace paddle_mobile {
namespace operators {
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#include "framework/operator.h"
#include "operators/op_param.h"
#pragma once;
#pragma once
namespace paddle_mobile {
namespace operators {
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#include "framework/operator.h"
#include "operators/op_param.h"
#pragma once;
#pragma once
namespace paddle_mobile {
namespace operators {
......
......@@ -262,11 +262,11 @@ class ElementwiseAddParam : OpParam {
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::FpgaEWAddArgs fpga_EW_add_args;
fpga::EWAddArgs fpga_EW_add_args;
public:
const fpga::FpgaEWAddArgs &FpgaArgs() const { return fpga_EW_add_args; }
void SetFpgaArgs(const fpga::FpgaEWAddArgs &args) { fpga_EW_add_args = args; }
const fpga::EWAddArgs &FpgaArgs() const { return fpga_EW_add_args; }
void SetFpgaArgs(const fpga::EWAddArgs &args) { fpga_EW_add_args = args; }
#endif
};
......@@ -465,11 +465,11 @@ class PoolParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::FpgaPoolArgs fpga_pool_args;
fpga::PoolingArgs fpga_pool_args;
public:
const fpga::FpgaPoolArgs &FpgaArgs() const { return fpga_pool_args; }
void SetFpgaArgs(const fpga::FpgaPoolArgs &args) { fpga_pool_args = args; }
const fpga::PoolingArgs &FpgaArgs() const { return fpga_pool_args; }
void SetFpgaArgs(const fpga::PoolingArgs &args) { fpga_pool_args = args; }
#endif
};
#endif
......@@ -651,10 +651,10 @@ class MultiClassNMSParam : public OpParam {
class FeedParam : public OpParam {
public:
FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, Scope const &scope) {
input_x_ = InputXFrom<LoDTensor>(inputs, scope);
out_ = OutFrom<LoDTensor>(outputs, scope);
auto var = scope.Var("batch_size");
const AttributeMap &attrs, Scope *scope) {
input_x_ = InputXFrom<LoDTensor>(inputs, *scope);
out_ = OutFrom<LoDTensor>(outputs, *scope);
auto var = scope->Var("batch_size");
batch_size = var->GetValue<int>();
}
const Tensor *InputX() const { return input_x_; }
......@@ -933,11 +933,11 @@ class FusionFcParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::FpgaConvArgs fpga_conv_args;
fpga::ConvArgs fpga_conv_args;
public:
const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
#endif
};
......@@ -991,11 +991,11 @@ class FusionConvAddParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::FpgaConvArgs fpga_conv_args;
fpga::ConvArgs fpga_conv_args;
public:
const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
#endif
};
......@@ -1096,11 +1096,11 @@ class FusionConvAddBNReluParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::FpgaConvArgs fpga_conv_args;
fpga::ConvArgs fpga_conv_args;
public:
const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
......@@ -1190,11 +1190,11 @@ class FusionConvAddBNParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::FpgaConvArgs fpga_conv_args;
fpga::ConvArgs fpga_conv_args;
public:
const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
......
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include "../test_helper.h"
#include "io/loader.h"
......@@ -20,12 +22,10 @@ int main() {
// ../../../test/models/googlenet
// ../../../test/models/mobilenet
// auto program = loader.Load(g_googlenet, true);
// auto program = loader.Load(g_mobilenet_ssd, true);
auto program = loader.Load(g_mobilenet_ssd, true);
// auto program = loader.Load(g_googlenet_combine + "/model",
// g_googlenet_combine +
// "/params", true);
auto program = loader.Load(std::string(g_ocr) + "/model",
std::string(g_ocr) + "/params", false);
// program.originProgram->Description("program desc: ");
return 0;
}
......@@ -24,6 +24,7 @@ limitations under the License. */
#include "framework/ddim.h"
#include "framework/tensor.h"
static const char *g_ocr = "../models/ocr";
static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
static const char *g_squeezenet = "../models/squeezenet";
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include <cstdlib>
#include <ctime>
#include "../test_helper.h"
#include "common/log.h"
#include "memory/t_malloc.h"
#include "operators/math/gemm.h"
#define a(i, j) a[(i)*lda + (j)]
#define b(i, j) b[(i)*ldb + (j)]
#define c(i, j) c[(i)*ldc + (j)]
#define c1(i, j) c1[(i)*ldc + (j)]
void print_matirx(int m, int n, int ldc, float *c) {
for (int i = 0; i < m; ++i) {
std::cout << c(i, 0);
for (int j = 1; j < n; ++j) {
std::cout << " | " << c(i, j);
}
std::cout << std::endl;
}
std::cout << std::endl;
}
int do_sgemm(int m, int n, int k, bool relu, int t1, int t2, int pr) {
int lda = k;
int ldb = n;
int ldc = n;
float *a = static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m * k));
float *b = static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * k * n));
float *c = static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m * n));
float *c1 = static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m * n));
float* scale = static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m));
float* bias = static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m));
srand(unsigned(time(0)));
for (int i = 0; i < m * k; ++i) {
a[i] = t1 + rand() % t2;
}
for (int i = 0; i < k * n; ++i) {
b[i] = t1 + rand() % t2;
}
for (int i = 0; i < m; ++i) {
scale[i] = t1 + rand() % t2;
}
for (int i = 0; i < m; ++i) {
bias[i] = t1 + rand() % t2;
}
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
float r = 0;
for (int p = 0; p < k; p++) {
r += a(i, p) * b(p, j);
}
r *= scale[i];
r += bias[i];
if (relu && (r < 0)) {
r = 0;
}
c1(i, j) = r;
}
}
paddle_mobile::operators::math::SgemmWithBn(m, n, k, 0.9, a, lda,
b, ldb, 0.3, c, ldc, relu, scale, bias);
int eq = 0;
int neq = 0;
for (int i = 0; i < m * n; ++i) {
if (static_cast<int>(c[i]) == static_cast<int>(c1[i])) {
++eq;
} else {
++neq;
}
}
if (pr > 0) {
std::cout << "A:" << std::endl;
print_matirx(m, k, lda, a);
std::cout << "B:" << std::endl;
print_matirx(k, n, ldb, b);
std::cout << "C:" << std::endl;
print_matirx(m, n, ldc, c);
std::cout << "C1:" << std::endl;
print_matirx(m, n, ldc, c1);
}
std::cout << "mnk=" << m << " " << n << " " << k <<
" relu=" << relu <<
" eq=" << eq << " neq=" << neq << std::endl;
paddle_mobile::memory::Free(a);
paddle_mobile::memory::Free(b);
paddle_mobile::memory::Free(c);
paddle_mobile::memory::Free(c1);
paddle_mobile::memory::Free(scale);
paddle_mobile::memory::Free(bias);
return 0;
}
int main() {
do_sgemm(9, 9, 9, true, 10, 10, 10);
do_sgemm(10, 6, 12, false, 10, 10, 0);
do_sgemm(512, 256, 384, false, 10, 10, 0);
do_sgemm(1366, 768, 256, false, 10, 10, 0);
do_sgemm(1255, 755, 333, false, 10, 10, 0);
do_sgemm(555, 777, 999, false, 10, 10, 0);
do_sgemm(10, 6, 12, true, -4, 10, 0);
do_sgemm(512, 256, 384, true, -4, 10, 0);
do_sgemm(1366, 768, 256, true, -4, 10, 0);
do_sgemm(1255, 755, 333, true, -4, 10, 0);
do_sgemm(555, 777, 999, true, -4, 10, 0);
return 0;
}
......@@ -40,8 +40,8 @@ build_for_android() {
fi
if [ -z "$PLATFORM" ]; then
# PLATFORM="arm-v7a" # Users could choose "arm-v8a" platform.
PLATFORM="arm-v8a"
PLATFORM="arm-v7a" # Users could choose "arm-v8a" platform.
# PLATFORM="arm-v8a"
fi
if [ "${PLATFORM}" = "arm-v7a" ]; then
......
......@@ -3,8 +3,8 @@
#include "src/enforce.h"
#include "src/var_desc.h"
#include "src/program_desc.h"
#include <cstring>
#include <cstdlib>
#include <string>
#include <cmath>
#include <iostream>
#include <utility>
......@@ -13,7 +13,7 @@
#include "src/protobuf-c.h"
#include <fstream>
#include <iostream>
#include <limits>
const size_t kSize64 = sizeof(uint64_t);
const size_t kSize32 = sizeof(uint32_t);
......
......@@ -19,6 +19,7 @@ limitations under the License. */
#ifndef TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_
#define TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_
#include <memory>
#include <vector>
#include "src/var_desc.h"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册