Commit cf86fb69 authored by qnqinan

Merge remote-tracking branch 'origin/develop' into develop

......@@ -9,7 +9,6 @@ option(LOG_PROFILE "log profile" ON)
option(CPU "armv7 with neon" ON)
option(MALI_GPU "mali gpu" OFF)
option(FPGA "fpga" OFF)
option(QUANTI "quantification" OFF)
file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
......@@ -163,7 +162,4 @@ if(DEBUGING)
endif()
endif()
if (QUANTI)
add_subdirectory(tools/quantification)
endif ()
......@@ -27,7 +27,7 @@ Paddle-Mobile is a project under the PaddlePaddle organization, dedicated to embedded platforms
- **ARM CPU**
![](http://mms-graph.bj.bcebos.com/paddle-mobile%2F2018_07_18.png)
![](http://mms-graph.bj.bcebos.com/paddle-mobile%2F2018_07_29.png)
ARM CPU is paddle-mobile's primary focus, and the generality of the CPU has always been its advantage. Embedded deep learning requires a large amount of hand-written CPU assembly; we are coding intensively to exploit every bit of acceleration the hardware offers.
ARM CPU optimization is still in progress; only conventional CPU optimizations are in place so far. On a Cortex-A73, paddle-mobile (armv7) currently runs a single-core MobileNet 1.0 inference in 110+ ms. This is clearly not our final goal: we are rewriting the hot paths in assembly, so there is still substantial headroom for improvement. Only armv7 is supported today; armv8 support will follow.
......
# Quantification: model quantization and dequantization
## Background
Models trained from some networks, such as AlexNet, are too large to be practical on mobile devices.
## How to shrink an oversized model
1. Choose a network architecture designed for mobile, such as MobileNet, GoogLeNet, YOLO, or SqueezeNet;
2. Use the quantization tool we provide, which shrinks a float32 model to roughly 1/4 of its original size with almost no loss of accuracy (see the sketch below).
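Under the hood the tool applies per-tensor linear min/max quantization: every float32 weight is mapped to one uint8 plus the tensor's min and max (this is the transform written by `tools/quantification/convert.cpp`). A minimal sketch of the forward and inverse mapping:

```c++
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Quantize one tensor to uint8 with its per-tensor min/max, mirroring the
// transform written by tools/quantification/convert.cpp.
std::vector<uint8_t> Quantize(const std::vector<float> &v, float *mn, float *mx) {
  *mn = *std::min_element(v.begin(), v.end());
  *mx = *std::max_element(v.begin(), v.end());
  float range = (*mx > *mn) ? (*mx - *mn) : 1.0f;  // guard constant tensors
  std::vector<uint8_t> q(v.size());
  for (size_t i = 0; i < v.size(); ++i)
    q[i] = static_cast<uint8_t>(std::round((v[i] - *mn) / range * 255));
  return q;
}

// Dequantize: the approximation the runtime reconstructs at load time.
inline float Dequantize(uint8_t factor, float mn, float mx) {
  return mn + factor / 255.0f * (mx - mn);
}
```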
- - - - -
## Quantization tool overview
### Model conversion tool locations:
- [Quantization tool directory](https://github.com/PaddlePaddle/paddle-mobile/tree/develop/tools/quantification)
- [Model conversion tool](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/tools/quantification/convert.cpp)
#### Usage
- [Tool usage](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/tools/quantification/README.md)
## How to load a quantized model
A quantification parameter has been added to the load method, defaulting to false. To load a quantized model, simply pass true.
[Source code](https://github.com/PaddlePaddle/paddle-mobile/blob/55302b33ea3bd68c9797d8f65e527544792b8095/src/io/paddle_mobile.h)
```c++
bool Load(const std::string &dirname, bool optimize = false,
bool quantification = false, int batch_size = 1);
```
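For example (a sketch; the model directory is a placeholder):

```c++
#include "io/paddle_mobile.h"

int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  // optimize = true enables the fusion pass; quantification = true tells the
  // loader the params were produced by the quantification tool.
  bool ok = paddle_mobile.Load("../models/mobilenet_min", /*optimize=*/true,
                               /*quantification=*/true);
  return ok ? 0 : 1;
}
```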
- - - - -
......@@ -17,39 +17,39 @@ limitations under the License. */
namespace paddle_mobile {
const std::string G_OP_TYPE_CONV = "conv2d";
const std::string G_OP_TYPE_BATCHNORM = "batch_norm";
const std::string G_OP_TYPE_BOX_CODER = "box_coder";
const std::string G_OP_TYPE_CONCAT = "concat";
const std::string G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add";
const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU = "fusion_conv_add_relu";
const std::string G_OP_TYPE_FUSION_CONV_ADD_BN_RELU = "fusion_conv_add_bn_relu";
const std::string G_OP_TYPE_FUSION_DWCONV_BN_RELU = "fusion_dwconv_bn_relu";
const std::string G_OP_TYPE_FUSION_CONV_BN_RELU = "fusion_conv_bn_relu";
const std::string G_OP_TYPE_FC = "fusion_fc";
const std::string G_OP_TYPE_FUSION_CONV_ADD = "fusion_conv_add";
const std::string G_OP_TYPE_LRN = "lrn";
const std::string G_OP_TYPE_MUL = "mul";
const std::string G_OP_TYPE_MULTICLASS_NMS = "multiclass_nms";
const std::string G_OP_TYPE_POOL2D = "pool2d";
const std::string G_OP_TYPE_PRIOR_BOX = "prior_box";
const std::string G_OP_TYPE_RELU = "relu";
const std::string G_OP_TYPE_RESHAPE = "reshape";
const std::string G_OP_TYPE_SIGMOID = "sigmoid";
const std::string G_OP_TYPE_SOFTMAX = "softmax";
const std::string G_OP_TYPE_TRANSPOSE = "transpose";
const std::string G_OP_TYPE_SPLIT = "split";
const std::string G_OP_TYPE_FEED = "feed";
const std::string G_OP_TYPE_FETCH = "fetch";
const std::string G_OP_TYPE_DEPTHWISE_CONV = "depthwise_conv2d";
const std::string G_OP_TYPE_IM2SEQUENCE = "im2sequence";
const std::string G_OP_TYPE_DROPOUT = "dropout";
const std::string G_OP_TYPE_FUSION_CONV_ADD_BN = "fusion_conv_add_bn";
const std::string G_OP_TYPE_FUSION_POOL_BN = "fusion_pool_bn";
const std::string G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU =
const char *G_OP_TYPE_CONV = "conv2d";
const char *G_OP_TYPE_BATCHNORM = "batch_norm";
const char *G_OP_TYPE_BOX_CODER = "box_coder";
const char *G_OP_TYPE_CONCAT = "concat";
const char *G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add";
const char *G_OP_TYPE_FUSION_CONV_ADD_RELU = "fusion_conv_add_relu";
const char *G_OP_TYPE_FUSION_CONV_ADD_BN_RELU = "fusion_conv_add_bn_relu";
const char *G_OP_TYPE_FUSION_DWCONV_BN_RELU = "fusion_dwconv_bn_relu";
const char *G_OP_TYPE_FUSION_CONV_BN_RELU = "fusion_conv_bn_relu";
const char *G_OP_TYPE_FC = "fusion_fc";
const char *G_OP_TYPE_FUSION_CONV_ADD = "fusion_conv_add";
const char *G_OP_TYPE_LRN = "lrn";
const char *G_OP_TYPE_MUL = "mul";
const char *G_OP_TYPE_MULTICLASS_NMS = "multiclass_nms";
const char *G_OP_TYPE_POOL2D = "pool2d";
const char *G_OP_TYPE_PRIOR_BOX = "prior_box";
const char *G_OP_TYPE_RELU = "relu";
const char *G_OP_TYPE_RESHAPE = "reshape";
const char *G_OP_TYPE_SIGMOID = "sigmoid";
const char *G_OP_TYPE_SOFTMAX = "softmax";
const char *G_OP_TYPE_TRANSPOSE = "transpose";
const char *G_OP_TYPE_SPLIT = "split";
const char *G_OP_TYPE_FEED = "feed";
const char *G_OP_TYPE_FETCH = "fetch";
const char *G_OP_TYPE_DEPTHWISE_CONV = "depthwise_conv2d";
const char *G_OP_TYPE_IM2SEQUENCE = "im2sequence";
const char *G_OP_TYPE_DROPOUT = "dropout";
const char *G_OP_TYPE_FUSION_CONV_ADD_BN = "fusion_conv_add_bn";
const char *G_OP_TYPE_FUSION_POOL_BN = "fusion_pool_bn";
const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU =
"fusion_elementwise_add_relu";
const std::string G_OP_TYPE_FUSION_FC_RELU = "fusion_fc_relu";
const std::string G_OP_TYPE_REGION = "region";
const char *G_OP_TYPE_FUSION_FC_RELU = "fusion_fc_relu";
const char *G_OP_TYPE_REGION = "region";
std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
......
......@@ -73,40 +73,40 @@ enum PMStatus {
PMWrongDevice = 0x08 /*!< incorrect device. */
};
extern const std::string G_OP_TYPE_CONV;
extern const std::string G_OP_TYPE_BATCHNORM;
extern const std::string G_OP_TYPE_BOX_CODER;
extern const std::string G_OP_TYPE_CONCAT;
extern const std::string G_OP_TYPE_ELEMENTWISE_ADD;
extern const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU;
extern const std::string G_OP_TYPE_FC;
extern const std::string G_OP_TYPE_FUSION_CONV_ADD;
extern const std::string G_OP_TYPE_FUSION_CONV_ADD_BN_RELU;
extern const std::string G_OP_TYPE_FUSION_DWCONV_BN_RELU;
extern const std::string G_OP_TYPE_FUSION_CONV_BN_RELU;
extern const std::string G_OP_TYPE_LRN;
extern const std::string G_OP_TYPE_MUL;
extern const std::string G_OP_TYPE_MULTICLASS_NMS;
extern const std::string G_OP_TYPE_POOL2D;
extern const std::string G_OP_TYPE_PRIOR_BOX;
extern const std::string G_OP_TYPE_RELU;
extern const std::string G_OP_TYPE_RESHAPE;
extern const std::string G_OP_TYPE_SIGMOID;
extern const std::string G_OP_TYPE_SOFTMAX;
extern const std::string G_OP_TYPE_TRANSPOSE;
extern const std::string G_OP_TYPE_SPLIT;
extern const std::string G_OP_TYPE_FEED;
extern const std::string G_OP_TYPE_FETCH;
extern const std::string G_OP_TYPE_DEPTHWISE_CONV;
extern const std::string G_OP_TYPE_IM2SEQUENCE;
extern const std::string G_OP_TYPE_DROPOUT;
extern const std::string G_OP_TYPE_FUSION_CONV_ADD_BN;
extern const std::string G_OP_TYPE_FUSION_POOL_BN;
extern const std::string G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU;
extern const std::string G_OP_TYPE_FUSION_FC_RELU;
extern const std::string G_OP_TYPE_REGION;
extern const char *G_OP_TYPE_CONV;
extern const char *G_OP_TYPE_BATCHNORM;
extern const char *G_OP_TYPE_BOX_CODER;
extern const char *G_OP_TYPE_CONCAT;
extern const char *G_OP_TYPE_ELEMENTWISE_ADD;
extern const char *G_OP_TYPE_FUSION_CONV_ADD_RELU;
extern const char *G_OP_TYPE_FC;
extern const char *G_OP_TYPE_FUSION_CONV_ADD;
extern const char *G_OP_TYPE_FUSION_CONV_ADD_BN_RELU;
extern const char *G_OP_TYPE_FUSION_DWCONV_BN_RELU;
extern const char *G_OP_TYPE_FUSION_CONV_BN_RELU;
extern const char *G_OP_TYPE_LRN;
extern const char *G_OP_TYPE_MUL;
extern const char *G_OP_TYPE_MULTICLASS_NMS;
extern const char *G_OP_TYPE_POOL2D;
extern const char *G_OP_TYPE_PRIOR_BOX;
extern const char *G_OP_TYPE_RELU;
extern const char *G_OP_TYPE_RESHAPE;
extern const char *G_OP_TYPE_SIGMOID;
extern const char *G_OP_TYPE_SOFTMAX;
extern const char *G_OP_TYPE_TRANSPOSE;
extern const char *G_OP_TYPE_SPLIT;
extern const char *G_OP_TYPE_FEED;
extern const char *G_OP_TYPE_FETCH;
extern const char *G_OP_TYPE_DEPTHWISE_CONV;
extern const char *G_OP_TYPE_IM2SEQUENCE;
extern const char *G_OP_TYPE_DROPOUT;
extern const char *G_OP_TYPE_FUSION_CONV_ADD_BN;
extern const char *G_OP_TYPE_FUSION_POOL_BN;
extern const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU;
extern const char *G_OP_TYPE_FUSION_FC_RELU;
extern const char *G_OP_TYPE_REGION;
extern std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
......
......@@ -58,6 +58,10 @@ void fpga_copy(void *dest, const void *src, size_t num) {
memcpy(dest, src, num);
}
int ComputeFpgaConv(struct FpgaConvArgs) { return 0; }
int ComputeFpgaPool(struct FpgaPoolArgs) { return 0; }
int ComputeFpgaEWAdd(struct FpgaEWAddArgs) { return 0; }
} // namespace api
} // namespace fpga
} // namespace paddle_mobile
......
......@@ -32,24 +32,55 @@ void *fpga_malloc(size_t size);
void fpga_free(void *ptr);
void fpga_copy(void *dst, const void *src, size_t num);
struct CnnVersionArgs {
struct FpgaVersionArgs {
void *buf;
};
struct QuantArgs {
struct MemoryToPhysicalArgs {
const void *src;
uint64_t physical;
};
struct MemoryCopyArgs {
void *src;
void *dst;
size_t size;
};
struct FpgaQuantArgs {
float scale;
};
struct BatchNormalizationArgs {
bool enable;
struct FpgaBNArgs {};
struct FpgaConvArgs {
bool enable_BN = false;
bool enable_Relu = false;
struct FpgaBNParam bn_parm;
};
struct FpgaPoolArgs {
bool enable_BN = false;
struct FpgaBNParam bn_parm;
};
struct FpgaEWAddArgs { // only support X + Y
bool enable_Relu = false;
};
struct ScaleArgs {};
int ComputeFpgaConv(struct FpgaConvArgs);
int ComputeFpgaPool(struct FpgaPoolArgs);
int ComputeFpgaEWAdd(struct FpgaEWAddArgs);
#define IOCTL_CNN_MAGIC 'CNN'
#define IOCTL_VERSION _IOW(IOCTL_CNN_MAGIC, 1, struct CnnVersionArgs)
#define IOCTL_GET_QUANT _IOW(IOCTL_CNN_MAGIC, 2, struct QuantArgs)
#define IOCTL_SET_QUANT _IOW(IOCTL_CNN_MAGIC, 3, struct QuantArgs)
#define IOCTL_FPGA_MAGIC 'FPGA'
#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
#define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs)
#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
#define IOCTL_MEM_TOPHY _IOW(IOCTL_FPGA_MAGIC, 12, struct MemoryToPhysicalArgs)
#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs)
#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)
} // namespace api
} // namespace fpga
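For orientation, a sketch of how a user-space caller might exercise these ioctls once this header is included. The device node name "/dev/fpga" and the exact namespace qualification are assumptions, not something this commit defines:

```c++
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>

// Hypothetical probe: query the bitstream version, then read the current
// quantization scale. "/dev/fpga" is an assumed device node name.
void ProbeFpga() {
  int fd = open("/dev/fpga", O_RDWR);
  if (fd < 0) return;
  paddle_mobile::fpga::api::FpgaVersionArgs version = {};
  ioctl(fd, IOCTL_VERSION, &version);
  paddle_mobile::fpga::api::FpgaQuantArgs quant = {};
  ioctl(fd, IOCTL_GET_QUANT, &quant);
  close(fd);
}
```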
......
......@@ -92,8 +92,8 @@ void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb,
*/
// Pack blocks of matrix A into contiguous memory (RowMajor)
void PackMatrixA_(int m, int k, int m_tail, const float *A, int lda,
float *buffer) {
void PackMatrixA_4r(int m, int k, int m_tail, const float *A, int lda,
float *buffer) {
const float *a0, *a1, *a2, *a3;
for (int i = 0; i < m - m_tail; i += MR) {
a0 = A + i * lda;
......@@ -131,9 +131,62 @@ void PackMatrixA_(int m, int k, int m_tail, const float *A, int lda,
}
}
void PackMatrixA_6r(int m, int k, int m_tail, const float *A, int lda,
float *buffer) {
const float *a0, *a1, *a2, *a3, *a4, *a5;
for (int i = 0; i < m - m_tail; i += MR) {
a0 = A + i * lda;
a1 = A + (i + 1) * lda;
a2 = A + (i + 2) * lda;
a3 = A + (i + 3) * lda;
a4 = A + (i + 4) * lda;
a5 = A + (i + 5) * lda;
for (int j = 0; j < k; ++j) {
*buffer++ = *a0++;
*buffer++ = *a1++;
*buffer++ = *a2++;
*buffer++ = *a3++;
*buffer++ = *a4++;
*buffer++ = *a5++;
}
}
int i = m - m_tail;
a0 = &A(i, 0);
a1 = a0 + lda;
a2 = a0 + 2 * lda;
a3 = a0 + 3 * lda;
a4 = a0 + 4 * lda;
a5 = a0 + 5 * lda;
if (m_tail != 0) {
if (m_tail <= 5) {
a5 = zero;
}
if (m_tail <= 4) {
a4 = zero;
}
if (m_tail <= 3) {
a3 = zero;
}
if (m_tail <= 2) {
a2 = zero;
}
if (m_tail <= 1) {
a1 = zero;
}
for (int j = 0; j < k; ++j) {
*buffer++ = *a0++;
*buffer++ = *a1++;
*buffer++ = *a2++;
*buffer++ = *a3++;
*buffer++ = *a4++;
*buffer++ = *a5++;
}
}
}
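For clarity, the full-panel case of the packing above in plain loops (a reference sketch; the zero-padded tail handling is omitted):

```c++
// Interleave six rows of A at a time so AddDot6x8 can stream them linearly:
// element (i + r, j) of A lands at buffer[(i / 6) * 6 * k + j * 6 + r].
void PackA6rReference(int m, int k, const float *A, int lda, float *buffer) {
  for (int i = 0; i + 6 <= m; i += 6)   // one 6-row panel per iteration
    for (int j = 0; j < k; ++j)         // walk the panel column by column
      for (int r = 0; r < 6; ++r)       // interleave the six rows
        *buffer++ = A[(i + r) * lda + j];
}
```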
// Pack blocks of matrix B into contiguous memory (RowMajor)
void PackMatrixB_(int k, int n, int n_tail, const float *B, int ldb,
float *buffer) {
void PackMatrixB_8c(int k, int n, int n_tail, const float *B, int ldb,
float *buffer) {
const float *b0;
for (int j = 0; j < n - n_tail; j += NR) {
for (int i = 0; i < k; ++i) {
......@@ -188,7 +241,8 @@ void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b,
for (int j = 0; j < nc; j += NR) {
for (int i = 0; i < mc; i += MR) {
// AddDot4x4(KC, a + i * KC, b + j * KC, c + i * NC + j, NC);
AddDot4x8(KC, a + i * KC, b + j * KC, c + i * NC + j, NC);
// AddDot4x8(KC, a + i * KC, b + j * KC, c + i * NC + j, NC);
AddDot6x8(KC, a + i * KC, b + j * KC, c + i * NC + j, NC);
}
}
......@@ -218,7 +272,8 @@ void InnerKernelWithBn(int mc, int nc, float alpha, const float *a,
for (int j = 0; j < nc; j += NR) {
for (int i = 0; i < mc; i += MR) {
// AddDot4x4(KC, a + i * KC, b + j * KC, c + i * NC + j, NC);
AddDot4x8(KC, a + i * KC, b + j * KC, c + i * NC + j, NC);
// AddDot4x8(KC, a + i * KC, b + j * KC, c + i * NC + j, NC);
AddDot6x8(KC, a + i * KC, b + j * KC, c + i * NC + j, NC);
}
}
......@@ -1868,22 +1923,22 @@ void Sgemm(int m, int n, int k, float alpha, const float *A, int lda,
const float *B, int ldb, float beta, float *C, int ldc, bool relu) {
// L1 data cache is 32 KiB (per Cortex-A57, Cortex-A72, Cortex-A73)
// L2 cache is 0.5~4 MiB (Cortex-A72 cluster)
int L1 = 30 * 1024;
int L2 = 1 * 1024 * 1024;
int L1 = 32 * 1024;
int L2 = 0.5 * 1024 * 1024;
KC = k;
MC = L2 / (2 * KC * sizeof(float));
NC = MC;
MC = L1 / (KC * sizeof(float));
NC = L2 / (KC * sizeof(float));
// make sure MC is multiple of 4, and NC is multiple of 8
// make sure MC is multiple of MR, and NC is multiple of NR
int mblock_num = (m + MC - 1) / MC;
MC = (m + mblock_num - 1) / mblock_num;
MC = (MC + 4 - 1) / 4 * 4;
MC = (MC + MR - 1) / MR * MR;
// DLOG << "mblock_num = " << mblock_num << ", MC = " << MC << "\n";
int nblock_num = (n + NC - 1) / NC;
NC = (n + nblock_num - 1) / nblock_num;
NC = (NC + 8 - 1) / 8 * 8;
NC = (NC + NR - 1) / NR * NR;
// DLOG << "nblock_num = " << nblock_num << ", NC = " << NC << "\n";
packedA = static_cast<float *>(
......@@ -1901,10 +1956,10 @@ void Sgemm(int m, int n, int k, float alpha, const float *A, int lda,
int mc, nc;
for (int j = 0; j < n; j += NC) {
nc = s_min(n - j, NC);
PackMatrixB_(KC, nc, nc % NR, &B(0, j), ldb, packedB);
PackMatrixB_8c(KC, nc, nc % NR, &B(0, j), ldb, packedB);
for (int i = 0; i < m; i += MC) {
mc = s_min(m - i, MC);
PackMatrixA_(mc, KC, mc % MR, &A(i, 0), lda, packedA);
PackMatrixA_6r(mc, KC, mc % MR, &A(i, 0), lda, packedA);
InnerKernel(mc, nc, alpha, packedA, packedB, beta, packedC, &C(i, j), ldc,
relu);
}
......@@ -1921,22 +1976,22 @@ void SgemmWithBn(int m, int n, int k, float alpha, const float *A, int lda,
bool relu, float *new_scale, float *new_bias) {
// L1 data cache is 32 KiB (per Cortex-A57, Cortex-A72, Cortex-A73)
// L2 cache is 0.5~4 MiB (Cortex-A72 cluster)
int L1 = 30 * 1024;
int L2 = 1 * 1024 * 1024;
int L1 = 32 * 1024;
int L2 = 0.5 * 1024 * 1024;
KC = k;
MC = L2 / (2 * KC * sizeof(float));
NC = MC;
MC = L1 / (KC * sizeof(float));
NC = L2 / (KC * sizeof(float));
// make sure MC is multiple of 4, and NC is multiple of 8
// make sure MC is multiple of MR, and NC is multiple of NR
int mblock_num = (m + MC - 1) / MC;
MC = (m + mblock_num - 1) / mblock_num;
MC = (MC + 4 - 1) / 4 * 4;
MC = (MC + MR - 1) / MR * MR;
// DLOG << "mblock_num = " << mblock_num << ", MC = " << MC << "\n";
int nblock_num = (n + NC - 1) / NC;
NC = (n + nblock_num - 1) / nblock_num;
NC = (NC + 8 - 1) / 8 * 8;
NC = (NC + NR - 1) / NR * NR;
// DLOG << "nblock_num = " << nblock_num << ", NC = " << NC << "\n";
packedA = static_cast<float *>(
......@@ -1954,10 +2009,10 @@ void SgemmWithBn(int m, int n, int k, float alpha, const float *A, int lda,
int mc, nc;
for (int j = 0; j < n; j += NC) {
nc = s_min(n - j, NC);
PackMatrixB_(KC, nc, nc % NR, &B(0, j), ldb, packedB);
PackMatrixB_8c(KC, nc, nc % NR, &B(0, j), ldb, packedB);
for (int i = 0; i < m; i += MC) {
mc = s_min(m - i, MC);
PackMatrixA_(mc, KC, mc % MR, &A(i, 0), lda, packedA);
PackMatrixA_6r(mc, KC, mc % MR, &A(i, 0), lda, packedA);
InnerKernelWithBn(mc, nc, alpha, packedA, packedB, beta, packedC,
&C(i, j), ldc, relu, new_scale + i, new_bias + i);
}
......@@ -1969,6 +2024,221 @@ void SgemmWithBn(int m, int n, int k, float alpha, const float *A, int lda,
paddle_mobile::memory::Free(zero);
}
void AddDot6x8(int k, const float *a, const float *b, float *c, int ldc) {
#if __ARM_NEON
#if __aarch64__
// init C
float32x4_t cv0 = vdupq_n_f32(0.0);
float32x4_t cv1 = vdupq_n_f32(0.0);
float32x4_t cv2 = vdupq_n_f32(0.0);
float32x4_t cv3 = vdupq_n_f32(0.0);
float32x4_t cv4 = vdupq_n_f32(0.0);
float32x4_t cv5 = vdupq_n_f32(0.0);
float32x4_t cv6 = vdupq_n_f32(0.0);
float32x4_t cv7 = vdupq_n_f32(0.0);
float32x4_t cv8 = vdupq_n_f32(0.0);
float32x4_t cv9 = vdupq_n_f32(0.0);
float32x4_t cv10 = vdupq_n_f32(0.0);
float32x4_t cv11 = vdupq_n_f32(0.0);
float32x4_t av;
float32x4_t bv0;
float32x4_t bv1;
float32x2_t av01;
float32x2_t av23;
float32x2_t av45;
for (int p = 0; p < k; p += 1) {
av = vld1q_f32(a);
av01 = vget_low_f32(av);
av23 = vget_high_f32(av);
av45 = vld1_f32(a + 4);
bv0 = vld1q_f32(b);
bv1 = vld1q_f32(b + 4);
cv0 = vmlaq_lane_f32(cv0, bv0, av01, 0);
cv1 = vmlaq_lane_f32(cv1, bv1, av01, 0);
cv2 = vmlaq_lane_f32(cv2, bv0, av01, 1);
cv3 = vmlaq_lane_f32(cv3, bv1, av01, 1);
cv4 = vmlaq_lane_f32(cv4, bv0, av23, 0);
cv5 = vmlaq_lane_f32(cv5, bv1, av23, 0);
cv6 = vmlaq_lane_f32(cv6, bv0, av23, 1);
cv7 = vmlaq_lane_f32(cv7, bv1, av23, 1);
cv8 = vmlaq_lane_f32(cv8, bv0, av45, 0);
cv9 = vmlaq_lane_f32(cv9, bv1, av45, 0);
cv10 = vmlaq_lane_f32(cv10, bv0, av45, 1);
cv11 = vmlaq_lane_f32(cv11, bv1, av45, 1);
a += MR;
b += NR;
}
vst1q_f32(c, cv0);
vst1q_f32(c + 4, cv1);
vst1q_f32(c + ldc, cv2);
vst1q_f32(c + ldc + 4, cv3);
vst1q_f32(c + 2 * ldc, cv4);
vst1q_f32(c + 2 * ldc + 4, cv5);
vst1q_f32(c + 3 * ldc, cv6);
vst1q_f32(c + 3 * ldc + 4, cv7);
vst1q_f32(c + 4 * ldc, cv8);
vst1q_f32(c + 4 * ldc + 4, cv9);
vst1q_f32(c + 5 * ldc, cv10);
vst1q_f32(c + 5 * ldc + 4, cv11);
#else
const float *a_ptr, *b_ptr;
a_ptr = a;
b_ptr = b;
int kc1 = k / 4;
int kc2 = k % 4;
int step = 4 * ldc;
asm volatile(
"pld [%[a_ptr]] \n\t"
"pld [%[b_ptr]] \n\t"
"pld [%[a_ptr], #64] \n\t"
"pld [%[b_ptr], #64] \n\t"
"vmov.f32 q4, #0.0 \n\t"
"vmov.f32 q5, #0.0 \n\t"
"vmov.f32 q6, #0.0 \n\t"
"vmov.f32 q7, #0.0 \n\t"
"vmov.f32 q8, #0.0 \n\t"
"vmov.f32 q9, #0.0 \n\t"
"vmov.f32 q10, #0.0 \n\t"
"vmov.f32 q11, #0.0 \n\t"
"vmov.f32 q12, #0.0 \n\t"
"vmov.f32 q13, #0.0 \n\t"
"vmov.f32 q14, #0.0 \n\t"
"vmov.f32 q15, #0.0 \n\t"
"subs %[kc1], %[kc1], #1 \n\t"
"blt end_kc1_%= \n\t"
"loop_kc1_%=: \n\t"
// "pld [%[a_ptr], #128] \n\t"
// "pld [%[b_ptr], #128] \n\t"
// "pld [%[a_ptr], #192] \n\t"
// "pld [%[b_ptr], #192] \n\t"
"vld1.32 {d0-d2}, [%[a_ptr]]! \n\t"
"vld1.32 {q2, q3}, [%[b_ptr]]! \n\t"
"vmla.f32 q4, q2, d0[0] \n\t"
"vmla.f32 q5, q3, d0[0] \n\t"
"vmla.f32 q6, q2, d0[1] \n\t"
"vmla.f32 q7, q3, d0[1] \n\t"
"vmla.f32 q8, q2, d1[0] \n\t"
"vmla.f32 q9, q3, d1[0] \n\t"
"vmla.f32 q10, q2, d1[1] \n\t"
"vmla.f32 q11, q3, d1[1] \n\t"
"vmla.f32 q12, q2, d2[0] \n\t"
"vmla.f32 q13, q3, d2[0] \n\t"
"vmla.f32 q14, q2, d2[1] \n\t"
"vmla.f32 q15, q3, d2[1] \n\t"
"vld1.32 {d0-d2}, [%[a_ptr]]! \n\t"
"vld1.32 {q2, q3}, [%[b_ptr]]! \n\t"
"vmla.f32 q4, q2, d0[0] \n\t"
"vmla.f32 q5, q3, d0[0] \n\t"
"vmla.f32 q6, q2, d0[1] \n\t"
"vmla.f32 q7, q3, d0[1] \n\t"
"vmla.f32 q8, q2, d1[0] \n\t"
"vmla.f32 q9, q3, d1[0] \n\t"
"vmla.f32 q10, q2, d1[1] \n\t"
"vmla.f32 q11, q3, d1[1] \n\t"
"vmla.f32 q12, q2, d2[0] \n\t"
"vmla.f32 q13, q3, d2[0] \n\t"
"vmla.f32 q14, q2, d2[1] \n\t"
"vmla.f32 q15, q3, d2[1] \n\t"
"vld1.32 {d0-d2}, [%[a_ptr]]! \n\t"
"vld1.32 {q2, q3}, [%[b_ptr]]! \n\t"
"vmla.f32 q4, q2, d0[0] \n\t"
"vmla.f32 q5, q3, d0[0] \n\t"
"vmla.f32 q6, q2, d0[1] \n\t"
"vmla.f32 q7, q3, d0[1] \n\t"
"vmla.f32 q8, q2, d1[0] \n\t"
"vmla.f32 q9, q3, d1[0] \n\t"
"vmla.f32 q10, q2, d1[1] \n\t"
"vmla.f32 q11, q3, d1[1] \n\t"
"vmla.f32 q12, q2, d2[0] \n\t"
"vmla.f32 q13, q3, d2[0] \n\t"
"vmla.f32 q14, q2, d2[1] \n\t"
"vmla.f32 q15, q3, d2[1] \n\t"
"vld1.32 {d0-d2}, [%[a_ptr]]! \n\t"
"vld1.32 {q2, q3}, [%[b_ptr]]! \n\t"
"vmla.f32 q4, q2, d0[0] \n\t"
"vmla.f32 q5, q3, d0[0] \n\t"
"vmla.f32 q6, q2, d0[1] \n\t"
"vmla.f32 q7, q3, d0[1] \n\t"
"vmla.f32 q8, q2, d1[0] \n\t"
"vmla.f32 q9, q3, d1[0] \n\t"
"vmla.f32 q10, q2, d1[1] \n\t"
"vmla.f32 q11, q3, d1[1] \n\t"
"vmla.f32 q12, q2, d2[0] \n\t"
"vmla.f32 q13, q3, d2[0] \n\t"
"vmla.f32 q14, q2, d2[1] \n\t"
"vmla.f32 q15, q3, d2[1] \n\t"
"subs %[kc1], %[kc1], #1 \n\t"
"bge loop_kc1_%= \n\t"
"end_kc1_%=: \n\t"
"subs %[kc2], %[kc2], #1 \n\t"
"blt end_kc2_%= \n\t"
"loop_kc2_%=: \n\t"
"vld1.32 {d0-d2}, [%[a_ptr]]! \n\t"
"vld1.32 {q2, q3}, [%[b_ptr]]! \n\t"
"vmla.f32 q4, q2, d0[0] \n\t"
"vmla.f32 q5, q3, d0[0] \n\t"
"vmla.f32 q6, q2, d0[1] \n\t"
"vmla.f32 q7, q3, d0[1] \n\t"
"vmla.f32 q8, q2, d1[0] \n\t"
"vmla.f32 q9, q3, d1[0] \n\t"
"vmla.f32 q10, q2, d1[1] \n\t"
"vmla.f32 q11, q3, d1[1] \n\t"
"vmla.f32 q12, q2, d2[0] \n\t"
"vmla.f32 q13, q3, d2[0] \n\t"
"vmla.f32 q14, q2, d2[1] \n\t"
"vmla.f32 q15, q3, d2[1] \n\t"
"subs %[kc2], %[kc2], #1 \n\t"
"bge loop_kc2_%= \n\t"
"end_kc2_%=: \n\t"
"mov r5, %[c] \n\t"
"mov r6, %[step] \n\t"
"vst1.32 {q4, q5}, [r5], r6 \n\t"
"vst1.32 {q6, q7}, [r5], r6 \n\t"
"vst1.32 {q8, q9}, [r5], r6 \n\t"
"vst1.32 {q10, q11}, [r5], r6 \n\t"
"vst1.32 {q12, q13}, [r5], r6 \n\t"
"vst1.32 {q14, q15}, [r5] \n\t"
:
: [a_ptr] "r"(a_ptr), [b_ptr] "r"(b_ptr), [c] "r"(c), [kc1] "r"(kc1),
[kc2] "r"(kc2), [step] "r"(step)
: "memory", "r5", "r6", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15");
#endif // __aarch64__
#else
#endif // __ARM_NEON
}
} // namespace math
} // namespace operators
} // namespace paddle_mobile
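As a plain-scalar reference, AddDot6x8 computes a 6x8 tile of C from the packed panels; the NEON path above keeps the whole tile in twelve q-registers. A sketch that overwrites C exactly as the kernel's stores do:

```c++
// c is a 6x8 tile with row stride ldc; a is packed 6-wide (PackMatrixA_6r),
// b is packed 8-wide (PackMatrixB_8c).
void AddDot6x8Reference(int k, const float *a, const float *b, float *c, int ldc) {
  float acc[6][8] = {};                    // accumulators, like q4..q15
  for (int p = 0; p < k; ++p)
    for (int i = 0; i < 6; ++i)
      for (int j = 0; j < 8; ++j)
        acc[i][j] += a[p * 6 + i] * b[p * 8 + j];
  for (int i = 0; i < 6; ++i)
    for (int j = 0; j < 8; ++j)
      c[i * ldc + j] = acc[i][j];
}
```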
......@@ -19,7 +19,7 @@ limitations under the License. */
#define B(i, j) B[(i)*ldb + (j)]
#define C(i, j) C[(i)*ldc + (j)]
#define MR 4
#define MR 6
#define NR 8
#define s_min(i, j) ((i) < (j) ? (i) : (j))
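A worked example of how the new MR = 6 / NR = 8 values interact with the cache blocking in Sgemm shown earlier (a sketch of the sizing step only, before the per-m/per-n balancing; KC = 256 is an assumed panel depth):

```c++
#include <cstdio>

int main() {
  const int MR = 6, NR = 8;
  const int L1 = 32 * 1024, L2 = 512 * 1024, KC = 256;
  int MC = static_cast<int>(L1 / (KC * sizeof(float)));  // 32768 / 1024 = 32
  int NC = static_cast<int>(L2 / (KC * sizeof(float)));  // 524288 / 1024 = 512
  MC = (MC + MR - 1) / MR * MR;  // round up to a multiple of MR -> 36
  NC = (NC + NR - 1) / NR * NR;  // already a multiple of NR     -> 512
  printf("MC = %d, NC = %d\n", MC, NC);
  return 0;
}
```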
......@@ -39,12 +39,14 @@ void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb,
*/
// Pack blocks of matrix A into contiguous memory (RowMajor)
void PackMatrixA_(int m, int k, int m_tail, const float *A, int lda,
float *buffer);
void PackMatrixA_4r(int m, int k, int m_tail, const float *A, int lda,
float *buffer);
void PackMatrixA_6r(int m, int k, int m_tail, const float *A, int lda,
float *buffer);
// Pack blocks of matrix B into contiguous memory (RowMajor)
void PackMatrixB_(int k, int n, int n_tail, const float *B, int ldb,
float *buffer);
void PackMatrixB_8c(int k, int n, int n_tail, const float *B, int ldb,
float *buffer);
// Blocked matrix multiplication
void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b,
......@@ -67,6 +69,7 @@ void VectorKernelWithBn(int m, int n, int k, float alpha, const float *A,
// Compute a smaller block of the C matrix
void AddDot4x4(int k, const float *a, const float *b, float *c, int ldc);
void AddDot4x8(int k, const float *a, const float *b, float *c, int ldc);
void AddDot6x8(int k, const float *a, const float *b, float *c, int ldc);
// Write back the blocked matmul result
// C = A * B
......
......@@ -145,6 +145,10 @@ else ()
ADD_EXECUTABLE(test-conv-add-relu-op operators/test_conv_add_relu_op.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-conv-add-relu-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-conv-add-bn-relu-op operators/test_fusion_conv_add_bn_relu_op.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-conv-add-bn-relu-op paddle-mobile)
#add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
endif()
......@@ -43,7 +43,7 @@ template <typename DeviceType, typename OpType>
class Executor4Test : public Executor<DeviceType> {
public:
Executor4Test(Program<DeviceType> p, string op_type,
bool use_optimize = false)
bool use_optimize = false, int predict_op_count = 1)
: Executor<DeviceType>() {
this->use_optimize_ = use_optimize;
this->program_ = p;
......@@ -57,12 +57,14 @@ class Executor4Test : public Executor<DeviceType> {
LOG(paddle_mobile::LogLevel::kLOG_ERROR)
<< "to_predict_program_ == nullptr";
}
const std::vector<std::shared_ptr<BlockDesc>> blocks =
this->to_predict_program_->Blocks();
for (std::shared_ptr<BlockDesc> block_desc : blocks) {
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
for (std::shared_ptr<OpDesc> op : ops) {
if (op->Type() == op_type) {
for (int i = 0; i < ops.size(); ++i) {
auto op = ops[i];
if (op->Type() == op_type && i < predict_op_count) {
DLOG << "匹配到: " << op->Type();
/// test first meeting op in program
......@@ -72,11 +74,17 @@ class Executor4Test : public Executor<DeviceType> {
op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), this->program_.scope);
this->ops_of_block_[*block_desc.get()].push_back(op_ptr);
break;
}
}
}
this->InitMemory();
std::shared_ptr<paddle_mobile::framework::BlockDesc> to_predict_block =
this->to_predict_program_->Block(0);
auto &ops = this->ops_of_block_[*to_predict_block.get()];
for (const auto &op : ops) {
op->Init();
}
}
template <typename T = LoDTensor>
......@@ -130,9 +138,6 @@ class Executor4Test : public Executor<DeviceType> {
auto *output_tensor = con_output->GetMutable<LoDTensor>();
output_tensor->mutable_data<float>(dDim);
std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
out_tensor.reset(output_tensor);
std::shared_ptr<paddle_mobile::framework::BlockDesc> to_predict_block =
this->to_predict_program_->Block(0);
for (int j = 0; j < this->ops_of_block_[*to_predict_block.get()].size();
......@@ -141,6 +146,7 @@ class Executor4Test : public Executor<DeviceType> {
op->Run();
}
return out_tensor;
return std::make_shared<paddle_mobile::framework::Tensor>(
paddle_mobile::framework::Tensor(*output_tensor));
}
};
......@@ -20,22 +20,20 @@ int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(4);
auto time1 = time();
auto isok = paddle_mobile.Load(g_mobilenet_ssd_gesture + "/model",
g_mobilenet_ssd_gesture + "/params", true);
auto isok = paddle_mobile.Load(
std::string(g_mobilenet_ssd_gesture) + "/model",
std::string(g_mobilenet_ssd_gesture) + "/params", true);
// auto isok = paddle_mobile.Load(g_mobilenet_ssd, false);
if (isok) {
auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 300, 300};
Tensor input_tensor;
SetupTensor<float>(&input_tensor, {1, 3, 300, 300}, static_cast<float>(0),
static_cast<float>(1));
GetInput<float>(g_hand, &input, dims);
std::vector<float> input(input_tensor.data<float>(),
input_tensor.data<float>() + input_tensor.numel());
auto time3 = time();
paddle_mobile.Predict(input, dims);
auto output = paddle_mobile.Predict(input, dims);
auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
<< std::endl;
......
......@@ -24,19 +24,21 @@ int main() {
auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224};
Tensor input_tensor;
SetupTensor<float>(&input_tensor, {1, 3, 224, 224}, static_cast<float>(0),
static_cast<float>(1));
std::vector<float> input(input_tensor.data<float>(),
input_tensor.data<float>() + input_tensor.numel());
auto time3 = time();
auto vec_result = paddle_mobile.Predict(input, dims);
auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
<< std::endl;
GetInput<float>(g_test_image_1x3x224x224, &input, dims);
for (int i = 0; i < 10; ++i) {
auto time3 = time();
auto vec_result = paddle_mobile.Predict(input, dims);
auto time4 = time();
std::vector<float>::iterator biggest =
std::max_element(std::begin(vec_result), std::end(vec_result));
std::cout << " Max element is " << *biggest << " at position "
<< std::distance(std::begin(vec_result), biggest) << std::endl;
std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
<< std::endl;
}
}
return 0;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_include.h"
#include "operators/fusion_conv_add_bn_relu_op.h"
int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader;
// ../models/image_classification_resnet.inference.model
auto program = loader.Load(g_mobilenet, true);
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail");
Executor4Test<paddle_mobile::CPU,
paddle_mobile::operators::FusionConvAddBNReluOp<
paddle_mobile::CPU, float>>
executor(program, "fusion_conv_add_bn_relu", true);
std::cout << "executor 4 test: " << std::endl;
paddle_mobile::framework::Tensor input;
GetInput<float>(g_test_image_1x3x224x224_banana, &input, {1, 3, 224, 224});
// // use SetupTensor if no local input image is available.
// SetupTensor<float>(&input, {1, 3, 224, 224}, static_cast<float>(0),
// static_cast<float>(1));
DLOG << " fuck: " << input;
auto out_ddim = paddle_mobile::framework::make_ddim({1, 32, 112, 112});
std::cout << "before predict: " << std::endl;
auto output =
executor.Predict(input, "data", "conv2_1_dw_bn.tmp_2", out_ddim);
std::cout << "after predict " << std::endl;
auto output_ptr = output->data<float>();
int stride = output->numel() / 100;
for (int i = 0; i < 100; i++) {
DLOG << " index:" << i * stride << " value: " << output_ptr[i * stride];
}
// for (int i = 0; i < 100; i++) {
// DLOG << " index:" << i << " value: "<< output_ptr[i];
// }
// for (int j = 0; j < output->numel(); ++j) {
// std::cout << " (index: " << j << " value: " << output_ptr[j] << ") ";
// }
std::cout << std::endl;
return 0;
}
......@@ -24,18 +24,21 @@ limitations under the License. */
#include "framework/ddim.h"
#include "framework/tensor.h"
static const std::string g_mobilenet_ssd = "../models/mobilenet+ssd";
static const std::string g_mobilenet_ssd_gesture =
"../models/mobilenet+ssd_gesture";
static const std::string g_squeezenet = "../models/squeezenet";
static const std::string g_googlenet = "../models/googlenet";
static const std::string g_mobilenet = "../models/mobilenet";
static const std::string g_resnet_50 = "../models/resnet_50";
static const std::string g_resnet = "../models/resnet";
static const std::string g_googlenet_combine = "../models/googlenet_combine";
static const std::string g_yolo = "../models/yolo";
static const std::string g_test_image_1x3x224x224 =
static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
static const char *g_squeezenet = "../models/squeezenet";
static const char *g_googlenet = "../models/googlenet";
static const char *g_mobilenet = "../models/mobilenet";
static const char *g_resnet_50 = "../models/resnet_50";
static const char *g_resnet = "../models/resnet";
static const char *g_googlenet_combine = "../models/googlenet_combine";
static const char *g_yolo = "../models/yolo";
static const char *g_test_image_1x3x224x224 =
"../images/test_image_1x3x224x224_float";
static const char *g_test_image_1x3x224x224_banana =
"../images/input_3x224x224_banana";
static const char *g_hand = "../images/hand_image";
using paddle_mobile::framework::DDim;
using paddle_mobile::framework::Tensor;
......
set(dir ${CMAKE_CURRENT_SOURCE_DIR})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${dir}/build")
cmake_minimum_required(VERSION 3.6)
project(quali)
add_definitions(-DENABLE_EXCEPTION)
ADD_EXECUTABLE(convert convert.cpp)
target_link_libraries(convert paddle-mobile)
\ No newline at end of file
set(CMAKE_CXX_STANDARD 11)
file(GLOB_RECURSE QULIFICATON_CC src/*.cc src/*.cpp src/*.c src/*.mm)
file(GLOB_RECURSE QULIFICATON_H src/*.h)
include_directories(. src/)
#add_library(paddle-mobile SHARED ${QULIFICATON_CC} ${QULIFICATON_H} convert.cpp)
add_executable(quantify convert.cpp ${QULIFICATON_CC} ${QULIFICATON_H})
\ No newline at end of file
# Model quantization script
#### Quantization script usage guide
1. Start from the paddle-mobile project directory (e.g. ~/PaddleProject/paddle-mobile)
2. cd into the tools/quantification/ directory
3. Build with cmake
``` sh
cmake .
make
```
4. Run the quantization script
```sh
./quantify (0: separated, 1: combined) (input path) (output path)
# quantize the separated googlenet model from /Users/xiebaiyuan/PaddleProject/quali/models/googlenet to ./googlenet_min
./quantify 0 /Users/xiebaiyuan/PaddleProject/quali/models/googlenet ./googlenet_min
```
*Note:*
*In the quantization tool:*
*1. A separated model's model file is named "__model__" by default;*
*2. A combined model's model file is named "model" and its parameter file "params" by default.*
##### Putting it all together:
Taking non-combined googlenet as an example:
```sh
cd tools/quantification/
cmake .
make
./quantify 0 /Users/xiebaiyuan/PaddleProject/quali/models/googlenet ./googlenet_min
```
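The "almost no loss of accuracy" claim follows from the storage format: each weight becomes one uint8 plus a shared per-tensor min/max, so the worst-case absolute reconstruction error per value is half a quantization step, (max - min) / 255 / 2. A quick sanity check (a sketch with an example range):

```c++
#include <cassert>
#include <cmath>

int main() {
  const float mn = -1.5f, mx = 2.5f;   // example per-tensor range
  const float step = (mx - mn) / 255.0f;
  const float x = 0.3f;                // any value in [mn, mx]
  auto q = static_cast<unsigned char>(std::round((x - mn) / (mx - mn) * 255));
  float x_hat = mn + q / 255.0f * (mx - mn);
  assert(std::fabs(x - x_hat) <= step / 2 + 1e-6f);  // within half a step
  return 0;
}
```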
#include "io/paddle_mobile.h"
#include "src/enforce.h"
#include "src/var_desc.h"
#include "src/program_desc.h"
#include <cstdlib>
using std::string;
static const std::string g_googlenet_combine = "../models/googlenet_combine";
static const std::string g_googlenet = "../models/googlenet";
using paddle_mobile::Executor;
using paddle_mobile::framework::Program;
char *Get_binary_data(std::string filename) {
FILE *file = fopen(filename.c_str(), "rb");
PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
filename.c_str());
fseek(file, 0, SEEK_END);
int64_t size = ftell(file);
PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
rewind(file);
char *data = new char[size];
size_t bytes_read = fread(data, 1, size, file);
PADDLE_MOBILE_ENFORCE(bytes_read == size,
"read binary file bytes do not match with fseek");
DLOG << "Get_binary_data end";
fclose(file);
return data;
#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "src/framework.pb-c.h"
#include "src/protobuf-c.h"
const size_t kSize64 = sizeof(uint64_t);
const size_t kSize32 = sizeof(uint32_t);
char *Get_binary_data(const std::string &filename) {
FILE *file = fopen(filename.c_str(), "rb");
PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
filename.c_str());
fseek(file, 0, SEEK_END);
int64_t size = ftell(file);
PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
rewind(file);
auto *data = new char[size];
size_t bytes_read = fread(data, 1, static_cast<size_t>(size), file);
PADDLE_MOBILE_ENFORCE(bytes_read == size,
"read binary file bytes do not match with fseek");
fclose(file);
return data;
}
static size_t ReadBuffer(const char *file_name, uint8_t **out) {
FILE *fp;
fp = fopen(file_name, "rb");
PADDLE_MOBILE_ENFORCE(fp != nullptr, " %s open failed !", file_name);
fseek(fp, 0, SEEK_END);
auto size = static_cast<size_t>(ftell(fp));
rewind(fp);
*out = reinterpret_cast<uint8_t *>(malloc(size));
size_t cur_len = 0;
size_t nread;
while ((nread = fread(*out + cur_len, 1, size - cur_len, fp)) != 0) {
cur_len += nread;
}
fclose(fp);
return cur_len;
}
void LoadWithDump(const paddle_mobile::framework::VarDesc var_desc,
paddle_mobile::framework::LoDTensor *tensor, char **data, FILE *out_file) {
// 1. version
uint32_t version = *reinterpret_cast<uint32_t *>(*data);
// write version
fwrite(&version, sizeof(uint32_t), 1, out_file );
(*data) += sizeof(uint32_t);
// 2 Lod information
uint64_t *lod_level_ptr = new uint64_t();
memcpy(lod_level_ptr, (*data), sizeof(uint64_t));
uint64_t lod_level = 0;
// write lod Information
fwrite(&lod_level, sizeof(uint64_t), 1, out_file);
delete lod_level_ptr;
(*data) += sizeof(uint64_t);
auto &lod = *tensor->mutable_lod();
lod.resize(lod_level);
for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size = *reinterpret_cast<uint64_t *>(*data);
// write lod size
fwrite(&size, sizeof(uint64_t), 1, out_file);
(*data) += sizeof(uint64_t);
std::vector<size_t> tmp(size / sizeof(size_t));
for (int k = 0; k < tmp.size(); ++k) {
tmp[k] = *reinterpret_cast<size_t *>(*data);
(*data) += sizeof(size_t);
}
// write lod size vector
fwrite(&tmp, sizeof(size_t), tmp.size(), out_file );
std::shared_ptr<ProgramDesc> loadParams(const std::string &model_path) {
PaddleMobile__Framework__Proto__ProgramDesc *c_program;
uint8_t *buf = nullptr;
size_t read_size = ReadBuffer(model_path.c_str(), &buf);
PADDLE_MOBILE_ENFORCE(buf != nullptr, "read from __model__ is null");
c_program = paddle_mobile__framework__proto__program_desc__unpack(
nullptr, read_size, buf);
PADDLE_MOBILE_ENFORCE(c_program != nullptr, "program is null");
auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);
return originProgramDesc;
lod[i] = tmp;
}
}
// 3. tensor version
uint32_t tensor_version = *reinterpret_cast<uint32_t *>(*data);
// write tensor version
fwrite(&tensor_version, sizeof(uint32_t), 1, out_file);
(*data) += sizeof(uint32_t);
// 4. tensor desc
int32_t size = *reinterpret_cast<int32_t *>(*data);
// write tensor desc
fwrite(&size, sizeof(int32_t), 1, out_file);
(*data) += sizeof(int32_t);
std::unique_ptr<char[]> buf(new char[size]);
for (int m = 0; m < size; ++m) {
buf.get()[m] = (*data)[m];
}
fwrite(buf.get(), sizeof(char), size, out_file);
(*data) += (sizeof(char) * size);
void LoadWithDump(const paddle_mobile::framework::VarDesc &var_desc, char *dataP, FILE *out_file) {
// 1. version
uint32_t version = *reinterpret_cast<uint32_t *>(dataP);
const paddle_mobile::framework::TensorDesc &desc = var_desc.Tensor_desc();
int memory_size = 1;
for (auto l : desc.Dims()) {
memory_size *= l;
}
tensor->Resize(paddle_mobile::framework::make_ddim(desc.Dims()));
void *memory = tensor;
int type_size = 0;
switch (desc.DataType()) {
case paddle_mobile::framework::VARTYPE_TYPE_FP16:
type_size = 2;
break;
case paddle_mobile::framework::VARTYPE_TYPE_FP32:
type_size = 4;
memory = tensor->mutable_data<float>();
break;
case paddle_mobile::framework::VARTYPE_TYPE_FP64:
type_size = 8;
break;
case paddle_mobile::framework::VARTYPE_TYPE_INT32:
type_size = 4;
break;
case paddle_mobile::framework::VARTYPE_TYPE_INT64:
type_size = 8;
break;
case paddle_mobile::framework::VARTYPE_TYPE_BOOL:
type_size = 1;
break;
default:
break;
}
for (int n = 0; n < memory_size * type_size; ++n) {
static_cast<char *>(memory)[n] = (*data)[n];
}
(*data) += (sizeof(char) * memory_size * type_size);
// for float 32
float min_value = std::numeric_limits<float>::max();
float max_value = std::numeric_limits<float>::min();
for (int k = 0; k < memory_size; ++k) {
min_value = std::min(min_value, static_cast<float *> (memory)[k]);
max_value = std::max(max_value, static_cast<float *> (memory)[k]);
}
fwrite(&min_value, sizeof(float), 1, out_file);
fwrite(&max_value, sizeof(float), 1, out_file);
for (int g = 0; g < memory_size; ++g) {
float value = static_cast<float *> (memory)[g];
uint8_t factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255);
fwrite(&factor, sizeof(uint8_t), 1, out_file);
// write version
fwrite(&version, kSize32, 1, out_file);
dataP += kSize32;
// 2 Lod information
auto *lod_level_ptr = new uint64_t();
memcpy(lod_level_ptr, dataP, kSize64);
uint64_t lod_level = 0;
// write lod Information
fwrite(&lod_level, kSize64, 1, out_file);
delete lod_level_ptr;
dataP += kSize64;
for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size = *reinterpret_cast<uint64_t *>(dataP);
// write lod size
fwrite(&size, kSize64, 1, out_file);
(dataP) += kSize64;
std::vector<size_t> tmp(size / sizeof(size_t));
for (unsigned long &k : tmp) {
k = *reinterpret_cast<size_t *>(dataP);
(dataP) += sizeof(size_t);
}
// write lod size vector
fwrite(&tmp, sizeof(size_t), tmp.size(), out_file);
}
// 3. tensor version
uint32_t tensor_version = *reinterpret_cast<uint32_t *>(dataP);
// write tensor version
fwrite(&tensor_version, kSize32, 1, out_file);
(dataP) += kSize32;
// 4. tensor desc
int32_t size = *reinterpret_cast<int32_t *>(dataP);
// write tensor desc
fwrite(&size, sizeof(int32_t), 1, out_file);
(dataP) += sizeof(int32_t);
std::unique_ptr<char[]> buf(new char[size]);
for (int m = 0; m < size; ++m) {
buf.get()[m] = (dataP)[m];
}
fwrite(buf.get(), sizeof(char), static_cast<size_t>(size), out_file);
(dataP) += (sizeof(char) * size);
const paddle_mobile::framework::TensorDesc &desc = var_desc.Tensor_desc();
int memory_size = 1;
for (auto l : desc.Dims()) {
memory_size *= l;
}
void *memory = nullptr;
int type_size = 0;
switch (desc.DataType()) {
case paddle_mobile::framework::VARTYPE_TYPE_FP16:
type_size = 2;
break;
case paddle_mobile::framework::VARTYPE_TYPE_FP32:
type_size = 4;
break;
case paddle_mobile::framework::VARTYPE_TYPE_FP64:
type_size = 8;
break;
case paddle_mobile::framework::VARTYPE_TYPE_INT32:
type_size = 4;
break;
case paddle_mobile::framework::VARTYPE_TYPE_INT64:
type_size = 8;
break;
case paddle_mobile::framework::VARTYPE_TYPE_BOOL:
type_size = 1;
break;
default:
break;
}
size_t tensorSize = sizeof(char) * memory_size * type_size;
memory = new char[tensorSize];
for (int n = 0; n < tensorSize; ++n) {
static_cast<char *>(memory)[n] = (dataP)[n];
}
dataP += tensorSize;
// for float 32
float min_value = std::numeric_limits<float>::max();
float max_value = std::numeric_limits<float>::lowest();  // lowest(), not min(): min() is the smallest positive float
for (int k = 0; k < memory_size; ++k) {
min_value = std::min(min_value, static_cast<float *> (memory)[k]);
max_value = std::max(max_value, static_cast<float *> (memory)[k]);
}
fwrite(&min_value, sizeof(float), 1, out_file);
fwrite(&max_value, sizeof(float), 1, out_file);
for (int g = 0; g < memory_size; ++g) {
float value = static_cast<float *> (memory)[g];
auto factor = (uint8_t) round((value - min_value) / (max_value - min_value) * 255);
fwrite(&factor, sizeof(uint8_t), 1, out_file);
}
delete[] static_cast<char *>(memory);  // memory was allocated with new char[]
}
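LoadWithDump emits, per persistable tensor: the original header fields, then min, max, and one uint8 factor per element. The consumer side is the inverse mapping (a sketch; the actual reader is the paddle-mobile loader when quantification = true):

```c++
#include <cstdint>

// Recover approximate float32 weights from (min, max, factors[]) as written
// by LoadWithDump above.
void DequantizeTensor(float min_value, float max_value,
                      const uint8_t *factors, int memory_size, float *out) {
  const float range = max_value - min_value;
  for (int g = 0; g < memory_size; ++g)
    out[g] = min_value + factors[g] / 255.0f * range;
}
```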
void quantificate_combined(std::string model_path, std::string param_path, std::string param_min_path){
paddle_mobile::Loader<paddle_mobile::CPU,paddle_mobile::Precision::FP32 > loader;
bool optimize = true;
auto program = loader.Load(model_path, param_path, optimize);
char *origin_data = Get_binary_data(program.para_path);
char *data = origin_data;
FILE *out_file = fopen(param_min_path.c_str(), "wb");
for (const auto &block : program.originProgram->Blocks()) {
for (const auto &var_desc : block->Vars()) {
auto var = program.scope->Var(var_desc->Name());
if(var_desc ->Persistable()) {
auto tensor = var->template GetMutable<paddle_mobile::framework::LoDTensor>();
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue;
}
LoadWithDump(*var_desc, tensor, &data,out_file);
void
quantificate_combined(const std::string &model_path, const std::string &param_path, const std::string &param_min_path) {
auto program = loadParams(model_path);
char *origin_data = Get_binary_data(param_path);
char *data = origin_data;
FILE *out_file = fopen(param_min_path.c_str(), "wb");
for (const auto &block : program->Blocks()) {
for (const auto &var_desc : block->Vars()) {
if (var_desc->Persistable()) {
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue;
}
LoadWithDump(*var_desc, data, out_file);
}
}
fclose(out_file);
delete[] origin_data;
}
void quantificate_seperated(std::string model_dir, std::string param_min_path) {
paddle_mobile::Loader<paddle_mobile::CPU,paddle_mobile::Precision::FP32 > loader;
bool optimize = true;
auto program = loader.Load(model_dir, optimize);
std::string shell_command = "mkdir "+param_min_path;
system(shell_command.c_str());
for (const auto &block : program.originProgram->Blocks()) {
for (const auto &var_desc : block->Vars()) {
auto var = program.scope->Var(var_desc->Name());
if(var_desc ->Persistable()) {
auto tensor = var->template GetMutable<paddle_mobile::framework::LoDTensor>();
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue;
}
std::string file_name = param_min_path +"/"+ var_desc->Name();
FILE *out_file = fopen(file_name.c_str(), "wb");
char *origin_data =
Get_binary_data(program.model_path + "/" + var_desc->Name());
char *data = origin_data;
LoadWithDump(*var_desc, tensor, &data,out_file);
delete origin_data;
fclose(out_file);
fclose(out_file);
delete origin_data;
}
void quantificate_seperated(const std::string model_dir, const std::string param_min_path) {
auto program = loadParams(model_dir + "/__model__");
std::string shell_command = "mkdir " + param_min_path;
system(shell_command.c_str());
for (const auto &block : program->Blocks()) {
for (const auto &var_desc : block->Vars()) {
if (var_desc->Persistable()) {
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue;
}
std::string file_name = param_min_path + "/" + var_desc->Name();
FILE *out_file = fopen(file_name.c_str(), "wb");
char *origin_data = Get_binary_data(model_dir + "/" + var_desc->Name());
char *data = origin_data;
LoadWithDump(*var_desc, data, out_file);
delete[] origin_data;
fclose(out_file);
}
}
}
}
int main(int argc, char **argv) {
const std::string kNoteEg = "( eg: ./quantify 1 your_combined_model_path output_path or ./quantify 0 your_separated_model_path output_path)";
PADDLE_MOBILE_ENFORCE(argc > 1, "wee need params.%s ", kNoteEg.c_str());
std::string action_type = argv[1];
PADDLE_MOBILE_ENFORCE(argc > 1 && (action_type == "1" || action_type == "0"),
"only 0 or 1 supported, current is %s %s ",
action_type.c_str(),
kNoteEg.c_str());
PADDLE_MOBILE_ENFORCE(argc > 2, "we need your model path. %s ", kNoteEg.c_str());
std::string base_path = argv[2];
PADDLE_MOBILE_ENFORCE(argc > 3, "we need your output path. %s ", kNoteEg.c_str());
std::string output_path = argv[3];
if (action_type == "0") {
// for separated
const std::string &seperated_min_dir = output_path;
quantificate_seperated(base_path, seperated_min_dir);
return 0;
}
int main() {
std::string filename = "params_min";
std::string model_path = g_googlenet_combine + "/model";
std::string param_path = g_googlenet_combine + "/params";
std::string dirname = "param_min_dir";
std::string model_dir = g_googlenet;
// quantificate_combined(model_path, param_path,filename);
quantificate_seperated(model_dir, dirname);
if (action_type == "1") {
// for combined
const std::string &combined_min_dir = output_path;
std::string model_path = base_path + "/model";
std::string param_path = base_path + "/params";
quantificate_combined(model_path, param_path, combined_min_dir);
return 0;
}
return -1;
}
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
//
// Created by 谢柏渊 on 2018/7/25.
//
#include "src/block_desc_local.h"
#include <algorithm>
#include <memory>
#include <vector>
#include "src/framework.pb-c.h"
std::vector<std::shared_ptr<paddle_mobile::framework::VarDesc>>
BlockDesc::Vars() const {
return vars_;
}
BlockDesc::BlockDesc(PaddleMobile__Framework__Proto__BlockDesc *desc)
: index_(desc->idx), parent_index_(desc->idx) {
for (int i = 0; i < desc->n_vars; ++i) {
PaddleMobile__Framework__Proto__VarDesc *var_desc = desc->vars[i];
vars_.emplace_back(std::shared_ptr<paddle_mobile::framework::VarDesc>(
new paddle_mobile::framework::VarDesc(var_desc)));
}
std::sort(vars_.begin(), vars_.end(),
[](std::shared_ptr<paddle_mobile::framework::VarDesc> left,
std::shared_ptr<paddle_mobile::framework::VarDesc> right) {
return left->Name() < right->Name();
});
// for (int j = 0; j < desc->n_ops; ++j) {
// PaddleMobile__Framework__Proto__OpDesc *op_desc = desc->ops[j];
// ops_.emplace_back(new OpDesc(op_desc));
// }
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
//
// Created by 谢柏渊 on 2018/7/25.
//
#ifndef TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_
#define TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_
#include <vector>
#include "src/var_desc.h"
class BlockDesc {
public:
friend class Node;
friend class ProgramOptimize;
BlockDesc() {}
explicit BlockDesc(PaddleMobile__Framework__Proto__BlockDesc *desc);
const int &ID() const { return index_; }
const bool &MultiThread() const { return multi_thread_; }
const int &Parent() const { return parent_index_; }
bool operator==(const BlockDesc &in_block) const {
return this->ID() == in_block.ID() && this->Parent() == in_block.Parent();
}
bool operator<(const BlockDesc &in_block) const {
return this->ID() < in_block.ID() && this->Parent() < in_block.Parent();
}
std::vector<std::shared_ptr<paddle_mobile::framework::VarDesc>> Vars() const;
private:
int index_;
bool multi_thread_;
int parent_index_;
std::vector<std::shared_ptr<paddle_mobile::framework::VarDesc>> vars_;
};
#endif // TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef ENABLE_EXCEPTION
#include <stdio.h>
#include <exception>
#include <string>
#endif
namespace paddle_mobile {
#ifdef ENABLE_EXCEPTION
struct PaddleMobileException : public std::exception {
const std::string exception_prefix = "paddle mobile C++ Exception: \n";
std::string message;
PaddleMobileException(const char *header, const char *detail,
const char *file, const int line) {
char buffer[1500];
snprintf(buffer, sizeof(buffer),
"%s| %s \n| [in file] : %s\n| [on line] : %d\n| [detail] : %s\n",
exception_prefix.c_str(), header, file, line, detail);
message = std::string(buffer);
}
const char *what() const noexcept { return message.c_str(); }
};
#define PADDLE_MOBILE_THROW_EXCEPTION(...) \
{ \
char buffer[1000]; \
snprintf(buffer, sizeof(buffer), __VA_ARGS__); \
std::string detail(buffer); \
throw paddle_mobile::PaddleMobileException("Custom Exception", buffer, \
__FILE__, __LINE__); \
}
#define PADDLE_MOBILE_ENFORCE(stat, ...) \
{ \
if (stat) { \
} else { \
char buffer[1000]; \
snprintf(buffer, sizeof(buffer), __VA_ARGS__); \
std::string detail(buffer); \
throw paddle_mobile::PaddleMobileException("paddle-mobile enforce", \
buffer, __FILE__, __LINE__); \
} \
}
#else
#define PADDLE_MOBILE_THROW_EXCEPTION(...)
#define PADDLE_MOBILE_ENFORCE(stat, ...)
#endif
} // namespace paddle_mobile
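Usage mirrors the call sites in convert.cpp above: a condition followed by a printf-style message, throwing PaddleMobileException with file and line context on failure (a minimal sketch, assuming ENABLE_EXCEPTION is defined):

```c++
#include <cstdio>
#include <string>

// Open a file or throw a paddle_mobile::PaddleMobileException that records
// __FILE__ and __LINE__ alongside the formatted message.
FILE *OpenOrThrow(const std::string &filename) {
  FILE *file = fopen(filename.c_str(), "rb");
  PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s", filename.c_str());
  return file;
}
```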
This diff is collapsed.
This diff is collapsed.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
//
// Created by 谢柏渊 on 2018/7/25.
//
#include "src/program_desc.h"
#include <vector>
ProgramDesc::ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc) {
for (int i = 0; i < desc->n_blocks; ++i) {
blocks_.emplace_back(std::make_shared<BlockDesc>(desc->blocks[i]));
}
}
const std::vector<std::shared_ptr<BlockDesc>> ProgramDesc::Blocks() {
return blocks_;
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
//
// Created by 谢柏渊 on 2018/7/25.
//
#ifndef TOOLS_QUANTIFICATION_SRC_PROGRAM_DESC_H_
#define TOOLS_QUANTIFICATION_SRC_PROGRAM_DESC_H_
#include <memory>
#include <vector>
#include "src/block_desc_local.h"
#include "src/framework.pb-c.h"
class ProgramDesc {
public:
// friend class Node;
//
// friend class ProgramOptimize;
explicit ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc);
const std::vector<std::shared_ptr<BlockDesc>> Blocks();
private:
std::vector<std::shared_ptr<BlockDesc>> blocks_;
};
#endif // TOOLS_QUANTIFICATION_SRC_PROGRAM_DESC_H_
This diff is collapsed.
This diff is collapsed.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "src/framework.pb-c.h"
namespace paddle_mobile {
namespace framework {
enum VarType_Type {
VARTYPE_TYPE_BOOL = 0,
VARTYPE_TYPE_INT16 = 1,
VARTYPE_TYPE_INT32 = 2,
VARTYPE_TYPE_INT64 = 3,
VARTYPE_TYPE_FP16 = 4,
VARTYPE_TYPE_FP32 = 5,
VARTYPE_TYPE_FP64 = 6,
VARTYPE_TYPE_LOD_TENSOR = 7,
VARTYPE_TYPE_SELECTED_ROWS = 8,
VARTYPE_TYPE_FEED_MINIBATCH = 9,
VARTYPE_TYPE_FETCH_LIST = 10,
VARTYPE_TYPE_STEP_SCOPES = 11,
VARTYPE_TYPE_STEP_LOD_RANK_TABLE = 12,
VARTYPE_TYPE_STEP_LOD_TENSOR_ARRAY = 13,
VARTYPE_TYPE_STEP_PLACE_LIST = 14,
VARTYPE_TYPE_READER = 15,
VARTYPE_TYPE_CHANNEL = 16,
VARTYPE_TYPE_RAW = 17,
VARTYPE_TYPE_TUPLE = 18
};
class TensorDesc {
public:
TensorDesc() = default;
TensorDesc(const TensorDesc &desc) {
this->dims_ = desc.dims_;
this->data_type_ = desc.data_type_;
}
explicit TensorDesc(
PaddleMobile__Framework__Proto__VarType__TensorDesc *desc) {
for (int i = 0; i < desc->n_dims; ++i) {
int64_t d = desc->dims[i];
dims_.emplace_back(d);
}
data_type_ = (VarType_Type)desc->data_type;
}
std::vector<int64_t> Dims() const { return dims_; }
VarType_Type DataType() const { return data_type_; }
private:
std::vector<int64_t> dims_;
VarType_Type data_type_;
};
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "src/framework.pb-c.h"
#include "src/tensor_desc.h"
namespace paddle_mobile {
namespace framework {
class VarDesc {
public:
VarDesc(const VarDesc &var_desc) {
this->data_type_ = var_desc.data_type_;
this->name_ = var_desc.name_;
this->persistable_ = var_desc.persistable_;
this->tensor_desc_ = var_desc.tensor_desc_;
this->type_ = var_desc.type_;
}
explicit VarDesc(PaddleMobile__Framework__Proto__VarDesc *desc) {
type_ = (VarType_Type)desc->type->type;
name_ = std::string(desc->name);
persistable_ = static_cast<bool>(desc->persistable);
switch (type_) {
case VARTYPE_TYPE_SELECTED_ROWS:
tensor_desc_ = TensorDesc(desc->type->selected_rows);
break;
case VARTYPE_TYPE_LOD_TENSOR:
tensor_desc_ = TensorDesc(desc->type->lod_tensor->tensor);
break;
case VARTYPE_TYPE_STEP_LOD_TENSOR_ARRAY:
// desc->type->tensor_array->tensor->data_type;
tensor_desc_ = TensorDesc(desc->type->tensor_array->tensor);
break;
default:
break;
}
switch (type_) {
case VARTYPE_TYPE_CHANNEL:
data_type_ = (VarType_Type)desc->type->channel->data_type;
break;
default:
data_type_ = tensor_desc_.DataType();
break;
}
}
std::string Name() const { return name_; }
VarType_Type Type() const { return type_; }
bool Persistable() const { return persistable_; }
const TensorDesc &Tensor_desc() const { return tensor_desc_; }
private:
std::string name_;
bool persistable_;
TensorDesc tensor_desc_;
VarType_Type type_;
VarType_Type data_type_;
};
} // namespace framework
} // namespace paddle_mobile