Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
dd2b700d
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
dd2b700d
编写于
4月 27, 2020
作者:
H
hanbuhe
浏览文件
操作
浏览文件
下载
差异文件
merge memory leak into eb1.4.0
上级
fc659486
d0177f95
变更
20
隐藏空白更改
内联
并排
Showing
20 changed file
with
268 addition
and
53 deletion
+268
-53
CMakeLists.txt
CMakeLists.txt
+3
-0
lite/backends/fpga/KD/debugger.hpp
lite/backends/fpga/KD/debugger.hpp
+12
-0
lite/backends/fpga/KD/dl_engine.hpp
lite/backends/fpga/KD/dl_engine.hpp
+1
-0
lite/backends/fpga/KD/io.cpp
lite/backends/fpga/KD/io.cpp
+43
-0
lite/backends/fpga/KD/io.hpp
lite/backends/fpga/KD/io.hpp
+50
-0
lite/backends/fpga/KD/llapi/filter.cpp
lite/backends/fpga/KD/llapi/filter.cpp
+2
-2
lite/backends/fpga/KD/llapi/zynqmp_api.cpp
lite/backends/fpga/KD/llapi/zynqmp_api.cpp
+1
-1
lite/backends/fpga/KD/pes/fully_connected_pe.hpp
lite/backends/fpga/KD/pes/fully_connected_pe.hpp
+55
-14
lite/backends/fpga/KD/pes/input_pe.hpp
lite/backends/fpga/KD/pes/input_pe.hpp
+1
-0
lite/backends/fpga/KD/pes/output_pe.hpp
lite/backends/fpga/KD/pes/output_pe.hpp
+7
-0
lite/backends/fpga/KD/tensor.hpp
lite/backends/fpga/KD/tensor.hpp
+30
-18
lite/backends/fpga/lite_tensor.cc
lite/backends/fpga/lite_tensor.cc
+24
-6
lite/backends/fpga/lite_tensor.h
lite/backends/fpga/lite_tensor.h
+32
-6
lite/kernels/arm/sequence_pool_compute.cc
lite/kernels/arm/sequence_pool_compute.cc
+1
-0
lite/kernels/fpga/CMakeLists.txt
lite/kernels/fpga/CMakeLists.txt
+0
-1
lite/kernels/fpga/feed_compute.cc
lite/kernels/fpga/feed_compute.cc
+1
-1
lite/kernels/fpga/fetch_compute.cc
lite/kernels/fpga/fetch_compute.cc
+1
-1
lite/kernels/fpga/mul_compute.cc
lite/kernels/fpga/mul_compute.cc
+2
-1
lite/kernels/host/one_hot_compute.cc
lite/kernels/host/one_hot_compute.cc
+1
-1
lite/operators/one_hot_op.cc
lite/operators/one_hot_op.cc
+1
-1
未找到文件。
CMakeLists.txt
浏览文件 @
dd2b700d
...
@@ -22,6 +22,9 @@ if (WITH_PADDLE_MOBILE)
...
@@ -22,6 +22,9 @@ if (WITH_PADDLE_MOBILE)
return
()
return
()
endif
(
WITH_PADDLE_MOBILE
)
endif
(
WITH_PADDLE_MOBILE
)
# set(CMAKE_BUILD_TYPE DEBUG)
set
(
PADDLE_SOURCE_DIR
${
CMAKE_CURRENT_SOURCE_DIR
}
)
set
(
PADDLE_SOURCE_DIR
${
CMAKE_CURRENT_SOURCE_DIR
}
)
set
(
PADDLE_BINARY_DIR
${
CMAKE_CURRENT_BINARY_DIR
}
)
set
(
PADDLE_BINARY_DIR
${
CMAKE_CURRENT_BINARY_DIR
}
)
set
(
CMAKE_CXX_STANDARD 11
)
set
(
CMAKE_CXX_STANDARD 11
)
...
...
lite/backends/fpga/KD/debugger.hpp
浏览文件 @
dd2b700d
...
@@ -14,6 +14,8 @@
...
@@ -14,6 +14,8 @@
#pragma once
#pragma once
#include <fstream>
#include <iostream>
#include <string>
#include <string>
#include <unordered_map>
#include <unordered_map>
...
@@ -37,8 +39,18 @@ class Debugger {
...
@@ -37,8 +39,18 @@ class Debugger {
}
}
}
}
// Resets the value stored under `key` in tick_tock_map to zero, but only when
// the key is already present. Unknown keys are left alone (no entry is
// created) and nothing is returned.
// NOTE(review): no time stamp is taken here, so despite the name this does not
// start a timer yet — confirm the intended behaviour with the author.
void tick(std::string key) {
  auto entry = tick_tock_map.find(key);
  if (entry == tick_tock_map.end()) {
    return;
  }
  entry->second = 0;
}
// Counterpart of tick(). Currently an empty placeholder: `key` is accepted but
// ignored, and no member state is read or written.
void
tock
(
std
::
string
key
)
{}
private:
private:
std
::
unordered_map
<
std
::
string
,
bool
>
op_config
;
std
::
unordered_map
<
std
::
string
,
bool
>
op_config
;
std
::
unordered_map
<
std
::
string
,
float
>
tick_tock_map
;
Debugger
()
{
Debugger
()
{
op_config
[
"concat"
]
=
true
;
op_config
[
"concat"
]
=
true
;
op_config
[
"pooling"
]
=
true
;
op_config
[
"pooling"
]
=
true
;
...
...
lite/backends/fpga/KD/dl_engine.hpp
浏览文件 @
dd2b700d
...
@@ -15,6 +15,7 @@ limitations under the License. */
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <stdio.h>
#include <stdio.h>
#include "lite/backends/fpga/KD/llapi/filter.h"
#include "lite/backends/fpga/KD/llapi/filter.h"
#include "lite/backends/fpga/KD/llapi/zynqmp_api.h"
#include "lite/backends/fpga/KD/llapi/zynqmp_api.h"
...
...
lite/backends/fpga/KD/io.cpp
0 → 100644
浏览文件 @
dd2b700d
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "io.hpp"
namespace
paddle
{
namespace
zynqmp
{
// FpgaIO::FpgaIO() {}
// void FpgaIO::setMutex(std::mutex* mtx) { mtx_ = mtx; }
// void FpgaIO::setConditionVariable(std::condition_variable* condition) {
// condition_ = condition;
// }
// void FpgaIO::lock() {
// if (mtx_ != nullptr && !locked_) {
// mtx_->lock();
// locked_ = true;
// }
// }
// void FpgaIO::unlock() {
// if (mtx_ != nullptr) {
// mtx_->unlock();
// condition_->notify_one();
// }
// locked_ = false;
// }
}
// namespace zynqmp
}
// namespace paddle
lite/backends/fpga/KD/io.hpp
0 → 100644
浏览文件 @
dd2b700d
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <stdio.h>
#include <memory>
// #include <condition_variable>
// #include <mutex>
namespace
paddle
{
namespace
zynqmp
{
/**
 * Singleton that owns one heap-allocated float buffer.
 *
 * The instance is obtained through get_instance(); it cannot be constructed
 * or copied by callers. The buffer is owned by the instance and is released
 * when a new buffer is allocated or when the instance is destroyed.
 */
class FpgaIO {
 public:
  /**
   * Returns the single shared instance, constructing it on first use.
   */
  static FpgaIO& get_instance() {
    static FpgaIO s_instance;
    return s_instance;
  }

  /**
   * Allocates a fresh, uninitialized buffer of `s` floats and makes it the
   * current buffer. A previously allocated buffer is freed, so pointers
   * obtained from earlier getData() calls become invalid.
   *
   * @param s number of float elements to allocate.
   */
  void allocData(size_t s) {
    // reset() deletes the old array only after the new one was allocated, so a
    // failed allocation leaves the existing buffer intact.
    data_.reset(new float[s]);
  }

  /**
   * Returns a non-owning pointer to the current buffer, or nullptr when
   * allocData() has not been called yet.
   */
  float* getData() { return data_.get(); }

  // void setMutex(std::mutex* mtx);
  // void setConditionVariable(std::condition_variable* condition);
  // void lock();
  // void unlock();

 private:
  // std::mutex* mtx_ = nullptr;
  // std::condition_variable* condition_ = nullptr;
  // bool locked_ = false;

  // Owning handle: guarantees delete[] on re-allocation and at destruction.
  std::unique_ptr<float[]> data_;

  // Defined in-class: the out-of-line definition in io.cpp is commented out,
  // which left the constructor declared but undefined.
  FpgaIO() = default;
  FpgaIO(const FpgaIO&) = delete;
  FpgaIO& operator=(const FpgaIO&) = delete;
};
}
// namespace zynqmp
}
// namespace paddle
lite/backends/fpga/KD/llapi/filter.cpp
浏览文件 @
dd2b700d
...
@@ -240,8 +240,8 @@ int8_t* format_filter(float* data_in,
...
@@ -240,8 +240,8 @@ int8_t* format_filter(float* data_in,
for
(
int
n
=
0
;
n
<
num
;
n
++
)
{
for
(
int
n
=
0
;
n
<
num
;
n
++
)
{
float
*
filter_start
=
data_in
+
n
*
chw
;
float
*
filter_start
=
data_in
+
n
*
chw
;
int8_t
*
quantized_start
=
quantized_data
+
n
*
chw
;
int8_t
*
quantized_start
=
quantized_data
+
n
*
chw
;
quantize
(
filter_start
,
quantized_start
,
chw
,
max
);
quantize
(
filter_start
,
quantized_start
,
chw
,
f_
max
);
filter_max
.
push_back
(
1
);
filter_max
.
push_back
(
f_max
);
}
}
int8_t
*
hwc_data
=
int8_t
*
hwc_data
=
...
...
lite/backends/fpga/KD/llapi/zynqmp_api.cpp
浏览文件 @
dd2b700d
...
@@ -205,7 +205,7 @@ int get_device_info(const struct DeviceInfo &args) {
...
@@ -205,7 +205,7 @@ int get_device_info(const struct DeviceInfo &args) {
int
perform_bypass
(
const
struct
BypassArgs
&
args
)
{
int
perform_bypass
(
const
struct
BypassArgs
&
args
)
{
int
ret
=
-
1
;
int
ret
=
-
1
;
int
size
=
args
.
image
.
channels
*
args
.
image
.
width
*
args
.
image
.
height
;
int
size
=
args
.
image
.
channels
*
args
.
image
.
width
*
args
.
image
.
height
;
int
max_size
=
1
<<
2
1
;
int
max_size
=
1
<<
2
2
;
float
times
=
1.0
*
size
/
max_size
;
float
times
=
1.0
*
size
/
max_size
;
int
count
=
static_cast
<
int
>
(
times
);
int
count
=
static_cast
<
int
>
(
times
);
...
...
lite/backends/fpga/KD/pes/fully_connected_pe.hpp
浏览文件 @
dd2b700d
...
@@ -14,6 +14,8 @@ limitations under the License. */
...
@@ -14,6 +14,8 @@ limitations under the License. */
#pragma once
#pragma once
#include <math.h>
#include <cmath>
#include <vector>
#include <vector>
#include "lite/backends/fpga/KD/pe.hpp"
#include "lite/backends/fpga/KD/pe.hpp"
...
@@ -38,7 +40,6 @@ class FullyConnectedPE : public PE {
...
@@ -38,7 +40,6 @@ class FullyConnectedPE : public PE {
Tensor
*
input
=
param_
.
input
;
Tensor
*
input
=
param_
.
input
;
convParam_
.
input
=
param_
.
input
;
convParam_
.
input
=
param_
.
input
;
convParam_
.
output
=
param_
.
output
;
convParam_
.
output
=
param_
.
output
;
// convParam_.relu = param_.relu;
convParam_
.
activeParam
.
type
=
param_
.
activeParam
.
type
;
convParam_
.
activeParam
.
type
=
param_
.
activeParam
.
type
;
convParam_
.
groups
=
1
;
convParam_
.
groups
=
1
;
convParam_
.
strides
=
{
1
,
1
};
convParam_
.
strides
=
{
1
,
1
};
...
@@ -54,32 +55,42 @@ class FullyConnectedPE : public PE {
...
@@ -54,32 +55,42 @@ class FullyConnectedPE : public PE {
int
height
=
param_
.
input
->
shape
().
height
();
int
height
=
param_
.
input
->
shape
().
height
();
int
width
=
param_
.
input
->
shape
().
width
();
int
width
=
param_
.
input
->
shape
().
width
();
int
filter_channel
=
chw
/
height
/
width
;
//
int filter_channel = chw / height / width;
int
channel
=
param_
.
output
->
shape
().
channel
();
int
channel
=
param_
.
output
->
shape
().
channel
();
Shape
shape
(
NCHW
,
{
num
,
filter_channel
,
height
,
width
});
Shape
shape
(
NCHW
,
{
num
,
chw_aligned
,
1
,
1
});
Tensor
*
conv_filter
=
new
Tensor
();
float
*
new_filter_data
=
conv_filter_
.
mutableData
<
float
>
(
FP32
,
shape
);
float
*
new_filter_data
=
conv_filter
->
mutableData
<
float
>
(
FP32
,
shape
);
float
*
filter_data
=
param_
.
filter
->
data
<
float
>
();
float
*
filter_data
=
param_
.
filter
->
data
<
float
>
();
memset
(
new_filter_data
,
0
,
num
*
chw_aligned
*
sizeof
(
float
));
for
(
int
i
=
0
;
i
<
num
;
i
++
)
{
for
(
int
i
=
0
;
i
<
num
;
i
++
)
{
for
(
int
j
=
0
;
j
<
chw
;
j
++
)
{
for
(
int
j
=
0
;
j
<
chw
;
j
++
)
{
float
scale
=
filter_data
[
j
*
num
+
i
];
float
scale
=
filter_data
[
j
*
num
+
i
];
new_filter_data
[
i
*
chw
+
j
]
=
scale
;
new_filter_data
[
i
*
chw
_aligned
+
j
]
=
scale
;
}
}
}
}
conv_filter
->
flush
();
conv_filter
->
flush
();
convParam_
.
filter
=
conv_filter
;
convParam_
.
filter
=
conv_filter
;
Shape
sb_shape
(
N
,
{
channel
});
conv_filter_
.
flush
();
convParam_
.
filter
=
&
conv_filter_
;
// param_.filter->saveToFile("param_filter", true);
// conv_filter->saveToFile("conv_filter", true);
// exit(-1);
Shape
sb_shape
(
N
,
{
num
});
float
*
scale_data
=
convParam_
.
scale
()
->
mutableData
<
float
>
(
FP32
,
sb_shape
);
float
*
scale_data
=
convParam_
.
scale
()
->
mutableData
<
float
>
(
FP32
,
sb_shape
);
float
*
bias_data
=
convParam_
.
bias
()
->
mutableData
<
float
>
(
FP32
,
sb_shape
);
float
*
bias_data
=
convParam_
.
bias
()
->
mutableData
<
float
>
(
FP32
,
sb_shape
);
for
(
int
i
=
0
;
i
<
channel
;
i
++
)
{
for
(
int
i
=
0
;
i
<
num
;
i
++
)
{
scale_data
[
i
]
=
1.0
f
;
scale_data
[
i
]
=
1.0
f
;
bias_data
[
i
]
=
param_
.
bias
->
data
<
float
>
()[
i
];
bias_data
[
i
]
=
param_
.
bias
->
data
<
float
>
()[
i
];
}
}
// for (int i = 0; i < num; i++) {
// scale_data[i] = 1.0f;
// bias_data[i] = param_.bias->data<float>()[i];
// }
convParam_
.
scale
()
->
flush
();
convParam_
.
scale
()
->
flush
();
convParam_
.
bias
()
->
flush
();
convParam_
.
bias
()
->
flush
();
...
@@ -115,14 +126,41 @@ class FullyConnectedPE : public PE {
...
@@ -115,14 +126,41 @@ class FullyConnectedPE : public PE {
output
->
flush
();
output
->
flush
();
output
->
scale
()[
0
]
=
max
/
127.0
f
;
output
->
scale
()[
0
]
=
max
/
127.0
f
;
output
->
scale
()[
1
]
=
127.0
f
/
max
;
output
->
scale
()[
1
]
=
127.0
f
/
max
;
output
->
saveToFile
(
"cpu_compute"
,
true
);
// exit(-1);
}
void
batch_to_w
()
{
ConvParam
&
convParam_
=
convPE_
.
param
();
int
channel
=
param_
.
input
->
shape
().
channel
();
param_
.
input
->
invalidate
();
int
remainder
=
aligned_input_
.
shape
().
channel
()
-
param_
.
input
->
shape
().
channel
();
float
max
=
0
;
for
(
int
n
=
0
;
n
<
param_
.
input
->
shape
().
num
();
n
++
)
{
memset
(
aligned_input_
.
data
<
float16
>
(),
0
,
aligned_input_
.
shape
().
channel
()
*
sizeof
(
float16
));
memcpy
(
aligned_input_
.
data
<
float16
>
()
+
n
*
aligned_input_
.
shape
().
channel
(),
param_
.
input
->
data
<
float16
>
()
+
n
*
channel
,
channel
*
sizeof
(
float16
));
aligned_input_
.
copyScaleFrom
(
param_
.
input
);
aligned_input_
.
flush
();
}
convPE_
.
dispatch
();
}
}
bool
dispatch
()
{
bool
dispatch
()
{
//
int num = param_.filter->shape().channel
();
//
batch_to_w
();
//
if (num == 2) {
//
return 1;
//
cpu_compute
();
//
cpu_compute1
();
//
return 1;
// return 1;
// } else {
return
convPE_
.
dispatch
();
return
convPE_
.
dispatch
();
// }
// }
}
}
...
@@ -131,7 +169,10 @@ class FullyConnectedPE : public PE {
...
@@ -131,7 +169,10 @@ class FullyConnectedPE : public PE {
private:
private:
FullyConnectedParam
param_
;
FullyConnectedParam
param_
;
Tensor
aligned_input_
;
Tensor
aligned_output_
;
ConvPE
convPE_
;
ConvPE
convPE_
;
Tensor
conv_filter_
;
};
};
}
// namespace zynqmp
}
// namespace zynqmp
}
// namespace paddle
}
// namespace paddle
lite/backends/fpga/KD/pes/input_pe.hpp
浏览文件 @
dd2b700d
...
@@ -29,6 +29,7 @@ class InputPE : public PE {
...
@@ -29,6 +29,7 @@ class InputPE : public PE {
}
}
bool
dispatch
()
{
bool
dispatch
()
{
// std::cout << "input_dispatch()\n";
Tensor
*
input
=
param_
.
input
;
Tensor
*
input
=
param_
.
input
;
Tensor
*
output
=
param_
.
output
;
Tensor
*
output
=
param_
.
output
;
...
...
lite/backends/fpga/KD/pes/output_pe.hpp
浏览文件 @
dd2b700d
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#pragma once
#include "lite/backends/fpga/KD/llapi/zynqmp_api.h"
#include "lite/backends/fpga/KD/pe.hpp"
#include "lite/backends/fpga/KD/pe.hpp"
#include "lite/backends/fpga/KD/pe_params.hpp"
#include "lite/backends/fpga/KD/pe_params.hpp"
...
@@ -52,6 +53,12 @@ class OutputPE : public PE {
...
@@ -52,6 +53,12 @@ class OutputPE : public PE {
memcpy
(
DLEngine
::
get_instance
().
out_data
,
memcpy
(
DLEngine
::
get_instance
().
out_data
,
output
->
data
<
void
>
(),
output
->
data
<
void
>
(),
output
->
shape
().
numel
()
*
sizeof
(
float
));
output
->
shape
().
numel
()
*
sizeof
(
float
));
fpga_reset
();
auto
max
=
fpga_get_memory_size_max
();
std
::
cout
<<
"PL ===== Max: ===== :: "
<<
max
<<
std
::
endl
;
return
true
;
return
true
;
}
}
...
...
lite/backends/fpga/KD/tensor.hpp
浏览文件 @
dd2b700d
...
@@ -103,12 +103,18 @@ class Tensor {
...
@@ -103,12 +103,18 @@ class Tensor {
return
reinterpret_cast
<
Dtype
*>
(
ptr
);
return
reinterpret_cast
<
Dtype
*>
(
ptr
);
}
}
// Marks this tensor as released and drops its reference to the PlaceHolder
// (placeHolder_ is a std::shared_ptr, so the memory is freed only if no other
// owner remains). The `released` flag is what makes flush() return early
// afterwards instead of dereferencing the now-empty placeHolder_.
void
releaseData
()
{
released
=
true
;
placeHolder_
.
reset
();
}
template
<
typename
Dtype
>
template
<
typename
Dtype
>
Dtype
*
mutableData
(
DataType
dataType
,
const
Shape
&
shape
)
{
Dtype
*
mutableData
(
DataType
dataType
,
const
Shape
&
shape
)
{
if
(
this
->
shape_
!=
nullptr
)
{
// std::cout << "enter \n";
delete
shape_
;
// std::cout << "before new shape\n";
}
// this->shape_ = new Shape(shape);
this
->
shape_
=
new
Shape
(
shape
);
this
->
shape_
.
reset
(
new
Shape
(
shape
));
// std::cout << "new shape \n";
this
->
dataType_
=
dataType
;
this
->
dataType_
=
dataType
;
return
mutableData
<
Dtype
>
();
return
mutableData
<
Dtype
>
();
}
}
...
@@ -117,11 +123,14 @@ class Tensor {
...
@@ -117,11 +123,14 @@ class Tensor {
Dtype
*
mutableData
()
{
Dtype
*
mutableData
()
{
size_t
memorySize
=
size_t
memorySize
=
shape_
->
memorySize
(
CellSize
(
dataType_
))
*
mem_scale_factor_
;
shape_
->
memorySize
(
CellSize
(
dataType_
))
*
mem_scale_factor_
;
// std::cout << "mem_size:" << memorySize << std::endl;
if
(
placeHolder_
!=
nullptr
)
{
if
(
placeHolder_
!=
nullptr
)
{
// std::cout << "placeHolder_ not null"<< std::endl;
if
(
memorySize
>
placeHolder_
->
memorySize
())
{
if
(
memorySize
>
placeHolder_
->
memorySize
())
{
placeHolder_
.
reset
(
new
PlaceHolder
(
memorySize
));
placeHolder_
.
reset
(
new
PlaceHolder
(
memorySize
));
}
}
}
else
{
}
else
{
// std::cout << "placeHolder_ null"<< std::endl;
placeHolder_
.
reset
(
new
PlaceHolder
(
memorySize
));
placeHolder_
.
reset
(
new
PlaceHolder
(
memorySize
));
}
}
return
data
<
Dtype
>
();
return
data
<
Dtype
>
();
...
@@ -138,7 +147,7 @@ class Tensor {
...
@@ -138,7 +147,7 @@ class Tensor {
DataType
dataType
()
{
return
this
->
dataType_
;
}
DataType
dataType
()
{
return
this
->
dataType_
;
}
Shape
&
shape
()
{
return
*
shape_
;
}
Shape
&
shape
()
{
return
*
(
shape_
.
get
())
;
}
bool
aligned
()
{
return
this
->
aligned_
;
}
bool
aligned
()
{
return
this
->
aligned_
;
}
...
@@ -247,15 +256,17 @@ class Tensor {
...
@@ -247,15 +256,17 @@ class Tensor {
void
shareDataWith
(
Tensor
*
src
)
{
shareDataWith
(
src
,
src
->
shape
());
}
void
shareDataWith
(
Tensor
*
src
)
{
shareDataWith
(
src
,
src
->
shape
());
}
void
shareDataWith
(
Tensor
*
src
,
const
Shape
&
shape
,
int
offset
=
0
)
{
void
shareDataWith
(
Tensor
*
src
,
const
Shape
&
shape
,
int
offset
=
0
)
{
if
(
shape_
!=
nullptr
)
{
// if (shape_ != nullptr) {
delete
shape_
;
// delete shape_;
}
// }
this
->
placeHolder_
=
src
->
placeHolder_
;
this
->
placeHolder_
=
src
->
placeHolder_
;
this
->
dataType_
=
src
->
dataType_
;
this
->
dataType_
=
src
->
dataType_
;
this
->
aligned_
=
src
->
aligned_
;
this
->
aligned_
=
src
->
aligned_
;
this
->
dateLocation_
=
src
->
dateLocation_
;
this
->
dateLocation_
=
src
->
dateLocation_
;
this
->
offset
=
offset
;
this
->
offset
=
offset
;
shape_
=
new
Shape
(
const_cast
<
Shape
&>
(
shape
));
// shape_ = new Shape(const_cast<Shape&>(shape));
shape_
.
reset
(
new
Shape
(
shape
));
}
}
void
copyFrom
(
Tensor
*
src
)
{
void
copyFrom
(
Tensor
*
src
)
{
...
@@ -300,7 +311,13 @@ class Tensor {
...
@@ -300,7 +311,13 @@ class Tensor {
}
}
void
flush
()
{
void
flush
()
{
size_t
memorySize
=
placeHolder_
->
memorySize
();
if
(
released
)
{
// std::cout << "flush::" << this << std::endl;
return
;
}
size_t
memorySize
=
shape_
->
memorySize
(
CellSize
(
dataType_
))
*
mem_scale_factor_
;
fpga_flush
(
placeHolder_
->
data
(),
memorySize
);
fpga_flush
(
placeHolder_
->
data
(),
memorySize
);
}
}
...
@@ -451,18 +468,13 @@ class Tensor {
...
@@ -451,18 +468,13 @@ class Tensor {
return
os
;
return
os
;
}
}
~
Tensor
()
{
if
(
shape_
!=
nullptr
)
{
delete
shape_
;
shape_
=
nullptr
;
}
}
private:
private:
bool
released
=
false
;
int
offset
=
0
;
int
offset
=
0
;
float
mem_scale_factor_
=
1.0
f
;
float
mem_scale_factor_
=
1.0
f
;
std
::
shared_ptr
<
PlaceHolder
>
placeHolder_
;
std
::
shared_ptr
<
PlaceHolder
>
placeHolder_
;
Shape
*
shape_
=
nullptr
;
std
::
shared_ptr
<
Shape
>
shape_
;
// Shape* shape_ = nullptr;
DataType
dataType_
=
FP32
;
DataType
dataType_
=
FP32
;
bool
aligned_
=
false
;
bool
aligned_
=
false
;
DataSyncStatus
synchedStatus_
=
Synched
;
DataSyncStatus
synchedStatus_
=
Synched
;
...
...
lite/backends/fpga/lite_tensor.cc
浏览文件 @
dd2b700d
...
@@ -69,7 +69,7 @@ std::string DDimLite::repr() const {
...
@@ -69,7 +69,7 @@ std::string DDimLite::repr() const {
}
}
void
TensorLite
::
ShareDataWith
(
const
TensorLite
&
other
)
{
void
TensorLite
::
ShareDataWith
(
const
TensorLite
&
other
)
{
buffer_
=
other
.
buffer_
;
buffer_
=
other
.
buffer_
;
// TODO(chonwhite) delete buffer;
dims_
=
other
.
dims_
;
dims_
=
other
.
dims_
;
zynq_tensor_
=
other
.
zynq_tensor_
;
zynq_tensor_
=
other
.
zynq_tensor_
;
target_
=
other
.
target_
;
target_
=
other
.
target_
;
...
@@ -79,10 +79,10 @@ void TensorLite::ShareDataWith(const TensorLite &other) {
...
@@ -79,10 +79,10 @@ void TensorLite::ShareDataWith(const TensorLite &other) {
}
}
void
*
TensorLite
::
mutable_data
(
size_t
memory_size
)
{
void
*
TensorLite
::
mutable_data
(
size_t
memory_size
)
{
memory_size_
=
memory_size
;
memory_size_
=
memory_size
;
// TODO(chonwhite) delete buffer;
buffer_
->
ResetLazy
(
target_
,
memory_size_
);
buffer_
->
ResetLazy
(
target_
,
memory_size_
);
// throw -1;
// throw -1;
std
::
cout
<<
memory_size
<<
std
::
endl
;
//
std::cout << memory_size << std::endl;
return
buffer_
->
data
();
return
buffer_
->
data
();
}
}
...
@@ -92,16 +92,34 @@ void *TensorLite::mutable_data(TargetType target, size_t memory_size) {
...
@@ -92,16 +92,34 @@ void *TensorLite::mutable_data(TargetType target, size_t memory_size) {
}
}
void
TensorLite
::
CopyDataFrom
(
const
TensorLite
&
other
)
{
void
TensorLite
::
CopyDataFrom
(
const
TensorLite
&
other
)
{
// std::cout << "other11:: "<< &other << std::endl;
dims_
=
other
.
dims_
;
dims_
=
other
.
dims_
;
target_
=
other
.
target_
;
target_
=
other
.
target_
;
lod_
=
other
.
lod_
;
lod_
=
other
.
lod_
;
auto
dt
=
zynq_tensor_
->
dataType
()
;
// std::cout << "before dataType\n"
;
auto
shape
=
other
.
zynq_tensor_
->
shape
();
if
(
zynq_tensor_
.
get
()
==
nullptr
)
{
zynq_tensor_
.
reset
(
new
zynqmp
::
Tensor
());
}
auto
dt
=
zynq_tensor_
->
dataType
();
// std::cout << "after dataType\n";
// std::cout << "before resize\n";
Resize
(
other
.
dims
());
Resize
(
other
.
dims
());
auto
shape
=
other
.
zynq_tensor_
->
shape
();
// std::cout << "after resize\n";
zynq_tensor_
->
mutableData
<
void
>
(
zynq_tensor_
->
dataType
(),
shape
);
zynq_tensor_
->
mutableData
<
void
>
(
zynq_tensor_
->
dataType
(),
shape
);
this
->
ZynqTensor
()
->
copyFrom
(
other
.
ZynqTensor
());
// std::cout << "after mutableData\n";
// std::cout << "ZynqTensor():" << this->ZynqTensor() << std::endl;
// std::cout << "other Tensor():" << other.ZynqTensor() << std::endl;
// this->ZynqTensor()->copyFrom(other.ZynqTensor());
memcpy
(
this
->
ZynqTensor
()
->
data
<
void
>
(),
other
.
ZynqTensor
()
->
data
<
void
>
(),
other
.
ZynqTensor
()
->
shape
().
numel
()
*
sizeof
(
float
));
// memcpy()
// std::cout << "after copyFrom\n";
}
}
}
// namespace lite
}
// namespace lite
...
...
lite/backends/fpga/lite_tensor.h
浏览文件 @
dd2b700d
...
@@ -81,6 +81,10 @@ class DDimLite {
...
@@ -81,6 +81,10 @@ class DDimLite {
return
!
(
a
==
b
);
return
!
(
a
==
b
);
}
}
// Empty destructor; the only content is a disabled debug trace.
// NOTE(review): a user-declared destructor stops the compiler from generating
// the implicit move constructor / move assignment, so unless they are declared
// elsewhere in the class, moving a DDimLite copies its data_ vector. Consider
// removing this destructor once the trace is no longer needed.
~
DDimLite
()
{
// std::cout << "free DDimLite\n";
}
private:
private:
std
::
vector
<
value_type
>
data_
;
std
::
vector
<
value_type
>
data_
;
};
};
...
@@ -109,7 +113,12 @@ class TensorLite {
...
@@ -109,7 +113,12 @@ class TensorLite {
return
zynq_tensor_
->
data
<
R
>
()
+
offset_
;
return
zynq_tensor_
->
data
<
R
>
()
+
offset_
;
}
}
void
Resize
(
const
DDimLite
&
ddim
)
{
dims_
=
ddim
;
}
// Replaces the stored dimensions with a copy of `ddim`. Only dims_ is
// assigned here; no buffer is touched in this method. The commented-out
// lines are disabled debug traces.
void
Resize
(
const
DDimLite
&
ddim
)
{
// std::cout << "Resize \n";
// std::cout << "ddim:" << & ddim << std::endl;
dims_
=
ddim
;
// std::cout << "after Reize \n";
}
void
Resize
(
const
std
::
vector
<
int64_t
>
&
x
)
{
dims_
=
DDimLite
(
x
);
}
void
Resize
(
const
std
::
vector
<
int64_t
>
&
x
)
{
dims_
=
DDimLite
(
x
);
}
const
DDimLite
&
dims
()
const
{
return
dims_
;
}
const
DDimLite
&
dims
()
const
{
return
dims_
;
}
...
@@ -142,7 +151,9 @@ class TensorLite {
...
@@ -142,7 +151,9 @@ class TensorLite {
void
*
mutable_data
(
size_t
memory_size
);
void
*
mutable_data
(
size_t
memory_size
);
void
*
mutable_data
(
TargetType
target
,
size_t
memory_size
);
void
*
mutable_data
(
TargetType
target
,
size_t
memory_size
);
const
void
*
raw_data
()
const
{
return
buffer_
->
data
();
}
const
void
*
raw_data
()
const
{
return
buffer_
->
data
();
}
// TODO(chonwhite) delete buffer;
size_t
data_size
()
const
{
return
this
->
dims
().
production
();
}
size_t
data_size
()
const
{
return
this
->
dims
().
production
();
}
...
@@ -150,7 +161,9 @@ class TensorLite {
...
@@ -150,7 +161,9 @@ class TensorLite {
size_t
offset
()
const
{
return
offset_
;
}
size_t
offset
()
const
{
return
offset_
;
}
bool
IsInitialized
()
const
{
return
buffer_
->
data
();
}
bool
IsInitialized
()
const
{
return
buffer_
->
data
();
}
// TODO(chonwhite) delete buffer;
// Other share data to this.
// Other share data to this.
void
ShareDataWith
(
const
TensorLite
&
other
);
void
ShareDataWith
(
const
TensorLite
&
other
);
...
@@ -165,7 +178,10 @@ class TensorLite {
...
@@ -165,7 +178,10 @@ class TensorLite {
TargetType
target
()
const
{
return
target_
;
}
TargetType
target
()
const
{
return
target_
;
}
zynqmp
::
Tensor
*
ZynqTensor
()
const
{
return
zynq_tensor_
;
}
// template <typename T>
// TensorLite Slice(int64_t begin, int64_t end) const;
// Returns a non-owning raw pointer to the zynqmp::Tensor held by
// zynq_tensor_ (a std::shared_ptr). The pointer is null until the tensor has
// been created; mutable_data() and CopyDataFrom() create it on demand.
zynqmp
::
Tensor
*
ZynqTensor
()
const
{
return
zynq_tensor_
.
get
();
}
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
TensorLite
&
tensor
)
{
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
TensorLite
&
tensor
)
{
os
<<
"Tensor:"
<<
'\n'
;
os
<<
"Tensor:"
<<
'\n'
;
...
@@ -194,7 +210,8 @@ class TensorLite {
...
@@ -194,7 +210,8 @@ class TensorLite {
size_t
memory_size_
{};
size_t
memory_size_
{};
size_t
offset_
{
0
};
size_t
offset_
{
0
};
zynqmp
::
Tensor
*
zynq_tensor_
=
new
zynqmp
::
Tensor
();
// zynqmp::Tensor *zynq_tensor_ = new zynqmp::Tensor();
std
::
shared_ptr
<
zynqmp
::
Tensor
>
zynq_tensor_
;
template
<
typename
T
>
template
<
typename
T
>
void
mutable_data_internal
();
void
mutable_data_internal
();
...
@@ -203,6 +220,7 @@ class TensorLite {
...
@@ -203,6 +220,7 @@ class TensorLite {
template
<
typename
T
,
typename
R
>
template
<
typename
T
,
typename
R
>
R
*
TensorLite
::
mutable_data
()
{
R
*
TensorLite
::
mutable_data
()
{
std
::
vector
<
int
>
v
;
std
::
vector
<
int
>
v
;
// std::cout << "mutable_data \n";
for
(
int
i
=
0
;
i
<
dims_
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
dims_
.
size
();
i
++
)
{
v
.
push_back
(
dims_
[
i
]);
v
.
push_back
(
dims_
[
i
]);
}
}
...
@@ -225,7 +243,7 @@ R *TensorLite::mutable_data() {
...
@@ -225,7 +243,7 @@ R *TensorLite::mutable_data() {
break
;
break
;
}
}
zynqmp
::
Shape
input_shape
(
layout_type
,
v
);
zynqmp
::
Shape
input_shape
(
layout_type
,
v
);
// std::cout << "input_shape \n";
zynqmp
::
DataType
data_type
=
zynqmp
::
FP32
;
zynqmp
::
DataType
data_type
=
zynqmp
::
FP32
;
if
(
typeid
(
T
)
==
typeid
(
float
))
{
if
(
typeid
(
T
)
==
typeid
(
float
))
{
data_type
=
zynqmp
::
FP32
;
data_type
=
zynqmp
::
FP32
;
...
@@ -233,6 +251,13 @@ R *TensorLite::mutable_data() {
...
@@ -233,6 +251,13 @@ R *TensorLite::mutable_data() {
if
(
typeid
(
T
)
==
typeid
(
zynqmp
::
float16
))
{
if
(
typeid
(
T
)
==
typeid
(
zynqmp
::
float16
))
{
data_type
=
zynqmp
::
FP16
;
data_type
=
zynqmp
::
FP16
;
}
}
// std::cout << "mutableData \n";
// std::cout << "zynq_tensor_:" << zynq_tensor_.get() << std::endl;
if
(
zynq_tensor_
.
get
()
==
nullptr
)
{
zynq_tensor_
.
reset
(
new
zynqmp
::
Tensor
());
}
return
zynq_tensor_
->
mutableData
<
R
>
(
data_type
,
input_shape
);
return
zynq_tensor_
->
mutableData
<
R
>
(
data_type
,
input_shape
);
}
}
...
@@ -272,6 +297,7 @@ TensorLite TensorLite::Slice(int64_t begin, int64_t end) const {
...
@@ -272,6 +297,7 @@ TensorLite TensorLite::Slice(int64_t begin, int64_t end) const {
template
<
typename
T
>
template
<
typename
T
>
void
TensorLite
::
Slice
(
TensorLite
&
dst
,
int64_t
begin
,
int64_t
end
)
const
{
void
TensorLite
::
Slice
(
TensorLite
&
dst
,
int64_t
begin
,
int64_t
end
)
const
{
// TODO(chonwhite) delete this function;
CHECK_GE
(
begin
,
0
);
CHECK_GE
(
begin
,
0
);
CHECK_LE
(
end
,
dims_
[
0
]);
CHECK_LE
(
end
,
dims_
[
0
]);
CHECK_LT
(
begin
,
end
);
CHECK_LT
(
begin
,
end
);
...
...
lite/kernels/arm/sequence_pool_compute.cc
浏览文件 @
dd2b700d
...
@@ -59,6 +59,7 @@ void SequencePoolCompute::Run() {
...
@@ -59,6 +59,7 @@ void SequencePoolCompute::Run() {
for
(
int
i
=
0
;
i
<=
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<=
batch_size
;
i
++
)
{
offset_new
[
i
]
=
i
;
offset_new
[
i
]
=
i
;
}
}
(
output
->
mutable_lod
())
->
clear
();
(
output
->
mutable_lod
())
->
push_back
(
offset_new
);
(
output
->
mutable_lod
())
->
push_back
(
offset_new
);
}
}
...
...
lite/kernels/fpga/CMakeLists.txt
浏览文件 @
dd2b700d
...
@@ -14,7 +14,6 @@ add_kernel(conv_compute_fpga FPGA basic SRCS conv_compute.cc DEPS ${fpga_deps})
...
@@ -14,7 +14,6 @@ add_kernel(conv_compute_fpga FPGA basic SRCS conv_compute.cc DEPS ${fpga_deps})
# add_kernel(density_prior_box_compute_fpga FPGA basic SRCS density_prior_box_compute.cc DEPS ${fpga_deps})
# add_kernel(density_prior_box_compute_fpga FPGA basic SRCS density_prior_box_compute.cc DEPS ${fpga_deps})
add_kernel
(
dropout_compute_fpga FPGA basic SRCS dropout_compute.cc DEPS
${
fpga_deps
}
)
add_kernel
(
dropout_compute_fpga FPGA basic SRCS dropout_compute.cc DEPS
${
fpga_deps
}
)
add_kernel
(
elementwise_compute_fpga FPGA basic SRCS elementwise_compute.cc DEPS
${
fpga_deps
}
)
add_kernel
(
elementwise_compute_fpga FPGA basic SRCS elementwise_compute.cc DEPS
${
fpga_deps
}
)
# add_kernel(feed_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps})
add_kernel
(
fc_compute_fpga FPGA basic SRCS fc_compute.cc DEPS
${
fpga_deps
}
)
add_kernel
(
fc_compute_fpga FPGA basic SRCS fc_compute.cc DEPS
${
fpga_deps
}
)
add_kernel
(
gru_compute_fpga FPGA extra SRCS gru_compute.cc DEPS
${
fpga_deps
}
)
add_kernel
(
gru_compute_fpga FPGA extra SRCS gru_compute.cc DEPS
${
fpga_deps
}
)
...
...
lite/kernels/fpga/feed_compute.cc
浏览文件 @
dd2b700d
...
@@ -40,8 +40,8 @@ void FeedCompute::PrepareForRun() {
...
@@ -40,8 +40,8 @@ void FeedCompute::PrepareForRun() {
void
FeedCompute
::
Run
()
{
void
FeedCompute
::
Run
()
{
auto
&
param
=
this
->
Param
<
param_t
>
();
auto
&
param
=
this
->
Param
<
param_t
>
();
Tensor
&
x
=
param
.
feed_list
->
at
(
param
.
col
);
Tensor
&
x
=
param
.
feed_list
->
at
(
param
.
col
);
pe_
.
param
().
input
=
x
.
ZynqTensor
();
pe_
.
dispatch
();
pe_
.
dispatch
();
auto
out_lod
=
param
.
out
->
mutable_lod
();
auto
out_lod
=
param
.
out
->
mutable_lod
();
*
out_lod
=
x
.
lod
();
*
out_lod
=
x
.
lod
();
...
...
lite/kernels/fpga/fetch_compute.cc
浏览文件 @
dd2b700d
...
@@ -82,6 +82,6 @@ REGISTER_LITE_KERNEL(fetch,
...
@@ -82,6 +82,6 @@ REGISTER_LITE_KERNEL(fetch,
kNHWC
,
kNHWC
,
paddle
::
lite
::
kernels
::
fpga
::
FetchCompute
,
paddle
::
lite
::
kernels
::
fpga
::
FetchCompute
,
host_host
)
host_host
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
k
Host
))})
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
k
ARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kHost
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kHost
))})
.
Finalize
();
.
Finalize
();
lite/kernels/fpga/mul_compute.cc
浏览文件 @
dd2b700d
...
@@ -80,7 +80,8 @@ void mul(MulCompute* k) {
...
@@ -80,7 +80,8 @@ void mul(MulCompute* k) {
}
}
void
MulCompute
::
Run
()
{
void
MulCompute
::
Run
()
{
pe_
.
dispatch
();
// pe_.dispatch();
mul
(
this
);
#ifdef FPGA_PRINT_TENSOR
#ifdef FPGA_PRINT_TENSOR
zynqmp
::
FullyConnectedParam
&
fc_param
=
pe_
.
param
();
zynqmp
::
FullyConnectedParam
&
fc_param
=
pe_
.
param
();
Debugger
::
get_instance
().
registerOutput
(
"mul"
,
fc_param
.
output
);
Debugger
::
get_instance
().
registerOutput
(
"mul"
,
fc_param
.
output
);
...
...
lite/kernels/host/one_hot_compute.cc
浏览文件 @
dd2b700d
...
@@ -16,7 +16,7 @@
...
@@ -16,7 +16,7 @@
#include <utility>
#include <utility>
#include <vector>
#include <vector>
#include "lite/backends/fpga/KD/debugger.hpp"
//
#include "lite/backends/fpga/KD/debugger.hpp"
#include "lite/kernels/host/one_hot_compute.h"
#include "lite/kernels/host/one_hot_compute.h"
#include "lite/utils/paddle_enforce.h"
#include "lite/utils/paddle_enforce.h"
...
...
lite/operators/one_hot_op.cc
浏览文件 @
dd2b700d
...
@@ -15,7 +15,7 @@
...
@@ -15,7 +15,7 @@
#include "lite/operators/one_hot_op.h"
#include "lite/operators/one_hot_op.h"
#include "lite/core/op_registry.h"
#include "lite/core/op_registry.h"
#include "lite/backends/fpga/KD/debugger.hpp"
//
#include "lite/backends/fpga/KD/debugger.hpp"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录