PaddlePaddle / Paddle-Lite
Commit 9d2d3d0f
Authored Mar 16, 2020 by chonwhite
Parent: 9c15846a

fixed memory leak

Showing 12 changed files with 425 additions and 46 deletions (+425 -46)
Changed files:

  lite/backends/fpga/KD/debugger.hpp                  +12   -0
  lite/backends/fpga/KD/dl_engine.hpp                  +1   -0
  lite/backends/fpga/KD/io.cpp                        +43   -0
  lite/backends/fpga/KD/io.hpp                        +50   -0
  lite/backends/fpga/KD/pes/fully_connected_pe.hpp   +218  -15
  lite/backends/fpga/KD/pes/input_pe.hpp               +9   -0
  lite/backends/fpga/KD/pes/output_pe.hpp              +5   -0
  lite/backends/fpga/KD/tensor.hpp                    +31  -17
  lite/backends/fpga/lite_tensor.cc                   +24   -6
  lite/backends/fpga/lite_tensor.h                    +29   -6
  lite/kernels/fpga/feed_compute.cc                    +1   -1
  lite/kernels/fpga/mul_compute.cc                     +2   -1
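The diffs below share one theme: allocations owned through raw pointers (`Shape*`, `zynqmp::Tensor*`) that could be orphaned when the pointer was reassigned become `std::shared_ptr` members, and scratch tensors such as the FC filter become reusable members instead of per-call `new Tensor()`. A minimal before/after sketch of the pattern, with hypothetical names for illustration:

```cpp
#include <memory>

struct Shape { int n = 0; };

// Before: raw owning pointer. Re-pointing it without a delete
// (as some of the old paths below did) leaks the previous Shape.
struct TensorBefore {
  Shape* shape_ = nullptr;
  void setShape(const Shape& s) {
    shape_ = new Shape(s);  // old Shape leaks if one was already set
  }
  ~TensorBefore() { delete shape_; }
};

// After: reset() frees the previous object automatically, and copies
// of the pointer are reference-counted instead of double-owned.
struct TensorAfter {
  std::shared_ptr<Shape> shape_;
  void setShape(const Shape& s) { shape_.reset(new Shape(s)); }
};
```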
lite/backends/fpga/KD/debugger.hpp

```diff
@@ -14,6 +14,8 @@
 #pragma once
 
 #include <fstream>
 #include <iostream>
+#include <string>
+#include <unordered_map>
@@ -37,8 +39,18 @@ class Debugger {
     }
   }
 
+  void tick(std::string key) {
+    float value = 0;
+    if (tick_tock_map.count(key) > 0) {
+      value += tick_tock_map[key] = value;
+    }
+  }
+
+  void tock(std::string key) {}
+
  private:
   std::unordered_map<std::string, bool> op_config;
+  std::unordered_map<std::string, float> tick_tock_map;
+
   Debugger() {
     op_config["concat"] = true;
     op_config["pooling"] = true;
```
lite/backends/fpga/KD/dl_engine.hpp

```diff
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 
 #include <stdio.h>
+#include "lite/backends/fpga/KD/llapi/filter.h"
 #include "lite/backends/fpga/KD/llapi/zynqmp_api.h"
```
lite/backends/fpga/KD/io.cpp (new file, 0 → 100644)

```cpp
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "io.hpp"

namespace paddle {
namespace zynqmp {

// FpgaIO::FpgaIO() {}

// void FpgaIO::setMutex(std::mutex* mtx) { mtx_ = mtx; }

// void FpgaIO::setConditionVariable(std::condition_variable* condition) {
//   condition_ = condition;
// }

// void FpgaIO::lock() {
//   if (mtx_ != nullptr && !locked_) {
//     mtx_->lock();
//     locked_ = true;
//   }
// }

// void FpgaIO::unlock() {
//   if (mtx_ != nullptr) {
//     mtx_->unlock();
//     condition_->notify_one();
//   }
//   locked_ = false;
// }

}  // namespace zynqmp
}  // namespace paddle
```
lite/backends/fpga/KD/io.hpp (new file, 0 → 100644)

```cpp
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <stdio.h>
// #include <condition_variable>
// #include <mutex>

namespace paddle {
namespace zynqmp {

class FpgaIO {
 public:
  static FpgaIO& get_instance() {
    static FpgaIO s_instance;
    return s_instance;
  }

  void allocData(size_t s) { data_ = new float[s]; }

  float* getData() { return data_; }

  // void setMutex(std::mutex* mtx);
  // void setConditionVariable(std::condition_variable* condition);
  // void lock();
  // void unlock();

 private:
  std::mutex* mtx_ = nullptr;
  std::condition_variable* condition_ = nullptr;
  bool locked_ = false;
  float* data_ = nullptr;

  FpgaIO();
};

}  // namespace zynqmp
}  // namespace paddle
```
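For orientation: `FpgaIO` is a Meyers singleton, so callers reach the one instance through `get_instance()`. A minimal usage sketch (hypothetical call site; note that as committed the class leans on the commented-out `<mutex>`/`<condition_variable>` includes arriving transitively, and on the private constructor getting a definition, since it is commented out in io.cpp above):

```cpp
#include "lite/backends/fpga/KD/io.hpp"

void warm_up_io_buffer() {
  // One process-wide instance, constructed on first use.
  paddle::zynqmp::FpgaIO& io = paddle::zynqmp::FpgaIO::get_instance();
  io.allocData(1024);         // hypothetical size; allocates new float[1024]
  float* buf = io.getData();  // the same buffer is visible to any later caller
  buf[0] = 0.0f;              // note: a second allocData() would leak this array
}
```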
lite/backends/fpga/KD/pes/fully_connected_pe.hpp

```diff
@@ -14,6 +14,8 @@ limitations under the License. */
 #pragma once
 
 #include <math.h>
+#include <cmath>
+#include <vector>
 
 #include "lite/backends/fpga/KD/pe.hpp"
@@ -37,10 +39,9 @@ class FullyConnectedPE : public PE {
     ConvParam& convParam_ = convPE_.param();
-    Tensor* input = param_.input;
     convParam_.input = param_.input;
     num_ = param_.input->shape().num();
     convParam_.output = param_.output;
     convParam_.relu = param_.relu;
     // convParam_.activeParam.type = param_.activeParam.type;
     convParam_.groups = 1;
     convParam_.strides = {1, 1};
     convParam_.paddings = {0, 0};
@@ -49,34 +50,54 @@ class FullyConnectedPE : public PE {
     int num = param_.filter->shape().channel();
     int chw = param_.filter->shape().num();
+
+    int align = 32;
+    int chw_aligned = ((chw + align - 1) / align) * align;
+
+    int infer_num = 1;
+    Shape in_shape(NCHW, {infer_num, chw_aligned, 1, 1});
+    aligned_input_.mutableData<float16>(FP16, in_shape);
+    convParam_.input = &aligned_input_;
+
+    Shape out_shape(NCHW, {infer_num, num, 1, 1});
+    aligned_output_.mutableData<float16>(FP16, out_shape);
+    convParam_.output = &aligned_output_;
 
     int height = param_.input->shape().height();
     int width = param_.input->shape().width();
-    int filter_channel = chw / height / width;
+    // int filter_channel = chw / height / width;
 
     int channel = param_.output->shape().channel();
-    Shape shape(NCHW, {num, filter_channel, height, width});
-    Tensor* conv_filter = new Tensor();
-    float* new_filter_data = conv_filter->mutableData<float>(FP32, shape);
+    Shape shape(NCHW, {num, chw_aligned, 1, 1});
+    float* new_filter_data = conv_filter_.mutableData<float>(FP32, shape);
     float* filter_data = param_.filter->data<float>();
+    memset(new_filter_data, 0, num * chw_aligned * sizeof(float));
 
     for (int i = 0; i < num; i++) {
       for (int j = 0; j < chw; j++) {
         float scale = filter_data[j * num + i];
-        new_filter_data[i * chw + j] = scale;
+        new_filter_data[i * chw_aligned + j] = scale;
       }
     }
-    conv_filter->flush();
-    convParam_.filter = conv_filter;
-    Shape sb_shape(N, {channel});
+    conv_filter_.flush();
+    convParam_.filter = &conv_filter_;
+    // param_.filter->saveToFile("param_filter", true);
+    // conv_filter->saveToFile("conv_filter", true);
+    // exit(-1);
+
+    Shape sb_shape(N, {num});
     float* scale_data = convParam_.scale()->mutableData<float>(FP32, sb_shape);
     float* bias_data = convParam_.bias()->mutableData<float>(FP32, sb_shape);
 
-    for (int i = 0; i < channel; i++) {
+    for (int i = 0; i < num; i++) {
       scale_data[i] = 1.0f;
       bias_data[i] = param_.bias->data<float>()[i];
     }
+    // for (int i = 0; i < num; i++) {
+    //   scale_data[i] = 1.0f;
+    //   bias_data[i] = param_.bias->data<float>()[i];
+    // }
     convParam_.scale()->flush();
     convParam_.bias()->flush();
@@ -84,15 +105,197 @@ class FullyConnectedPE : public PE {
     convPE_.apply();
   }
 
-  bool dispatch() { return convPE_.dispatch(); }
+  void cpu_compute() {
+    int num = param_.filter->shape().channel();
+    int chw = param_.filter->shape().num();
+
+    float* filter_data = param_.filter->data<float>();
+    float max = 0.0f;
+    Tensor* input = param_.input;
+    Tensor* output = param_.output;
+    float16* input_data = input->data<float16>();
+    float16* output_data = output->data<float16>();
+
+    for (int n = 0; n < input->shape().num(); n++) {
+      float16* input_data = input->data<float16>() + n * chw;
+      float16* output_data =
+          output->data<float16>() + n * output->shape().channel();
+      for (int i = 0; i < num; i++) {
+        float sum = 0;
+        float bias = param_.bias->data<float>()[i];
+        for (int j = 0; j < chw; j++) {
+          float scale = filter_data[j * num + i];
+          float data = half_to_float(input_data[j]);
+          sum += scale * data;
+        }
+        output_data[i] = float_to_half(sum + bias);
+        if (max < output_data[i]) {
+          max = output_data[i];
+        }
+      }
+    }
+
+    output->flush();
+    output->scale()[0] = max / 127.0f;
+    output->scale()[1] = 127.0f / max;
+  }
+
+  void cpu_compute1() {
+    int num = conv_filter_.shape().num();
+    int chw = conv_filter_.shape().channel();
+    // chw = 336;
+
+    float* filter_data = conv_filter_.data<float>();
+    float max = 0.0f;
+    Tensor* input = param_.input;
+    Tensor* output = param_.output;
+    float16* input_data = input->data<float16>();
+    float16* output_data = output->data<float16>();
+
+    for (int n = 0; n < input->shape().num(); n++) {
+      float16* input_data = input->data<float16>() + n * chw;
+      float16* output_data =
+          output->data<float16>() + n * output->shape().channel();
+      for (int i = 0; i < num; i++) {
+        float sum = 0;
+        float bias = param_.bias->data<float>()[i];
+        for (int j = 0; j < chw; j++) {
+          float scale = filter_data[i * chw + j];
+          float data = half_to_float(input_data[j]);
+          sum += scale * data;
+        }
+        float value = sum + bias;
+        if (std::isinf(value) || i > 321) {
+          std::cout << "i:" << i << " sum:" << sum << " bias:" << bias
+                    << std::endl;
+          // exit(-1);
+        }
+        if (i > 321) {
+          std::cout << "i:" << i << " sum:" << sum << " bias:" << bias
+                    << std::endl;
+          // exit(-1);
+        }
+        output_data[i] = float_to_half(value);
+        if (max < value) {
+          max = value;
+        }
+      }
+    }
+
+    output->flush();
+    output->scale()[0] = max / 127.0f;
+    output->scale()[1] = 127.0f / max;
+    output->saveToFile("cpu_compute", true);
+    // exit(-1);
+  }
+
+  void batch_to_w() {
+    ConvParam& convParam_ = convPE_.param();
+    int channel = param_.input->shape().channel();
+    param_.input->invalidate();
+    int remainder =
+        aligned_input_.shape().channel() - param_.input->shape().channel();
+
+    float max = 0;
+    for (int n = 0; n < param_.input->shape().num(); n++) {
+      memset(aligned_input_.data<float16>(),
+             0,
+             aligned_input_.shape().channel() * sizeof(float16));
+      memcpy(aligned_input_.data<float16>() +
+                 n * aligned_input_.shape().channel(),
+             param_.input->data<float16>() + n * channel,
+             channel * sizeof(float16));
+      aligned_input_.copyScaleFrom(param_.input);
+      aligned_input_.flush();
+    }
+    convPE_.dispatch();
+  }
+
+  bool dispatch() {
+    // batch_to_w();
+    // return 1;
+    // cpu_compute1();
+    // return 1;
+    // int num = param_.filter->shape().channel();
+    // if (num == 2) {
+    //   cpu_compute();
+    //   return 1;
+    // } else {
+    //   return convPE_.dispatch();
+    // }
+    ConvParam& convParam_ = convPE_.param();
+    if (param_.input->shape().channel() == 321 &&
+        param_.output->shape().channel() == 384) {
+      // conv_filter_.saveToFile("conv_filter", true);
+      // cpu_compute1();
+      // return 1;
+    }
+
+    int channel = param_.input->shape().channel();
+    param_.input->invalidate();
+    int remainder =
+        aligned_input_.shape().channel() - param_.input->shape().channel();
+
+    float max = 0;
+    for (int n = 0; n < param_.input->shape().num(); n++) {
+      memset(aligned_input_.data<float16>(),
+             0,
+             aligned_input_.shape().channel() * sizeof(float16));
+      memcpy(aligned_input_.data<float16>(),
+             param_.input->data<float16>() + n * channel,
+             channel * sizeof(float16));
+      aligned_input_.copyScaleFrom(param_.input);
+      aligned_input_.flush();
+      if (param_.input->shape().channel() == 321 &&
+          param_.output->shape().channel() == 384) {
+        // aligned_input_.saveToFile("aligned_input_", true);
+        // convParam_.filter->saveToFile("conv_filter", true);
+      }
+      convPE_.dispatch();
+      aligned_output_.invalidate();
+      if (param_.input->shape().num() == 230) {
+        // aligned_output_.saveToFile("ao", true);
+      }
+
+      float16* src = aligned_output_.data<float16>();
+      float16* dst =
+          param_.output->data<float16>() + n * param_.output->shape().channel();
+      memcpy(dst, src, param_.output->shape().channel() * sizeof(float16));
+      if (aligned_output_.scale()[0] > max) {
+        max = aligned_output_.scale()[0];
+      }
+    }
+    param_.output->flush();
+    param_.output->scale()[0] = max / 127.0f;
+    param_.output->scale()[1] = 127.0f / max;
+    // param_.output->saveToFile("out", true);
+    // exit(-1);
+    // cpu_compute();
+    // ConvParam& convParam_ = convPE_.param();
+    // convParam_.scale()->saveToFile("scale", true);
+    return true;
+  }
 
   FullyConnectedParam& param() { return param_; }
 
  private:
   FullyConnectedParam param_;
+  Tensor aligned_input_;
+  Tensor aligned_output_;
   ConvPE convPE_;
   Tensor tempOut_;
+  int num_ = 1;
+  Tensor conv_filter_;
 };
 
 }  // namespace zynqmp
 }  // namespace paddle
```
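The bulk of the `apply()` rewrite above is a filter re-layout: the FC weight stored as `[chw, num]` (index `j * num + i`) is transposed into the member tensor `conv_filter_` as `[num, chw_aligned]` rows (index `i * chw_aligned + j`), zero-padded so each row's stride is a multiple of 32; reusing the `conv_filter_` member also retires the old never-freed `new Tensor()`. A standalone toy version of the loop, with `align = 4` standing in for 32:

```cpp
#include <cstdio>
#include <cstring>

int main() {
  const int num = 2, chw = 3, align = 4;
  const int chw_aligned = ((chw + align - 1) / align) * align;  // -> 4

  // Source layout [chw x num]; column i holds filter i: {1,2,3} and {4,5,6}.
  float filter_data[chw * num] = {1, 4, 2, 5, 3, 6};
  float new_filter_data[num * chw_aligned];
  memset(new_filter_data, 0, sizeof(new_filter_data));

  for (int i = 0; i < num; i++) {
    for (int j = 0; j < chw; j++) {
      new_filter_data[i * chw_aligned + j] = filter_data[j * num + i];
    }
  }

  // Prints "1 2 3 0" and "4 5 6 0"; the trailing zeros are alignment padding.
  for (int i = 0; i < num; i++) {
    for (int j = 0; j < chw_aligned; j++) {
      printf("%g ", new_filter_data[i * chw_aligned + j]);
    }
    printf("\n");
  }
  return 0;
}
```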
lite/backends/fpga/KD/pes/input_pe.hpp

```diff
@@ -29,19 +29,28 @@ class InputPE : public PE {
   }
 
   bool dispatch() {
+    std::cout << "input_dispatch()\n";
     Tensor* input = param_.input;
     Tensor* output = param_.output;
 
     Tensor* src = input;
+    // std::cout << "input:" << input << std::endl;
     input->flush();
+    // std::cout << "input_flush()\n";
     Tensor half_tensor;
     if (input->dataType() == DataType::FP32) {
+      // std::cout << "2()\n";
      half_tensor.mutableData<void*>(DataType::FP16, input->shape());
+      // std::cout << "3()\n";
      half_tensor.copyFrom(input);
+      // std::cout << "4()\n";
      src = &half_tensor;
    }
+    // std::cout << "5()\n";
     output->mutableData<void>();
+    // std::cout << "6()\n";
     src->alignImage(output, true);
+    // std::cout << "7()\n";
     return true;
   }
```
lite/backends/fpga/KD/pes/output_pe.hpp

```diff
@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once
 
+#include "lite/backends/fpga/KD/llapi/zynqmp_api.h"
 #include "lite/backends/fpga/KD/pe.hpp"
 #include "lite/backends/fpga/KD/pe_params.hpp"
@@ -52,6 +53,10 @@ class OutputPE : public PE {
     memcpy(DLEngine::get_instance().out_data,
            output->data<void>(),
            output->shape().numel() * sizeof(float));
+
+    // auto max = fpga_get_memory_size_max();
+    // std::cout << "===== Max: ===== :: " << max << std::endl;
+
     return true;
   }
```
lite/backends/fpga/KD/tensor.hpp

```diff
@@ -103,12 +103,18 @@ class Tensor {
     return reinterpret_cast<Dtype*>(ptr);
   }
 
+  void releaseData() {
+    released = true;
+    placeHolder_.reset();
+  }
+
   template <typename Dtype>
   Dtype* mutableData(DataType dataType, const Shape& shape) {
-    if (this->shape_ != nullptr) {
-      delete shape_;
-    }
-    this->shape_ = new Shape(shape);
+    // std::cout << "enter \n";
+    // std::cout << "before new shape\n";
+    // this->shape_ = new Shape(shape);
+    this->shape_.reset(new Shape(shape));
+    // std::cout << "new shape \n";
     this->dataType_ = dataType;
     return mutableData<Dtype>();
   }
@@ -117,11 +123,14 @@ class Tensor {
   Dtype* mutableData() {
     size_t memorySize =
         shape_->memorySize(CellSize(dataType_)) * mem_scale_factor_;
+    // std::cout << "mem_size:" << memorySize << std::endl;
     if (placeHolder_ != nullptr) {
+      // std::cout << "placeHolder_ not null" << std::endl;
       if (memorySize > placeHolder_->memorySize()) {
         placeHolder_.reset(new PlaceHolder(memorySize));
       }
     } else {
+      // std::cout << "placeHolder_ null" << std::endl;
       placeHolder_.reset(new PlaceHolder(memorySize));
     }
     return data<Dtype>();
@@ -138,7 +147,7 @@ class Tensor {
   DataType dataType() { return this->dataType_; }
 
-  Shape& shape() { return *shape_; }
+  Shape& shape() { return *(shape_.get()); }
 
   bool aligned() { return this->aligned_; }
@@ -247,15 +256,17 @@ class Tensor {
   void shareDataWith(Tensor* src) { shareDataWith(src, src->shape()); }
 
   void shareDataWith(Tensor* src, const Shape& shape, int offset = 0) {
-    if (shape_ != nullptr) {
-      delete shape_;
-    }
+    // if (shape_ != nullptr) {
+    //   delete shape_;
+    // }
     this->placeHolder_ = src->placeHolder_;
     this->dataType_ = src->dataType_;
     this->aligned_ = src->aligned_;
     this->dateLocation_ = src->dateLocation_;
     this->offset = offset;
-    shape_ = new Shape(const_cast<Shape&>(shape));
+    // shape_ = new Shape(const_cast<Shape&>(shape));
+    shape_.reset(new Shape(shape));
   }
 
   void copyFrom(Tensor* src) {
@@ -300,6 +311,14 @@ class Tensor {
   }
 
   void flush() {
+    // std::cout << "released:" << released << std::endl;
+    // std::cout << "placeHolder_" << placeHolder_.get() << std::endl;
+    if (released) {
+      // std::cout << "flush::" << this << std::endl;
+      return;
+    }
+
     size_t memorySize =
         shape_->memorySize(CellSize(dataType_)) * mem_scale_factor_;
     fpga_flush(placeHolder_->data(), memorySize);
@@ -463,18 +482,13 @@ class Tensor {
     return os;
   }
 
-  ~Tensor() {
-    if (shape_ != nullptr) {
-      delete shape_;
-      shape_ = nullptr;
-    }
-  }
-
  private:
+  bool released = false;
   int offset = 0;
   float mem_scale_factor_ = 1.0f;
   std::shared_ptr<PlaceHolder> placeHolder_;
-  Shape* shape_ = nullptr;
+  std::shared_ptr<Shape> shape_;
+  // Shape* shape_ = nullptr;
   DataType dataType_ = FP32;
   bool aligned_ = false;
   DataSyncStatus synchedStatus_ = Synched;
```
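Two of the additions above work together: `releaseData()` drops the backing `placeHolder_` early, and the new `released` guard turns a later `flush()` into a no-op instead of an `fpga_flush` on freed memory. A sketch of the intended call order (hypothetical usage of the API from this diff):

```cpp
#include "lite/backends/fpga/KD/tensor.hpp"

void release_example(paddle::zynqmp::Tensor* t) {
  t->flush();        // flushes the live placeholder, as before
  t->releaseData();  // released = true; placeHolder_.reset() frees the memory
  t->flush();        // now returns early via the released guard
}
```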
lite/backends/fpga/lite_tensor.cc

```diff
@@ -69,7 +69,7 @@ std::string DDimLite::repr() const {
 }
 
 void TensorLite::ShareDataWith(const TensorLite &other) {
-  buffer_ = other.buffer_;
+  buffer_ = other.buffer_;  // TODO(chonwhite) delete buffer;
   dims_ = other.dims_;
   zynq_tensor_ = other.zynq_tensor_;
   target_ = other.target_;
@@ -79,10 +79,10 @@ void TensorLite::ShareDataWith(const TensorLite &other) {
 }
 
 void *TensorLite::mutable_data(size_t memory_size) {
-  memory_size_ = memory_size;
+  memory_size_ = memory_size;  // TODO(chonwhite) delete buffer;
   buffer_->ResetLazy(target_, memory_size_);
   // throw -1;
-  std::cout << memory_size << std::endl;
+  // std::cout << memory_size << std::endl;
   return buffer_->data();
 }
@@ -92,16 +92,34 @@ void *TensorLite::mutable_data(TargetType target, size_t memory_size) {
 }
 
 void TensorLite::CopyDataFrom(const TensorLite &other) {
+  // std::cout << "other11:: "<< &other << std::endl;
   dims_ = other.dims_;
   target_ = other.target_;
   lod_ = other.lod_;
-  auto dt = zynq_tensor_->dataType();
-  auto shape = other.zynq_tensor_->shape();
+  // std::cout << "before dataType\n";
+  if (zynq_tensor_.get() == nullptr) {
+    zynq_tensor_.reset(new zynqmp::Tensor());
+  }
+  auto dt = zynq_tensor_->dataType();
+  // std::cout << "after dataType\n";
+  // std::cout << "before resize\n";
   Resize(other.dims());
+  auto shape = other.zynq_tensor_->shape();
+  // std::cout << "after resize\n";
   zynq_tensor_->mutableData<void>(zynq_tensor_->dataType(), shape);
-  this->ZynqTensor()->copyFrom(other.ZynqTensor());
+  // std::cout << "after mutableData\n";
+  // std::cout << "ZynqTensor():" << this->ZynqTensor() << std::endl;
+  // std::cout << "other Tensor():" << other.ZynqTensor() << std::endl;
+  // this->ZynqTensor()->copyFrom(other.ZynqTensor());
+  memcpy(this->ZynqTensor()->data<void>(),
+         other.ZynqTensor()->data<void>(),
+         other.ZynqTensor()->shape().numel() * sizeof(float));
+  // memcpy()
+  // std::cout << "after copyFrom\n";
 }
 
 }  // namespace lite
```
lite/backends/fpga/lite_tensor.h

```diff
@@ -81,6 +81,10 @@ class DDimLite {
     return !(a == b);
   }
 
+  ~DDimLite() {
+    // std::cout << "free DDimLite\n";
+  }
+
  private:
   std::vector<value_type> data_;
 };
@@ -109,7 +113,12 @@ class TensorLite {
     return zynq_tensor_->data<R>() + offset_;
   }
 
-  void Resize(const DDimLite &ddim) { dims_ = ddim; }
+  void Resize(const DDimLite &ddim) {
+    // std::cout << "Resize \n";
+    // std::cout << "ddim:" << &ddim << std::endl;
+    dims_ = ddim;
+    // std::cout << "after Reize \n";
+  }
 
   void Resize(const std::vector<int64_t> &x) { dims_ = DDimLite(x); }
 
   const DDimLite &dims() const { return dims_; }
@@ -142,7 +151,9 @@ class TensorLite {
   void *mutable_data(size_t memory_size);
   void *mutable_data(TargetType target, size_t memory_size);
 
-  const void *raw_data() const { return buffer_->data(); }
+  const void *raw_data() const {
+    return buffer_->data();
+  }  // TODO(chonwhite) delete buffer;
 
   size_t data_size() const { return this->dims().production(); }
@@ -150,7 +161,9 @@ class TensorLite {
   size_t offset() const { return offset_; }
 
-  bool IsInitialized() const { return buffer_->data(); }
+  bool IsInitialized() const {
+    return buffer_->data();
+  }  // TODO(chonwhite) delete buffer;
 
   // Other share data to this.
   void ShareDataWith(const TensorLite &other);
@@ -168,7 +181,7 @@ class TensorLite {
   // template <typename T>
   // TensorLite Slice(int64_t begin, int64_t end) const;
 
-  zynqmp::Tensor *ZynqTensor() const { return zynq_tensor_; }
+  zynqmp::Tensor *ZynqTensor() const { return zynq_tensor_.get(); }
 
   friend std::ostream &operator<<(std::ostream &os, const TensorLite &tensor) {
     os << "Tensor:" << '\n';
@@ -197,7 +210,8 @@ class TensorLite {
   size_t memory_size_{};
   size_t offset_{0};
 
-  zynqmp::Tensor *zynq_tensor_ = new zynqmp::Tensor();
+  // zynqmp::Tensor *zynq_tensor_ = new zynqmp::Tensor();
+  std::shared_ptr<zynqmp::Tensor> zynq_tensor_;
 
   template <typename T>
   void mutable_data_internal();
@@ -206,6 +220,7 @@ class TensorLite {
 template <typename T, typename R>
 R *TensorLite::mutable_data() {
   std::vector<int> v;
+  // std::cout << "mutable_data \n";
   for (int i = 0; i < dims_.size(); i++) {
     v.push_back(dims_[i]);
   }
@@ -228,7 +243,7 @@ R *TensorLite::mutable_data() {
       break;
   }
   zynqmp::Shape input_shape(layout_type, v);
   // std::cout << "input_shape \n";
   zynqmp::DataType data_type = zynqmp::FP32;
   if (typeid(T) == typeid(float)) {
     data_type = zynqmp::FP32;
@@ -236,6 +251,13 @@ R *TensorLite::mutable_data() {
   if (typeid(T) == typeid(zynqmp::float16)) {
     data_type = zynqmp::FP16;
   }
+  // std::cout << "mutableData \n";
+  // std::cout << "zynq_tensor_:" << zynq_tensor_.get() << std::endl;
+
+  if (zynq_tensor_.get() == nullptr) {
+    zynq_tensor_.reset(new zynqmp::Tensor());
+  }
+
   return zynq_tensor_->mutableData<R>(data_type, input_shape);
 }
@@ -276,6 +298,7 @@ TensorLite TensorLite::Slice(int64_t begin, int64_t end) const {
 template <typename T>
 void TensorLite::Slice(TensorLite &dst, int64_t begin, int64_t end) const {
+  // TODO(chonwhite) delete this function;
   CHECK_GE(begin, 0);
   CHECK_LE(end, dims_[0]);
   CHECK_LT(begin, end);
```
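This member change is likely what the commit message points at: every `TensorLite` used to begin life with `zynqmp::Tensor *zynq_tensor_ = new zynqmp::Tensor();`, and `ShareDataWith` then overwrote the pointer with `other.zynq_tensor_`, orphaning the original allocation. With `std::shared_ptr`, the same assignment releases the old tensor and reference-counts the shared one. A reduced illustration with hypothetical stand-in types:

```cpp
#include <memory>

struct ZynqTensor { /* owner of a large FPGA-side buffer */ };

struct TensorLiteBefore {
  ZynqTensor* zynq_tensor_ = new ZynqTensor();
  void ShareDataWith(const TensorLiteBefore& other) {
    zynq_tensor_ = other.zynq_tensor_;  // the original ZynqTensor leaks here
  }
};

struct TensorLiteAfter {
  std::shared_ptr<ZynqTensor> zynq_tensor_;  // created lazily in mutable_data()
  void ShareDataWith(const TensorLiteAfter& other) {
    zynq_tensor_ = other.zynq_tensor_;  // old object freed, new one shared
  }
};
```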
lite/kernels/fpga/feed_compute.cc

```diff
@@ -40,8 +40,8 @@ void FeedCompute::PrepareForRun() {
 void FeedCompute::Run() {
   auto& param = this->Param<param_t>();
   Tensor& x = param.feed_list->at(param.col);
   pe_.param().input = x.ZynqTensor();
   pe_.dispatch();
   auto out_lod = param.out->mutable_lod();
   *out_lod = x.lod();
```
lite/kernels/fpga/mul_compute.cc

```diff
@@ -80,7 +80,8 @@ void mul(MulCompute* k) {
 }
 
 void MulCompute::Run() {
-  pe_.dispatch();
+  // pe_.dispatch();
+  mul(this);
 #ifdef FPGA_PRINT_TENSOR
   zynqmp::FullyConnectedParam& fc_param = pe_.param();
   Debugger::get_instance().registerOutput("mul", fc_param.output);
```