Commit b02b1822 authored by chonwhite

shared library works

Parent a8b4a533
......@@ -104,3 +104,10 @@ metal/paddle-mobile-demo/paddle-mobile-demo/Resources
metal/paddle-mobile-demo/paddle-mobile-demo/Resources/images
metal/paddle-mobile-demo/paddle-mobile-demo/Resources/models
metal/MobileNetDemo/MobileNetDemo/Resources
# generated files
lite/api/paddle_use_kernels.h
lite/api/paddle_use_ops.h
lite/backends/arm/math/dotprod/gemm_sdot.h
lite/tools/cmake_tools/ast.pyc
......@@ -186,6 +186,8 @@ void Predictor::PrepareFeedFetch() {
}
}
#ifndef LITE_WITH_FPGA
const lite::Tensor *Predictor::GetOutput(size_t offset) const {
CHECK(output_names_.size() > offset)
<< "The network has " << output_names_.size() << " outputs"
......@@ -205,6 +207,29 @@ std::vector<const lite::Tensor *> Predictor::GetOutputs() const {
}
return outputs;
}
#else
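// On FPGA builds, outputs are read directly from the "fetch" variable
// list in the exec scope instead of being looked up via output_names_.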
const lite::Tensor *Predictor::GetOutput(size_t offset) const {
auto *_fetch_list = exec_scope_->FindVar("fetch");
CHECK(_fetch_list) << "no fetch variable in exec_scope";
auto &fetch_list = *_fetch_list->GetMutable<std::vector<lite::Tensor>>();
CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow";
return &fetch_list.at(offset);
}
std::vector<const lite::Tensor *> Predictor::GetOutputs() const {
auto *_fetch_list = exec_scope_->FindVar("fetch");
CHECK(_fetch_list) << "no fetch variable in exec_scope";
auto &fetch_list = *_fetch_list->GetMutable<std::vector<lite::Tensor>>();
std::vector<const lite::Tensor *> outputs;
for (const auto &out : fetch_list) {
outputs.push_back(&out);
}
return outputs;
}
#endif
const cpp::ProgramDesc &Predictor::program_desc() const {
return program_desc_;
......
......@@ -33,10 +33,6 @@ class Debugger {
}
void registerOutput(std::string op_type, zynqmp::Tensor* tensor) {
// tensor->printScale();
// if (op_type != "conv") {
// // tensor->saveToFile(op_type, true);
// }
if (op_config[op_type]) {
tensor->saveToFile(op_type, true);
}
......@@ -45,12 +41,19 @@ class Debugger {
private:
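// Per-op switches: operators whose entry is set to true dump their
// output tensors to files via saveToFile() in registerOutput().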
std::unordered_map<std::string, bool> op_config;
Debugger() {
op_config["concat"] = true;
op_config["conv"] = true;
op_config["crop"] = true;
op_config["fetch"] = true;
op_config["fc"] = true;
op_config["softmax"] = true;
// op_config["concat"] = true;
// op_config["pooling"] = true;
// op_config["conv"] = true;
// op_config["crop"] = true;
// op_config["feed"] = true;
// op_config["fetch"] = true;
// op_config["boxes"] = true;
// op_config["scores"] = true;
// op_config["nms"] = true;
// op_config["pb_boxes"] = true;
// op_config["pb_variances"] = true;
// // op_config["fc"] = true;
// op_config["softmax"] = true;
}
};
......
......@@ -59,6 +59,8 @@ namespace arm {
template <>
void NCHWToNHWCCompute<PRECISION(kFloat)>::Run() {
NCHWTONHWC(float);
// auto& param = this->template Param<param_t>();
// param.y->ZynqTensor()->copyFrom(param.x->ZynqTensor());
}
template <>
......@@ -69,6 +71,9 @@ void NCHWToNHWCCompute<PRECISION(kInt8)>::Run() {
template <>
void NHWCToNCHWCompute<PRECISION(kFloat)>::Run() {
NHWCTONCHW(float);
// auto& param = this->template Param<param_t>();
// param.y->mutable_data<float>();
// param.y->ZynqTensor()->copyFrom(param.x->ZynqTensor());
}
template <>
......
......@@ -7,7 +7,7 @@ set(fpga_deps fpga_target_wrapper kernel_fpga)
# add_kernel(activation_compute_fpga FPGA basic SRCS activation_compute.cc DEPS ${fpga_deps})
# add_kernel(box_coder_compute_fpga FPGA basic SRCS box_coder_compute.cc DEPS ${fpga_deps})
add_kernel(concat_compute_fpga FPGA basic SRCS concat_compute.cc DEPS ${fpga_deps})
# add_kernel(concat_compute_fpga FPGA basic SRCS concat_compute.cc DEPS ${fpga_deps})
add_kernel(conv_compute_fpga FPGA basic SRCS conv_compute.cc DEPS ${fpga_deps})
# add_kernel(density_prior_box_compute_fpga FPGA basic SRCS density_prior_box_compute.cc DEPS ${fpga_deps})
add_kernel(dropout_compute_fpga FPGA basic SRCS dropout_compute.cc DEPS ${fpga_deps})
......@@ -15,18 +15,18 @@ add_kernel(elementwise_compute_fpga FPGA basic SRCS elementwise_compute.cc DEPS
# add_kernel(feed_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps})
add_kernel(fc_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps})
# add_kernel(gru_compute_fpga FPGA extra SRCS gru_compute.cc DEPS ${fpga_deps})
add_kernel(mul_compute_fpga FPGA basic SRCS mul_compute.cc DEPS ${fpga_deps})
add_kernel(gru_compute_fpga FPGA extra SRCS gru_compute.cc DEPS ${fpga_deps})
# add_kernel(mul_compute_fpga FPGA basic SRCS mul_compute.cc DEPS ${fpga_deps})
add_kernel(multiclass_nms_compute_fpga FPGA basic SRCS multiclass_nms_compute.cc DEPS ${fpga_deps})
add_kernel(norm_compute_fpga FPGA basic SRCS norm_compute.cc DEPS ${fpga_deps})
# add_kernel(im2sequence_compute_fpga FPGA basic SRCS im2sequence_compute.cc DEPS ${fpga_deps})
add_kernel(pooling_compute_fpga FPGA basic SRCS pooling_compute.cc DEPS ${fpga_deps})
add_kernel(prior_box_compute_fpga FPGA basic SRCS prior_box_compute.cc DEPS ${fpga_deps})
add_kernel(reshape_compute_fpga FPGA basic SRCS reshape_compute.cc DEPS ${fpga_deps} reshape_op)
# add_kernel(reshape_compute_fpga FPGA basic SRCS reshape_compute.cc DEPS ${fpga_deps} reshape_op)
# add_kernel(sequence_pool_compute_fpga FPGA basic SRCS sequence_pool_compute.cc DEPS ${fpga_deps})
add_kernel(scale_compute_fpga FPGA basic SRCS scale_compute.cc DEPS ${fpga_deps})
add_kernel(softmax_compute_fpga FPGA basic SRCS softmax_compute.cc DEPS ${fpga_deps})
add_kernel(transpose_compute_fpga FPGA basic SRCS transpose_compute.cc DEPS ${fpga_deps})
# add_kernel(softmax_compute_fpga FPGA basic SRCS softmax_compute.cc DEPS ${fpga_deps})
# add_kernel(transpose_compute_fpga FPGA basic SRCS transpose_compute.cc DEPS ${fpga_deps})
add_kernel(io_copy_compute_fpga FPGA basic SRCS io_copy_compute.cc DEPS ${fpga_deps})
add_kernel(calib_compute_fpga FPGA basic SRCS calib_compute.cc DEPS ${fpga_deps})
......
......@@ -118,6 +118,9 @@ class IoCopyFpgaToHostCompute
param.y->ZynqTensor()->flush();
auto out_lod = param.y->mutable_lod();
*out_lod = param.x->lod();
// param.x->ZynqTensor()->saveToFile("io_x", true);
// param.y->ZynqTensor()->saveToFile("io_y", true);
}
std::string doc() const override { return "Copy IO from FPGA to HOST"; }
......@@ -144,6 +147,21 @@ REGISTER_LITE_KERNEL(io_copy,
DATALAYOUT(kAny))})
.Finalize();
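// New registration: accepts a host tensor of any precision/layout and
// copies it to the FPGA as FP16/NHWC.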
REGISTER_LITE_KERNEL(io_copy,
kFPGA,
kAny,
kAny,
paddle::lite::kernels::fpga::IoCopyHostToFpgaCompute,
host_to_device_any_any)
.BindInput("Input",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny), -1)})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kFPGA),
PRECISION(kFP16),
DATALAYOUT(kNHWC))})
.Finalize();
REGISTER_LITE_KERNEL(io_copy,
kFPGA,
kAny,
......
......@@ -46,9 +46,40 @@ void ReshapeCompute::Run() {
} else {
output->CopyDataFrom(*x);
}
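// Debug dumps of the reshape input and output tensors.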
param.x->ZynqTensor()->saveToFile("reshape_in", true);
output->ZynqTensor()->saveToFile("reshape_out", true);
output->Resize(output_dims);
}
// void ReshapeComputeFpgaToHost::Run() {
// auto& param = Param<operators::ReshapeParam>();
// param.output->mutable_data<float>();
// auto x = param.x;
// // auto actual_shape = param.actual_shape;
// Tensor* actual_shape = nullptr; // TODO(chonwhite) change it.
// auto output = param.output;
// bool inplace = param.inplace;
// auto x_dims = x->dims();
// auto output_dims = output->dims();
// if (actual_shape) {
// auto actual_shape_dims = actual_shape->dims();
// auto* actual_shape_data = actual_shape->data<int>();
// auto shape = std::vector<int>(
// actual_shape_data, actual_shape_data +
// actual_shape_dims.production());
// output_dims = lite::operators::ValidateShape(shape, x_dims);
// output->Resize(output_dims);
// }
// if (inplace) {
// output->ShareDataWith(*x);
// } else {
// output->CopyDataFrom(*x);
// }
// output->Resize(output_dims);
// }
} // namespace fpga
} // namespace kernels
} // namespace lite
......
......@@ -30,6 +30,14 @@ class ReshapeCompute
virtual ~ReshapeCompute() = default;
};
class ReshapeComputeFpgaToHost
: public KernelLite<TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)> {
public:
void Run() override;
virtual ~ReshapeComputeFpgaToHost() = default;
};
} // namespace fpga
} // namespace kernels
} // namespace lite
......
......@@ -27,6 +27,62 @@ namespace fpga {
using float16 = zynqmp::float16;
void transposeCompute(operators::TransposeParam param) {
// Generic transpose fallback, copied from the host implementation.
const auto* input_x = param.x;
const auto input_x_dims = input_x->dims();
input_x->ZynqTensor()->invalidate();
input_x->ZynqTensor()->unalignImage();
Tensor float_input;
float_input.Resize(input_x_dims);
float_input.mutable_data<float>();
float_input.ZynqTensor()->copyFrom(input_x->ZynqTensor());
// const auto* input_x_data = input_x->data<float>();
const auto* input_x_data = float_input.data<float>();
// auto& param = this->Param<param_t>();
auto* out = param.output;
const auto axis = param.axis;
auto* out_data = out->mutable_data<float>();
size_t ndim = axis.size();
std::vector<int> xdim(ndim);
std::vector<int> xstride(ndim);
std::vector<int> xout(ndim);
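// Per output dimension, stored fastest-varying first:
//   xdim[j]    - extent of the dimension,
//   xstride[j] - stride in the flattened input when its index increments,
//   xout[j]    - span covered by one full cycle of the dimension.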
for (int i = 0; i < ndim; i++) {
int j = ndim - 1 - i;
xdim[j] = input_x_dims[axis[i]];
xstride[j] = 1;
for (int k = axis[i] + 1; k < ndim; k++) {
xstride[j] *= input_x_dims[k];
}
xout[j] = xstride[j] * xdim[j];
}
auto numel = input_x->numel();
size_t pind = 0;
std::vector<int> ind(ndim);
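// Odometer-style walk: 'ind' counts output coordinates (fastest dim
// first) while 'pind' tracks the corresponding flattened input offset,
// carrying into the next dimension whenever a coordinate wraps.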
for (int i = 0; i < numel; i++) {
out_data[i] = input_x_data[pind];
ind[0]++;
pind += xstride[0];
for (int j = 0; j < ndim - 1; j++) {
if (ind[j] == xdim[j]) {
ind[j + 1]++;
ind[j] = 0;
pind += xstride[j + 1];
pind -= xout[j];
} else {
break;
}
}
}
}
// Transpose
void TransposeCompute::Run() {
auto& param = this->Param<param_t>();
......@@ -40,7 +96,7 @@ void Transpose2Compute::Run() {
param.x->ZynqTensor()->invalidate();
param.x->ZynqTensor()->unalignImage();
if (param.x->dims().size() != 4) {
// TransposeCompute<float>(param);
transposeCompute(param);
// auto out = param.Out();
// auto out_data = out->data<half>();
......@@ -54,6 +110,8 @@ void Transpose2Compute::Run() {
// index++;
// }
// }
// param.output->ZynqTensor()->copyFrom(param.x->ZynqTensor());
} else {
param.x->ZynqTensor()->saveToFile("tx", true);
param.output->ZynqTensor()->copyFrom(param.x->ZynqTensor());
......
......@@ -22,10 +22,10 @@ cmake .. \
-DLITE_WITH_FPGA=ON \
-DLITE_WITH_OPENMP=ON \
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
-DWITH_TESTING=ON \
-DWITH_TESTING=OFF \
-DARM_TARGET_OS=armlinux \
-DLITE_BUILD_EXTRA=ON \
-DLITE_WITH_PROFILE=ON
-DLITE_WITH_PROFILE=OFF
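# WITH_TESTING and LITE_WITH_PROFILE are turned off for this build.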
make -j42
cd -