Commit dd2b700d authored by hanbuhe

merge memory leak fix into eb1.4.0

@@ -22,6 +22,9 @@ if (WITH_PADDLE_MOBILE)
   return()
 endif(WITH_PADDLE_MOBILE)
+# set(CMAKE_BUILD_TYPE DEBUG)
 set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
 set(CMAKE_CXX_STANDARD 11)
......
@@ -14,6 +14,8 @@
 #pragma once

+#include <fstream>
+#include <iostream>
 #include <string>
 #include <unordered_map>
@@ -37,8 +39,18 @@ class Debugger {
     }
   }

+  void tick(std::string key) {
+    float value = 0;
+    if (tick_tock_map.count(key) > 0) {
+      value += tick_tock_map[key] = value;
+    }
+  }
+
+  void tock(std::string key) {}
+
  private:
   std::unordered_map<std::string, bool> op_config;
+  std::unordered_map<std::string, float> tick_tock_map;
   Debugger() {
     op_config["concat"] = true;
     op_config["pooling"] = true;
......
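The `tick()`/`tock()` pair added above reads like an unfinished timing stub: `tock()` is empty and `tick()` only assigns a local zero back into the map. For reference, a minimal self-contained sketch of what a per-key timer usually looks like; this is not the committed code, and the use of `std::chrono` plus the class and member names are assumptions:

```cpp
#include <chrono>
#include <string>
#include <unordered_map>

// Illustrative sketch only: per-key start times plus accumulated milliseconds.
class TickTock {
 public:
  void tick(const std::string& key) {
    start_[key] = std::chrono::steady_clock::now();
  }
  void tock(const std::string& key) {
    auto it = start_.find(key);
    if (it == start_.end()) return;  // tock without a matching tick
    std::chrono::duration<float, std::milli> elapsed =
        std::chrono::steady_clock::now() - it->second;
    elapsed_ms_[key] += elapsed.count();
  }
  float elapsedMs(const std::string& key) const {
    auto it = elapsed_ms_.find(key);
    return it == elapsed_ms_.end() ? 0.0f : it->second;
  }

 private:
  std::unordered_map<std::string,
                     std::chrono::steady_clock::time_point> start_;
  std::unordered_map<std::string, float> elapsed_ms_;
};
```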
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once

 #include <stdio.h>
 #include "lite/backends/fpga/KD/llapi/filter.h"
 #include "lite/backends/fpga/KD/llapi/zynqmp_api.h"
......
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "io.hpp"
namespace paddle {
namespace zynqmp {
// FpgaIO::FpgaIO() {}
// void FpgaIO::setMutex(std::mutex* mtx) { mtx_ = mtx; }
// void FpgaIO::setConditionVariable(std::condition_variable* condition) {
// condition_ = condition;
// }
// void FpgaIO::lock() {
// if (mtx_ != nullptr && !locked_) {
// mtx_->lock();
// locked_ = true;
// }
// }
// void FpgaIO::unlock() {
// if (mtx_ != nullptr) {
// mtx_->unlock();
// condition_->notify_one();
// }
// locked_ = false;
// }
} // namespace zynqmp
} // namespace paddle
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <stdio.h>
// #include <condition_variable>
// #include <mutex>
namespace paddle {
namespace zynqmp {
class FpgaIO {
public:
static FpgaIO& get_instance() {
static FpgaIO s_instance;
return s_instance;
}
void allocData(size_t s) { data_ = new float[s]; }
float* getData() { return data_; }
// void setMutex(std::mutex* mtx);
// void setConditionVariable(std::condition_variable* condition);
// void lock();
// void unlock();
private:
// std::mutex* mtx_ = nullptr;
// std::condition_variable* condition_ = nullptr;
// bool locked_ = false;
float* data_ = nullptr;
FpgaIO();
};
} // namespace zynqmp
} // namespace paddle
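Worth noting for a memory-leak commit: `allocData()` in this new header overwrites `data_` without freeing a previous allocation, so calling it twice leaks the first buffer, and the raw pointer is never released. A sketch of an ownership-safe variant (illustrative only; the class name below is a stand-in, not the committed `FpgaIO`):

```cpp
#include <cstddef>
#include <memory>

// Sketch only: ownership-safe variant of the buffer handling in FpgaIO.
class FpgaIOBuffer {
 public:
  // reset() frees any previously allocated buffer before taking the new one.
  void allocData(size_t s) { data_.reset(new float[s]); }
  float* getData() { return data_.get(); }

 private:
  std::unique_ptr<float[]> data_;  // released automatically, no leak on realloc
};
```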
@@ -240,8 +240,8 @@ int8_t* format_filter(float* data_in,
   for (int n = 0; n < num; n++) {
     float* filter_start = data_in + n * chw;
     int8_t* quantized_start = quantized_data + n * chw;
-    quantize(filter_start, quantized_start, chw, max);
-    filter_max.push_back(1);
+    quantize(filter_start, quantized_start, chw, f_max);
+    filter_max.push_back(f_max);
   }
   int8_t* hwc_data =
......
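The change above records the actual quantization scale `f_max` for each filter instead of the placeholder `1`. As a rough sketch of why that value matters: symmetric int8 quantization typically maps `[-f_max, f_max]` onto `[-127, 127]`, so the stored maximum is needed to dequantize the weights later (roughly `q * f_max / 127.0f`). The following illustrates that scheme; it is an assumption about the convention, not the library's actual `quantize()`:

```cpp
#include <cmath>
#include <cstdint>

// Sketch of symmetric per-filter int8 quantization, assuming f_max is the
// largest absolute weight value in the filter.
void quantize_sketch(const float* in, int8_t* out, int len, float f_max) {
  float scale = f_max > 0.0f ? 127.0f / f_max : 0.0f;
  for (int i = 0; i < len; ++i) {
    out[i] = static_cast<int8_t>(std::round(in[i] * scale));
  }
}
```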
@@ -205,7 +205,7 @@ int get_device_info(const struct DeviceInfo &args) {
 int perform_bypass(const struct BypassArgs &args) {
   int ret = -1;
   int size = args.image.channels * args.image.width * args.image.height;
-  int max_size = 1 << 21;
+  int max_size = 1 << 22;

   float times = 1.0 * size / max_size;
   int count = static_cast<int>(times);
......
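`perform_bypass()` splits the image into chunks of at most `max_size` elements, and this commit doubles that chunk size from `1 << 21` to `1 << 22`, halving the number of bypass passes for large inputs. A small sketch of the implied chunk arithmetic; the remainder handling is an assumption, since it is not shown in the hunk:

```cpp
#include <cstdio>

// Sketch of the chunking arithmetic implied by the snippet above.
void show_chunks(int channels, int width, int height) {
  int size = channels * width * height;
  int max_size = 1 << 22;                   // was 1 << 21 before this commit
  int count = size / max_size;              // number of full chunks
  int remainder = size - count * max_size;  // leftover elements, if any
  std::printf("chunks=%d remainder=%d\n", count, remainder);
}
```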
@@ -14,6 +14,8 @@ limitations under the License. */
 #pragma once

+#include <math.h>
+#include <cmath>
 #include <vector>
 #include "lite/backends/fpga/KD/pe.hpp"
@@ -38,7 +40,6 @@ class FullyConnectedPE : public PE {
     Tensor* input = param_.input;
     convParam_.input = param_.input;
     convParam_.output = param_.output;
-    // convParam_.relu = param_.relu;
     convParam_.activeParam.type = param_.activeParam.type;
     convParam_.groups = 1;
     convParam_.strides = {1, 1};
@@ -54,32 +55,42 @@ class FullyConnectedPE : public PE {
     int height = param_.input->shape().height();
     int width = param_.input->shape().width();
-    int filter_channel = chw / height / width;
+    // int filter_channel = chw / height / width;

     int channel = param_.output->shape().channel();
-    Shape shape(NCHW, {num, filter_channel, height, width});
-    Tensor* conv_filter = new Tensor();
-    float* new_filter_data = conv_filter->mutableData<float>(FP32, shape);
+    Shape shape(NCHW, {num, chw_aligned, 1, 1});
+    float* new_filter_data = conv_filter_.mutableData<float>(FP32, shape);
     float* filter_data = param_.filter->data<float>();
+    memset(new_filter_data, 0, num * chw_aligned * sizeof(float));

     for (int i = 0; i < num; i++) {
       for (int j = 0; j < chw; j++) {
         float scale = filter_data[j * num + i];
-        new_filter_data[i * chw + j] = scale;
+        new_filter_data[i * chw_aligned + j] = scale;
       }
     }

-    conv_filter->flush();
-    convParam_.filter = conv_filter;
-    Shape sb_shape(N, {channel});
+    conv_filter_.flush();
+    convParam_.filter = &conv_filter_;
+    // param_.filter->saveToFile("param_filter", true);
+    // conv_filter->saveToFile("conv_filter", true);
+    // exit(-1);
+    Shape sb_shape(N, {num});

     float* scale_data = convParam_.scale()->mutableData<float>(FP32, sb_shape);
     float* bias_data = convParam_.bias()->mutableData<float>(FP32, sb_shape);
-    for (int i = 0; i < channel; i++) {
+    for (int i = 0; i < num; i++) {
       scale_data[i] = 1.0f;
       bias_data[i] = param_.bias->data<float>()[i];
     }
+    // for (int i = 0; i < num; i++) {
+    //   scale_data[i] = 1.0f;
+    //   bias_data[i] = param_.bias->data<float>()[i];
+    // }

     convParam_.scale()->flush();
     convParam_.bias()->flush();
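The rewritten `apply()` above packs the fully-connected weight matrix, stored as `filter_data[j * num + i]` (j over input features, i over outputs), into a 1x1-convolution filter of shape `[num, chw_aligned, 1, 1]` held by the new `conv_filter_` member, zero-padding each row from `chw` to `chw_aligned`. A standalone sketch of that repacking; it mirrors the loop in the diff, and `chw_aligned` is assumed to come from whatever alignment helper this file uses:

```cpp
#include <vector>

// Sketch: transpose and zero-pad an FC weight matrix into a 1x1-conv layout.
std::vector<float> repack_fc_as_1x1_conv(const std::vector<float>& filter_data,
                                         int num, int chw, int chw_aligned) {
  std::vector<float> conv_filter(static_cast<size_t>(num) * chw_aligned, 0.0f);
  for (int i = 0; i < num; ++i) {
    for (int j = 0; j < chw; ++j) {
      // column j of output i in the FC matrix becomes channel j of filter i
      conv_filter[i * chw_aligned + j] = filter_data[j * num + i];
    }
  }
  return conv_filter;
}
```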
@@ -115,14 +126,41 @@ class FullyConnectedPE : public PE {
     output->flush();
     output->scale()[0] = max / 127.0f;
     output->scale()[1] = 127.0f / max;
+    output->saveToFile("cpu_compute", true);
+    // exit(-1);
+  }
+
+  void batch_to_w() {
+    ConvParam& convParam_ = convPE_.param();
+
+    int channel = param_.input->shape().channel();
+    param_.input->invalidate();
+    int remainder =
+        aligned_input_.shape().channel() - param_.input->shape().channel();
+
+    float max = 0;
+    for (int n = 0; n < param_.input->shape().num(); n++) {
+      memset(aligned_input_.data<float16>(),
+             0,
+             aligned_input_.shape().channel() * sizeof(float16));
+      memcpy(
+          aligned_input_.data<float16>() + n * aligned_input_.shape().channel(),
+          param_.input->data<float16>() + n * channel,
+          channel * sizeof(float16));
+      aligned_input_.copyScaleFrom(param_.input);
+      aligned_input_.flush();
+    }
+    convPE_.dispatch();
   }

   bool dispatch() {
-    // int num = param_.filter->shape().channel();
-    // if (num == 2) {
-    //   cpu_compute();
-    //   return 1;
-    // } else {
+    // batch_to_w();
+    // return 1;
+    // cpu_compute1();
+    // return 1;
     return convPE_.dispatch();
     // }
   }
@@ -131,7 +169,10 @@ class FullyConnectedPE : public PE {
  private:
   FullyConnectedParam param_;
+  Tensor aligned_input_;
+  Tensor aligned_output_;
   ConvPE convPE_;
+  Tensor conv_filter_;
 };

 }  // namespace zynqmp
 }  // namespace paddle
@@ -29,6 +29,7 @@ class InputPE : public PE {
   }

   bool dispatch() {
+    // std::cout << "input_dispatch()\n";
     Tensor* input = param_.input;
     Tensor* output = param_.output;
......
@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once

+#include "lite/backends/fpga/KD/llapi/zynqmp_api.h"
 #include "lite/backends/fpga/KD/pe.hpp"
 #include "lite/backends/fpga/KD/pe_params.hpp"
@@ -52,6 +53,12 @@ class OutputPE : public PE {
     memcpy(DLEngine::get_instance().out_data,
            output->data<void>(),
            output->shape().numel() * sizeof(float));
+
+    fpga_reset();
+    auto max = fpga_get_memory_size_max();
+    std::cout << "PL ===== Max: ===== :: " << max << std::endl;
+
     return true;
   }
......
@@ -103,12 +103,18 @@ class Tensor {
     return reinterpret_cast<Dtype*>(ptr);
   }

+  void releaseData() {
+    released = true;
+    placeHolder_.reset();
+  }
+
   template <typename Dtype>
   Dtype* mutableData(DataType dataType, const Shape& shape) {
-    if (this->shape_ != nullptr) {
-      delete shape_;
-    }
-    this->shape_ = new Shape(shape);
+    // std::cout << "enter \n";
+    // std::cout << "before new shape\n";
+    // this->shape_ = new Shape(shape);
+    this->shape_.reset(new Shape(shape));
+    // std::cout << "new shape \n";
     this->dataType_ = dataType;
     return mutableData<Dtype>();
   }
@@ -117,11 +123,14 @@
   Dtype* mutableData() {
     size_t memorySize =
         shape_->memorySize(CellSize(dataType_)) * mem_scale_factor_;
+    // std::cout << "mem_size:" << memorySize << std::endl;
     if (placeHolder_ != nullptr) {
+      // std::cout << "placeHolder_ not null"<< std::endl;
       if (memorySize > placeHolder_->memorySize()) {
         placeHolder_.reset(new PlaceHolder(memorySize));
       }
     } else {
+      // std::cout << "placeHolder_ null"<< std::endl;
       placeHolder_.reset(new PlaceHolder(memorySize));
     }
     return data<Dtype>();
@@ -138,7 +147,7 @@
   DataType dataType() { return this->dataType_; }

-  Shape& shape() { return *shape_; }
+  Shape& shape() { return *(shape_.get()); }

   bool aligned() { return this->aligned_; }
@@ -247,15 +256,17 @@
   void shareDataWith(Tensor* src) { shareDataWith(src, src->shape()); }

   void shareDataWith(Tensor* src, const Shape& shape, int offset = 0) {
-    if (shape_ != nullptr) {
-      delete shape_;
-    }
+    // if (shape_ != nullptr) {
+    //   delete shape_;
+    // }
     this->placeHolder_ = src->placeHolder_;
     this->dataType_ = src->dataType_;
     this->aligned_ = src->aligned_;
     this->dateLocation_ = src->dateLocation_;
     this->offset = offset;
-    shape_ = new Shape(const_cast<Shape&>(shape));
+    // shape_ = new Shape(const_cast<Shape&>(shape));
+    shape_.reset(new Shape(shape));
   }

   void copyFrom(Tensor* src) {
@@ -300,7 +311,13 @@
   }

   void flush() {
-    size_t memorySize = placeHolder_->memorySize();
+    if (released) {
+      // std::cout << "flush::" << this << std::endl;
+      return;
+    }
+    size_t memorySize =
+        shape_->memorySize(CellSize(dataType_)) * mem_scale_factor_;
     fpga_flush(placeHolder_->data(), memorySize);
   }
@@ -451,18 +468,13 @@
     return os;
   }

-  ~Tensor() {
-    if (shape_ != nullptr) {
-      delete shape_;
-      shape_ = nullptr;
-    }
-  }
-
  private:
+  bool released = false;
   int offset = 0;
   float mem_scale_factor_ = 1.0f;
   std::shared_ptr<PlaceHolder> placeHolder_;
-  Shape* shape_ = nullptr;
+  std::shared_ptr<Shape> shape_;
+  // Shape* shape_ = nullptr;
   DataType dataType_ = FP32;
   bool aligned_ = false;
   DataSyncStatus synchedStatus_ = Synched;
......
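The recurring theme in these `Tensor` hunks is replacing the raw `Shape*`, which had to be deleted by hand in the destructor, in `mutableData()`, and in `shareDataWith()`, with a `std::shared_ptr<Shape>`, so reassignment releases the previous object automatically and the custom destructor can be dropped. A minimal sketch of that ownership pattern, with stand-in types rather than the real classes:

```cpp
#include <memory>

struct Shape { int n = 0; };  // stand-in for zynqmp::Shape

// Sketch of the ownership pattern adopted in this commit: resetting or
// reassigning a shared_ptr frees the old object, so no explicit delete
// (and no user-defined destructor) is needed.
class TensorLike {
 public:
  void setShape(const Shape& s) { shape_.reset(new Shape(s)); }
  void shareShapeWith(const TensorLike& other) { shape_ = other.shape_; }

 private:
  std::shared_ptr<Shape> shape_;  // was: Shape* shape_ = nullptr;
};
```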
@@ -69,7 +69,7 @@ std::string DDimLite::repr() const {
 }

 void TensorLite::ShareDataWith(const TensorLite &other) {
-  buffer_ = other.buffer_;
+  buffer_ = other.buffer_;  // TODO(chonwhite) delete buffer;
   dims_ = other.dims_;
   zynq_tensor_ = other.zynq_tensor_;
   target_ = other.target_;
@@ -79,10 +79,10 @@ void TensorLite::ShareDataWith(const TensorLite &other) {
 }

 void *TensorLite::mutable_data(size_t memory_size) {
-  memory_size_ = memory_size;
+  memory_size_ = memory_size;  // TODO(chonwhite) delete buffer;
   buffer_->ResetLazy(target_, memory_size_);
   // throw -1;
-  std::cout << memory_size << std::endl;
+  // std::cout << memory_size << std::endl;
   return buffer_->data();
 }
@@ -92,16 +92,34 @@ void *TensorLite::mutable_data(TargetType target, size_t memory_size) {
 }

 void TensorLite::CopyDataFrom(const TensorLite &other) {
+  // std::cout << "other11:: "<< &other << std::endl;
   dims_ = other.dims_;
   target_ = other.target_;
   lod_ = other.lod_;
-  auto dt = zynq_tensor_->dataType();
-  auto shape = other.zynq_tensor_->shape();
+  // std::cout << "before dataType\n";
+  if (zynq_tensor_.get() == nullptr) {
+    zynq_tensor_.reset(new zynqmp::Tensor());
+  }
+  auto dt = zynq_tensor_->dataType();
+  // std::cout << "after dataType\n";
+  // std::cout << "before resize\n";
   Resize(other.dims());
+  auto shape = other.zynq_tensor_->shape();
+  // std::cout << "after resize\n";
   zynq_tensor_->mutableData<void>(zynq_tensor_->dataType(), shape);
-  this->ZynqTensor()->copyFrom(other.ZynqTensor());
+  // std::cout << "after mutableData\n";
+  // std::cout << "ZynqTensor():" << this->ZynqTensor() << std::endl;
+  // std::cout << "other Tensor():" << other.ZynqTensor() << std::endl;
+  // this->ZynqTensor()->copyFrom(other.ZynqTensor());
+  memcpy(this->ZynqTensor()->data<void>(),
+         other.ZynqTensor()->data<void>(),
+         other.ZynqTensor()->shape().numel() * sizeof(float));
+  // memcpy()
+  // std::cout << "after copyFrom\n";
 }

 }  // namespace lite
......
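`CopyDataFrom()` now allocates `zynq_tensor_` lazily before using it and copies raw bytes with `memcpy` instead of `copyFrom()`; note that the byte count `numel() * sizeof(float)` assumes FP32 elements. A sketch of the lazy-init-then-copy pattern with stand-in types, not the real `TensorLite`/`zynqmp::Tensor` classes:

```cpp
#include <cstring>
#include <memory>
#include <vector>

struct RawTensor {
  std::vector<unsigned char> bytes;  // stand-in for the device buffer
};

// Sketch: create the destination tensor on first use, size it like the
// source, then copy the raw bytes.
void copy_data_from(std::shared_ptr<RawTensor>& dst, const RawTensor& src) {
  if (dst == nullptr) {
    dst.reset(new RawTensor());  // lazy allocation, mirrors zynq_tensor_
  }
  dst->bytes.resize(src.bytes.size());
  if (!src.bytes.empty()) {
    std::memcpy(dst->bytes.data(), src.bytes.data(), src.bytes.size());
  }
}
```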
@@ -81,6 +81,10 @@ class DDimLite {
     return !(a == b);
   }

+  ~DDimLite() {
+    // std::cout << "free DDimLite\n";
+  }
+
  private:
   std::vector<value_type> data_;
 };
@@ -109,7 +113,12 @@ class TensorLite {
     return zynq_tensor_->data<R>() + offset_;
   }

-  void Resize(const DDimLite &ddim) { dims_ = ddim; }
+  void Resize(const DDimLite &ddim) {
+    // std::cout << "Resize \n";
+    // std::cout << "ddim:" << & ddim << std::endl;
+    dims_ = ddim;
+    // std::cout << "after Reize \n";
+  }

   void Resize(const std::vector<int64_t> &x) { dims_ = DDimLite(x); }
   const DDimLite &dims() const { return dims_; }
@@ -142,7 +151,9 @@
   void *mutable_data(size_t memory_size);
   void *mutable_data(TargetType target, size_t memory_size);

-  const void *raw_data() const { return buffer_->data(); }
+  const void *raw_data() const {
+    return buffer_->data();
+  }  // TODO(chonwhite) delete buffer;

   size_t data_size() const { return this->dims().production(); }
@@ -150,7 +161,9 @@
   size_t offset() const { return offset_; }

-  bool IsInitialized() const { return buffer_->data(); }
+  bool IsInitialized() const {
+    return buffer_->data();
+  }  // TODO(chonwhite) delete buffer;

   // Other share data to this.
   void ShareDataWith(const TensorLite &other);
@@ -165,7 +178,10 @@
   TargetType target() const { return target_; }

-  zynqmp::Tensor *ZynqTensor() const { return zynq_tensor_; }
+  // template <typename T>
+  // TensorLite Slice(int64_t begin, int64_t end) const;
+
+  zynqmp::Tensor *ZynqTensor() const { return zynq_tensor_.get(); }

   friend std::ostream &operator<<(std::ostream &os, const TensorLite &tensor) {
     os << "Tensor:" << '\n';
@@ -194,7 +210,8 @@
   size_t memory_size_{};
   size_t offset_{0};

-  zynqmp::Tensor *zynq_tensor_ = new zynqmp::Tensor();
+  // zynqmp::Tensor *zynq_tensor_ = new zynqmp::Tensor();
+  std::shared_ptr<zynqmp::Tensor> zynq_tensor_;

   template <typename T>
   void mutable_data_internal();
@@ -203,6 +220,7 @@
 template <typename T, typename R>
 R *TensorLite::mutable_data() {
   std::vector<int> v;
+  // std::cout << "mutable_data \n";
   for (int i = 0; i < dims_.size(); i++) {
     v.push_back(dims_[i]);
   }
@@ -225,7 +243,7 @@ R *TensorLite::mutable_data() {
       break;
   }
   zynqmp::Shape input_shape(layout_type, v);
+  // std::cout << "input_shape \n";
   zynqmp::DataType data_type = zynqmp::FP32;
   if (typeid(T) == typeid(float)) {
     data_type = zynqmp::FP32;
@@ -233,6 +251,13 @@ R *TensorLite::mutable_data() {
   if (typeid(T) == typeid(zynqmp::float16)) {
     data_type = zynqmp::FP16;
   }
+  // std::cout << "mutableData \n";
+  // std::cout << "zynq_tensor_:" << zynq_tensor_.get() << std::endl;
+
+  if (zynq_tensor_.get() == nullptr) {
+    zynq_tensor_.reset(new zynqmp::Tensor());
+  }
+
   return zynq_tensor_->mutableData<R>(data_type, input_shape);
 }
@@ -272,6 +297,7 @@ TensorLite TensorLite::Slice(int64_t begin, int64_t end) const {
 template <typename T>
 void TensorLite::Slice(TensorLite &dst, int64_t begin, int64_t end) const {
+  // TODO(chonwhite) delete this function;
   CHECK_GE(begin, 0);
   CHECK_LE(end, dims_[0]);
   CHECK_LT(begin, end);
......
@@ -59,6 +59,7 @@ void SequencePoolCompute::Run() {
     for (int i = 0; i <= batch_size; i++) {
       offset_new[i] = i;
     }
+    (output->mutable_lod())->clear();
     (output->mutable_lod())->push_back(offset_new);
   }
......
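The added `clear()` keeps the output LoD at exactly one level when `Run()` executes more than once on the same output tensor; without it, each run would push another copy of the offsets. A tiny sketch of the effect, with illustrative types rather than the real LoD structure:

```cpp
#include <vector>

// Sketch: rebuild the offsets on every run; without clear() the
// vector-of-levels keeps growing across repeated runs.
void set_lod(std::vector<std::vector<int>>* lod, int batch_size) {
  std::vector<int> offset_new(batch_size + 1);
  for (int i = 0; i <= batch_size; i++) offset_new[i] = i;
  lod->clear();                // the line added in this commit
  lod->push_back(offset_new);  // exactly one level after every run
}
```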
@@ -14,7 +14,6 @@ add_kernel(conv_compute_fpga FPGA basic SRCS conv_compute.cc DEPS ${fpga_deps})
 # add_kernel(density_prior_box_compute_fpga FPGA basic SRCS density_prior_box_compute.cc DEPS ${fpga_deps})
 add_kernel(dropout_compute_fpga FPGA basic SRCS dropout_compute.cc DEPS ${fpga_deps})
 add_kernel(elementwise_compute_fpga FPGA basic SRCS elementwise_compute.cc DEPS ${fpga_deps})
-# add_kernel(feed_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps})
 add_kernel(fc_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps})
 add_kernel(gru_compute_fpga FPGA extra SRCS gru_compute.cc DEPS ${fpga_deps})
......
@@ -40,8 +40,8 @@ void FeedCompute::PrepareForRun() {
 void FeedCompute::Run() {
   auto& param = this->Param<param_t>();
   Tensor& x = param.feed_list->at(param.col);
+  pe_.param().input = x.ZynqTensor();
   pe_.dispatch();

   auto out_lod = param.out->mutable_lod();
   *out_lod = x.lod();
......
@@ -82,6 +82,6 @@ REGISTER_LITE_KERNEL(fetch,
                      kNHWC,
                      paddle::lite::kernels::fpga::FetchCompute,
                      host_host)
-    .BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
     .Finalize();
@@ -80,7 +80,8 @@ void mul(MulCompute* k) {
 }

 void MulCompute::Run() {
-  pe_.dispatch();
+  // pe_.dispatch();
+  mul(this);
 #ifdef FPGA_PRINT_TENSOR
   zynqmp::FullyConnectedParam& fc_param = pe_.param();
   Debugger::get_instance().registerOutput("mul", fc_param.output);
......
@@ -16,7 +16,7 @@
 #include <utility>
 #include <vector>

-#include "lite/backends/fpga/KD/debugger.hpp"
+// #include "lite/backends/fpga/KD/debugger.hpp"
 #include "lite/kernels/host/one_hot_compute.h"
 #include "lite/utils/paddle_enforce.h"
......
@@ -15,7 +15,7 @@
 #include "lite/operators/one_hot_op.h"
 #include "lite/core/op_registry.h"
-#include "lite/backends/fpga/KD/debugger.hpp"
+// #include "lite/backends/fpga/KD/debugger.hpp"

 namespace paddle {
 namespace lite {
......