Commit 9d2d3d0f authored by chonwhite

fixed memory leak

Parent 9c15846a
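The changes below share one pattern: members that used to be raw pointers owned through explicit new/delete (Tensor::shape_, TensorLite::zynq_tensor_, the FC filter tensor) become value members or std::shared_ptr, so the old hand-written cleanup code can be deleted. A minimal before/after sketch of the idea, using a simplified stand-in rather than the real Tensor class:

#include <memory>

// Before: raw owning pointer. Every reassignment must delete first, and
// shareDataWith-style aliasing makes ownership ambiguous, so leaks creep in.
struct ShapeHolderBefore {
  int* shape_ = nullptr;
  void resize(int n) {
    delete shape_;
    shape_ = new int(n);
  }
  ~ShapeHolderBefore() { delete shape_; }
};

// After: shared_ptr owns the allocation. reset() releases the previous object,
// copies share ownership safely, and no destructor needs to be written.
struct ShapeHolderAfter {
  std::shared_ptr<int> shape_;
  void resize(int n) { shape_.reset(new int(n)); }
};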
...@@ -14,6 +14,8 @@
#pragma once
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>
...@@ -37,8 +39,18 @@ class Debugger {
}
}
void tick(std::string key) {
float value = 0;
if (tick_tock_map.count(key) > 0) {
value = tick_tock_map[key];
}
tick_tock_map[key] = value;  // stub: registers the key; real timing logic still missing
}
void tock(std::string key) {}
private:
std::unordered_map<std::string, bool> op_config;
std::unordered_map<std::string, float> tick_tock_map;
Debugger() {
op_config["concat"] = true;
op_config["pooling"] = true;
......
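The tick/tock pair above is still a stub: tock() is empty and the stored float is never updated with a real measurement. Presumably it is meant to accumulate per-op timings keyed by op name; a hedged sketch of what a working version could look like, using std::chrono (my assumption, not what this commit implements):

#include <chrono>
#include <string>
#include <unordered_map>

// Sketch of a tick/tock timer accumulating per-key wall-clock milliseconds.
class TickTock {
  using Clock = std::chrono::steady_clock;

 public:
  void tick(const std::string& key) { starts_[key] = Clock::now(); }

  // Adds the milliseconds elapsed since the matching tick() to the map.
  void tock(const std::string& key) {
    auto it = starts_.find(key);
    if (it == starts_.end()) return;
    std::chrono::duration<float, std::milli> ms = Clock::now() - it->second;
    tick_tock_map_[key] += ms.count();
  }

 private:
  std::unordered_map<std::string, Clock::time_point> starts_;
  std::unordered_map<std::string, float> tick_tock_map_;
};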
...@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <stdio.h>
#include "lite/backends/fpga/KD/llapi/filter.h"
#include "lite/backends/fpga/KD/llapi/zynqmp_api.h"
......
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "io.hpp"
namespace paddle {
namespace zynqmp {
FpgaIO::FpgaIO() {}  // get_instance() needs this definition to construct the singleton
// void FpgaIO::setMutex(std::mutex* mtx) { mtx_ = mtx; }
// void FpgaIO::setConditionVariable(std::condition_variable* condition) {
// condition_ = condition;
// }
// void FpgaIO::lock() {
// if (mtx_ != nullptr && !locked_) {
// mtx_->lock();
// locked_ = true;
// }
// }
// void FpgaIO::unlock() {
// if (mtx_ != nullptr) {
// mtx_->unlock();
// condition_->notify_one();
// }
// locked_ = false;
// }
} // namespace zynqmp
} // namespace paddle
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <stdio.h>
#include <condition_variable>  // still needed: the condition_ member below uses this type
#include <mutex>               // still needed: the mtx_ member below uses this type
namespace paddle {
namespace zynqmp {
class FpgaIO {
public:
static FpgaIO& get_instance() {
static FpgaIO s_instance;
return s_instance;
}
void allocData(size_t s) { data_ = new float[s]; }
float* getData() { return data_; }
// void setMutex(std::mutex* mtx);
// void setConditionVariable(std::condition_variable* condition);
// void lock();
// void unlock();
private:
std::mutex* mtx_ = nullptr;
std::condition_variable* condition_ = nullptr;
bool locked_ = false;
float* data_ = nullptr;
FpgaIO();
};
} // namespace zynqmp
} // namespace paddle
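One leak the commit leaves in place: FpgaIO::allocData() allocates with new float[s] and nothing ever frees data_, so a second call leaks the first buffer. A minimal sketch of a leak-free variant using std::unique_ptr<float[]> (a suggestion consistent with the commit's smart-pointer direction, not what the code above does):

#include <memory>

class FpgaIO {
 public:
  static FpgaIO& get_instance() {
    static FpgaIO s_instance;
    return s_instance;
  }
  // Reallocating releases the previous buffer automatically; () zero-initializes.
  void allocData(size_t s) { data_.reset(new float[s]()); }
  float* getData() { return data_.get(); }

 private:
  std::unique_ptr<float[]> data_;  // owns the staging buffer
  FpgaIO() = default;
};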
...@@ -14,6 +14,8 @@ limitations under the License. */
#pragma once
#include <math.h>
#include <cmath>
#include <vector>
#include "lite/backends/fpga/KD/pe.hpp"
...@@ -37,10 +39,9 @@ class FullyConnectedPE : public PE {
ConvParam& convParam_ = convPE_.param();
Tensor* input = param_.input;
convParam_.input = param_.input;
num_ = param_.input->shape().num();
convParam_.output = param_.output;
// convParam_.activeParam.type = param_.activeParam.type;
convParam_.groups = 1;
convParam_.strides = {1, 1};
convParam_.paddings = {0, 0};
...@@ -49,34 +50,54 @@ class FullyConnectedPE : public PE {
int num = param_.filter->shape().channel();
int chw = param_.filter->shape().num();
int align = 32;
int chw_aligned = ((chw + align - 1) / align) * align;
int infer_num = 1;
Shape in_shape(NCHW, {infer_num, chw_aligned, 1, 1});
aligned_input_.mutableData<float16>(FP16, in_shape);
convParam_.input = &aligned_input_;
Shape out_shape(NCHW, {infer_num, num, 1, 1});
aligned_output_.mutableData<float16>(FP16, out_shape);
convParam_.output = &aligned_output_;
int height = param_.input->shape().height();
int width = param_.input->shape().width();
// int filter_channel = chw / height / width;
int channel = param_.output->shape().channel();
Shape shape(NCHW, {num, chw_aligned, 1, 1});
float* new_filter_data = conv_filter_.mutableData<float>(FP32, shape);
float* filter_data = param_.filter->data<float>();
memset(new_filter_data, 0, num * chw_aligned * sizeof(float));
for (int i = 0; i < num; i++) {
for (int j = 0; j < chw; j++) {
float scale = filter_data[j * num + i];
new_filter_data[i * chw_aligned + j] = scale;
}
}
conv_filter_.flush();
convParam_.filter = &conv_filter_;
// param_.filter->saveToFile("param_filter", true);
// conv_filter->saveToFile("conv_filter", true);
// exit(-1);
Shape sb_shape(N, {num});
float* scale_data = convParam_.scale()->mutableData<float>(FP32, sb_shape);
float* bias_data = convParam_.bias()->mutableData<float>(FP32, sb_shape);
for (int i = 0; i < num; i++) {
scale_data[i] = 1.0f;
bias_data[i] = param_.bias->data<float>()[i];
}
// for (int i = 0; i < num; i++) {
// scale_data[i] = 1.0f;
// bias_data[i] = param_.bias->data<float>()[i];
// }
convParam_.scale()->flush();
convParam_.bias()->flush();
...@@ -84,15 +105,197 @@ class FullyConnectedPE : public PE {
convPE_.apply();
}
void cpu_compute() {
int num = param_.filter->shape().channel();
int chw = param_.filter->shape().num();
float* filter_data = param_.filter->data<float>();
float max = 0.0f;
Tensor* input = param_.input;
Tensor* output = param_.output;
float16* input_data = input->data<float16>();
float16* output_data = output->data<float16>();
for (int n = 0; n < input->shape().num(); n++) {
float16* input_data = input->data<float16>() + n * chw;
float16* output_data =
output->data<float16>() + n * output->shape().channel();
for (int i = 0; i < num; i++) {
float sum = 0;
float bias = param_.bias->data<float>()[i];
for (int j = 0; j < chw; j++) {
float scale = filter_data[j * num + i];
float data = half_to_float(input_data[j]);
sum += scale * data;
}
float value = sum + bias;
output_data[i] = float_to_half(value);
// track the max on the float value, not on the raw half bits
if (max < value) {
max = value;
}
}
}
output->flush();
output->scale()[0] = max / 127.0f;
output->scale()[1] = 127.0f / max;
}
void cpu_compute1() {
int num = conv_filter_.shape().num();
int chw = conv_filter_.shape().channel();
// chw = 336;
float* filter_data = conv_filter_.data<float>();
float max = 0.0f;
Tensor* input = param_.input;
Tensor* output = param_.output;
float16* input_data = input->data<float16>();
float16* output_data = output->data<float16>();
for (int n = 0; n < input->shape().num(); n++) {
float16* input_data = input->data<float16>() + n * chw;
float16* output_data =
output->data<float16>() + n * output->shape().channel();
for (int i = 0; i < num; i++) {
float sum = 0;
float bias = param_.bias->data<float>()[i];
for (int j = 0; j < chw; j++) {
float scale = filter_data[i * chw + j];
float data = half_to_float(input_data[j]);
sum += scale * data;
}
float value = sum + bias;
if (std::isinf(value) || i > 321) {
std::cout << "i:" << i << " sum:" << sum << " bias:" << bias
<< std::endl;
// exit(-1);
}
output_data[i] = float_to_half(value);
if (max < value) {
max = value;
}
}
}
output->flush();
output->scale()[0] = max / 127.0f;
output->scale()[1] = 127.0f / max;
output->saveToFile("cpu_compute", true);
// exit(-1);
}
void batch_to_w() {
ConvParam& convParam_ = convPE_.param();
int channel = param_.input->shape().channel();
param_.input->invalidate();
int remainder =
aligned_input_.shape().channel() - param_.input->shape().channel();
float max = 0;
for (int n = 0; n < param_.input->shape().num(); n++) {
memset(aligned_input_.data<float16>(),
0,
aligned_input_.shape().channel() * sizeof(float16));
memcpy(
aligned_input_.data<float16>() + n * aligned_input_.shape().channel(),
param_.input->data<float16>() + n * channel,
channel * sizeof(float16));
aligned_input_.copyScaleFrom(param_.input);
aligned_input_.flush();
}
convPE_.dispatch();
}
bool dispatch() {
// batch_to_w();
// return 1;
// cpu_compute1();
// return 1;
// int num = param_.filter->shape().channel();
// if (num == 2) {
// cpu_compute();
// return 1;
// } else {
// return convPE_.dispatch();
// }
ConvParam& convParam_ = convPE_.param();
if (param_.input->shape().channel() == 321 &&
param_.output->shape().channel() == 384) {
// conv_filter_.saveToFile("conv_filter", true);
// cpu_compute1();
// return 1;
}
int channel = param_.input->shape().channel();
param_.input->invalidate();
int remainder =
aligned_input_.shape().channel() - param_.input->shape().channel();
float max = 0;
for (int n = 0; n < param_.input->shape().num(); n++) {
memset(aligned_input_.data<float16>(),
0,
aligned_input_.shape().channel() * sizeof(float16));
memcpy(aligned_input_.data<float16>(),
param_.input->data<float16>() + n * channel,
channel * sizeof(float16));
aligned_input_.copyScaleFrom(param_.input);
aligned_input_.flush();
if (param_.input->shape().channel() == 321 &&
param_.output->shape().channel() == 384) {
// aligned_input_.saveToFile("aligned_input_", true);
// convParam_.filter->saveToFile("conv_filter", true);
}
convPE_.dispatch();
aligned_output_.invalidate();
if (param_.input->shape().num() == 230) {
// aligned_output_.saveToFile("ao", true);
}
//
float16* src = aligned_output_.data<float16>();
float16* dst =
param_.output->data<float16>() + n * param_.output->shape().channel();
memcpy(dst, src, param_.output->shape().channel() * sizeof(float16));
if (aligned_output_.scale()[0] > max) {
max = aligned_output_.scale()[0];
}
}
param_.output->flush();
param_.output->scale()[0] = max / 127.0f;
param_.output->scale()[1] = 127.0f / max;
// param_.output->saveToFile("out", true);
// exit(-1);
// cpu_compute();
// ConvParam& convParam_ = convPE_.param();
// convParam_.scale()->saveToFile("scale", true);
return true;
}
FullyConnectedParam& param() { return param_; }
private:
FullyConnectedParam param_;
Tensor aligned_input_;
Tensor aligned_output_;
ConvPE convPE_;
Tensor conv_filter_;
int num_ = 1;
};
} // namespace zynqmp
} // namespace paddle
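For reference, the filter repacking in apply() above transposes the FC weights from Paddle's [chw x num] (input-major) layout into [num x chw_aligned] rows, zero-padded so each row is a multiple of the 32-element FPGA alignment. A standalone sketch of that transform; the function name and use of std::vector are illustrative, not part of the codebase:

#include <vector>

// Repack FC weights from [chw x num] (input-major) to [num x chw_aligned]
// (output-major), zero-padding each row up to `align` elements.
// Mirrors the loop in FullyConnectedPE::apply().
std::vector<float> repack_fc_filter(const std::vector<float>& filter,
                                    int num, int chw, int align = 32) {
  int chw_aligned = ((chw + align - 1) / align) * align;
  std::vector<float> packed(static_cast<size_t>(num) * chw_aligned, 0.0f);
  for (int i = 0; i < num; i++) {    // output channel
    for (int j = 0; j < chw; j++) {  // flattened input index
      packed[i * chw_aligned + j] = filter[j * num + i];
    }
  }
  return packed;
}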
...@@ -29,19 +29,28 @@ class InputPE : public PE {
}
bool dispatch() {
std::cout << "input_dispatch()\n";
Tensor* input = param_.input;
Tensor* output = param_.output;
Tensor* src = input;
// std::cout << "input:" << input << std::endl;
input->flush();
// std::cout << "input_flush()\n";
Tensor half_tensor;
if (input->dataType() == DataType::FP32) {
// std::cout << "2()\n";
half_tensor.mutableData<void*>(DataType::FP16, input->shape());
// std::cout << "3()\n";
half_tensor.copyFrom(input);
// std::cout << "4()\n";
src = &half_tensor;
}
// std::cout << "5()\n";
output->mutableData<void>();
// std::cout << "6()\n";
src->alignImage(output, true);
// std::cout << "7()\n";
return true;
}
......
...@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include "lite/backends/fpga/KD/llapi/zynqmp_api.h"
#include "lite/backends/fpga/KD/pe.hpp"
#include "lite/backends/fpga/KD/pe_params.hpp"
...@@ -52,6 +53,10 @@ class OutputPE : public PE {
memcpy(DLEngine::get_instance().out_data,
output->data<void>(),
output->shape().numel() * sizeof(float));
// auto max = fpga_get_memory_size_max();
// std::cout << "===== Max: ===== :: " << max << std::endl;
return true;
}
......
...@@ -103,12 +103,18 @@ class Tensor {
return reinterpret_cast<Dtype*>(ptr);
}
void releaseData() {
released = true;
placeHolder_.reset();
}
template <typename Dtype>
Dtype* mutableData(DataType dataType, const Shape& shape) {
// std::cout << "enter \n";
// std::cout << "before new shape\n";
// this->shape_ = new Shape(shape);
this->shape_.reset(new Shape(shape));
// std::cout << "new shape \n";
this->dataType_ = dataType;
return mutableData<Dtype>();
}
...@@ -117,11 +123,14 @@ class Tensor {
Dtype* mutableData() {
size_t memorySize =
shape_->memorySize(CellSize(dataType_)) * mem_scale_factor_;
// std::cout << "mem_size:" << memorySize << std::endl;
if (placeHolder_ != nullptr) {
// std::cout << "placeHolder_ not null"<< std::endl;
if (memorySize > placeHolder_->memorySize()) {
placeHolder_.reset(new PlaceHolder(memorySize));
}
} else {
// std::cout << "placeHolder_ null"<< std::endl;
placeHolder_.reset(new PlaceHolder(memorySize));
}
return data<Dtype>();
...@@ -138,7 +147,7 @@ class Tensor {
DataType dataType() { return this->dataType_; }
Shape& shape() { return *(shape_.get()); }
bool aligned() { return this->aligned_; }
...@@ -247,15 +256,17 @@ class Tensor {
void shareDataWith(Tensor* src) { shareDataWith(src, src->shape()); }
void shareDataWith(Tensor* src, const Shape& shape, int offset = 0) {
// if (shape_ != nullptr) {
// delete shape_;
// }
this->placeHolder_ = src->placeHolder_;
this->dataType_ = src->dataType_;
this->aligned_ = src->aligned_;
this->dateLocation_ = src->dateLocation_;
this->offset = offset;
// shape_ = new Shape(const_cast<Shape&>(shape));
shape_.reset(new Shape(shape));
}
void copyFrom(Tensor* src) {
...@@ -300,6 +311,14 @@ class Tensor {
}
void flush() {
// std::cout << "released:" << released << std::endl;
// std::cout << "placeHolder_" << placeHolder_.get() << std::endl;
if (released) {
// std::cout << "flush::" << this << std::endl;
return;
}
size_t memorySize =
shape_->memorySize(CellSize(dataType_)) * mem_scale_factor_;
fpga_flush(placeHolder_->data(), memorySize);
...@@ -463,18 +482,13 @@ class Tensor {
return os;
}
private:
bool released = false;
int offset = 0;
float mem_scale_factor_ = 1.0f;
std::shared_ptr<PlaceHolder> placeHolder_;
std::shared_ptr<Shape> shape_;
// Shape* shape_ = nullptr;
DataType dataType_ = FP32;
bool aligned_ = false;
DataSyncStatus synchedStatus_ = Synched;
......
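A short usage note on the new releaseData()/released pair above: once the backing PlaceHolder is dropped, a later flush() must not touch freed FPGA memory, which is exactly what the early return in flush() guards. A sketch assuming the Tensor API shown above:

zynqmp::Tensor t;
zynqmp::Shape s(zynqmp::NCHW, {1, 32, 1, 1});
t.mutableData<float>(zynqmp::FP32, s);  // allocates the PlaceHolder
t.flush();                              // writes the CPU-side buffer back to FPGA memory
t.releaseData();                        // frees the buffer and sets released = true
t.flush();                              // now a safe no-op instead of a use-after-free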
...@@ -69,7 +69,7 @@ std::string DDimLite::repr() const {
}
void TensorLite::ShareDataWith(const TensorLite &other) {
buffer_ = other.buffer_; // TODO(chonwhite) delete buffer;
dims_ = other.dims_;
zynq_tensor_ = other.zynq_tensor_;
target_ = other.target_;
...@@ -79,10 +79,10 @@ void TensorLite::ShareDataWith(const TensorLite &other) {
}
void *TensorLite::mutable_data(size_t memory_size) {
memory_size_ = memory_size; // TODO(chonwhite) delete buffer;
buffer_->ResetLazy(target_, memory_size_);
// throw -1;
// std::cout << memory_size << std::endl;
return buffer_->data();
}
...@@ -92,16 +92,34 @@ void *TensorLite::mutable_data(TargetType target, size_t memory_size) {
}
void TensorLite::CopyDataFrom(const TensorLite &other) {
// std::cout << "other11:: "<< &other << std::endl;
dims_ = other.dims_;
target_ = other.target_;
lod_ = other.lod_;
// std::cout << "before dataType\n";
if (zynq_tensor_.get() == nullptr) {
zynq_tensor_.reset(new zynqmp::Tensor());
}
auto dt = zynq_tensor_->dataType();
// std::cout << "after dataType\n";
// std::cout << "before resize\n";
Resize(other.dims());
auto shape = other.zynq_tensor_->shape();
// std::cout << "after resize\n";
zynq_tensor_->mutableData<void>(zynq_tensor_->dataType(), shape);
// std::cout << "after mutableData\n";
// std::cout << "ZynqTensor():" << this->ZynqTensor() << std::endl;
// std::cout << "other Tensor():" << other.ZynqTensor() << std::endl;
// this->ZynqTensor()->copyFrom(other.ZynqTensor());
memcpy(this->ZynqTensor()->data<void>(),
other.ZynqTensor()->data<void>(),
other.ZynqTensor()->shape().numel() * sizeof(float));
// memcpy()
// std::cout << "after copyFrom\n";
}
} // namespace lite
......
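One caveat in the new CopyDataFrom() above: the raw memcpy sizes the copy as numel() * sizeof(float), which over-reads when the underlying tensors hold FP16 data. A hedged sketch that derives the byte count from the source tensor's element type instead, reusing the CellSize() helper seen in tensor.hpp (assuming it returns the per-element byte size):

// Size the copy from the actual element type rather than assuming float.
size_t bytes = other.ZynqTensor()->shape().numel() *
               zynqmp::CellSize(other.ZynqTensor()->dataType());
memcpy(this->ZynqTensor()->data<void>(),
       other.ZynqTensor()->data<void>(),
       bytes);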
...@@ -81,6 +81,10 @@ class DDimLite {
return !(a == b);
}
~DDimLite() {
// std::cout << "free DDimLite\n";
}
private:
std::vector<value_type> data_;
};
...@@ -109,7 +113,12 @@ class TensorLite {
return zynq_tensor_->data<R>() + offset_;
}
void Resize(const DDimLite &ddim) {
// std::cout << "Resize \n";
// std::cout << "ddim:" << & ddim << std::endl;
dims_ = ddim;
// std::cout << "after Reize \n";
}
void Resize(const std::vector<int64_t> &x) { dims_ = DDimLite(x); }
const DDimLite &dims() const { return dims_; }
...@@ -142,7 +151,9 @@ class TensorLite {
void *mutable_data(size_t memory_size);
void *mutable_data(TargetType target, size_t memory_size);
const void *raw_data() const {
return buffer_->data();
} // TODO(chonwhite) delete buffer;
size_t data_size() const { return this->dims().production(); }
...@@ -150,7 +161,9 @@ class TensorLite {
size_t offset() const { return offset_; }
bool IsInitialized() const {
return buffer_->data();
} // TODO(chonwhite) delete buffer;
// Other share data to this.
void ShareDataWith(const TensorLite &other);
...@@ -168,7 +181,7 @@ class TensorLite {
// template <typename T>
// TensorLite Slice(int64_t begin, int64_t end) const;
zynqmp::Tensor *ZynqTensor() const { return zynq_tensor_.get(); }
friend std::ostream &operator<<(std::ostream &os, const TensorLite &tensor) {
os << "Tensor:" << '\n';
...@@ -197,7 +210,8 @@ class TensorLite {
size_t memory_size_{};
size_t offset_{0};
// zynqmp::Tensor *zynq_tensor_ = new zynqmp::Tensor();
std::shared_ptr<zynqmp::Tensor> zynq_tensor_;
template <typename T>
void mutable_data_internal();
...@@ -206,6 +220,7 @@ class TensorLite {
template <typename T, typename R>
R *TensorLite::mutable_data() {
std::vector<int> v;
// std::cout << "mutable_data \n";
for (int i = 0; i < dims_.size(); i++) {
v.push_back(dims_[i]);
}
...@@ -228,7 +243,7 @@ R *TensorLite::mutable_data() {
break;
}
zynqmp::Shape input_shape(layout_type, v);
// std::cout << "input_shape \n";
zynqmp::DataType data_type = zynqmp::FP32;
if (typeid(T) == typeid(float)) {
data_type = zynqmp::FP32;
...@@ -236,6 +251,13 @@ R *TensorLite::mutable_data() {
if (typeid(T) == typeid(zynqmp::float16)) {
data_type = zynqmp::FP16;
}
// std::cout << "mutableData \n";
// std::cout << "zynq_tensor_:" << zynq_tensor_.get() << std::endl;
if (zynq_tensor_.get() == nullptr) {
zynq_tensor_.reset(new zynqmp::Tensor());
}
return zynq_tensor_->mutableData<R>(data_type, input_shape);
}
...@@ -276,6 +298,7 @@ TensorLite TensorLite::Slice(int64_t begin, int64_t end) const {
template <typename T>
void TensorLite::Slice(TensorLite &dst, int64_t begin, int64_t end) const {
// TODO(chonwhite) delete this function;
CHECK_GE(begin, 0);
CHECK_LE(end, dims_[0]);
CHECK_LT(begin, end);
......
...@@ -40,8 +40,8 @@ void FeedCompute::PrepareForRun() {
void FeedCompute::Run() {
auto& param = this->Param<param_t>();
Tensor& x = param.feed_list->at(param.col);
pe_.param().input = x.ZynqTensor();
pe_.dispatch();
auto out_lod = param.out->mutable_lod();
*out_lod = x.lod();
......
...@@ -80,7 +80,8 @@ void mul(MulCompute* k) {
}
void MulCompute::Run() {
// pe_.dispatch();
mul(this);
#ifdef FPGA_PRINT_TENSOR
zynqmp::FullyConnectedParam& fc_param = pe_.param();
Debugger::get_instance().registerOutput("mul", fc_param.output);
......