diff --git a/CMakeLists.txt b/CMakeLists.txt
index 77a94bea1efcdafaa67b4c078bfb0a756f7b1cec..69352c59f0145e1cb418eb068632d048c16a5146 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -22,6 +22,9 @@ if (WITH_PADDLE_MOBILE)
   return()
 endif(WITH_PADDLE_MOBILE)
 
+# set(CMAKE_BUILD_TYPE Debug)
+
+
 set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
 set(CMAKE_CXX_STANDARD 11)
diff --git a/lite/backends/fpga/KD/debugger.hpp b/lite/backends/fpga/KD/debugger.hpp
index 9b1189c407d6d601bb3e5ba8172b1455f04710fd..c0510e0f381a2ba6ae355870752dcb7dae1bd93f 100755
--- a/lite/backends/fpga/KD/debugger.hpp
+++ b/lite/backends/fpga/KD/debugger.hpp
@@ -14,6 +14,8 @@
 
 #pragma once
 
+#include <string>
+#include <unordered_map>
 #include <iostream>
 #include <vector>
 
@@ -37,8 +39,19 @@ class Debugger {
     }
   }
 
+  void tick(std::string key) {
+    float value = 0;
+    if (tick_tock_map.count(key) > 0) {
+      value += tick_tock_map[key];
+    }
+    tick_tock_map[key] = value;
+  }
+
+  void tock(std::string key) {}
+
  private:
   std::unordered_map<std::string, bool> op_config;
+  std::unordered_map<std::string, float> tick_tock_map;
   Debugger() {
     op_config["concat"] = true;
     op_config["pooling"] = true;
diff --git a/lite/backends/fpga/KD/dl_engine.hpp b/lite/backends/fpga/KD/dl_engine.hpp
index eddf5ca454cdc9e91f87d6e4f2c8dfc13f35fdc6..fe66c84143fbc05f0b52a11e2e315b7f3db9054c 100755
--- a/lite/backends/fpga/KD/dl_engine.hpp
+++ b/lite/backends/fpga/KD/dl_engine.hpp
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 
 #include <stdio.h>
+
 #include "lite/backends/fpga/KD/llapi/filter.h"
 #include "lite/backends/fpga/KD/llapi/zynqmp_api.h"
 
diff --git a/lite/backends/fpga/KD/io.cpp b/lite/backends/fpga/KD/io.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..705faefe7443270c40b7a7c88f517f5381d83919
--- /dev/null
+++ b/lite/backends/fpga/KD/io.cpp
@@ -0,0 +1,43 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "io.hpp"
+
+namespace paddle {
+namespace zynqmp {
+
+// Defined out of line so the private constructor used by get_instance() links.
+FpgaIO::FpgaIO() {}
+
+// void FpgaIO::setMutex(std::mutex* mtx) { mtx_ = mtx; }
+
+// void FpgaIO::setConditionVariable(std::condition_variable* condition) {
+//   condition_ = condition;
+// }
+
+// void FpgaIO::lock() {
+//   if (mtx_ != nullptr && !locked_) {
+//     mtx_->lock();
+//     locked_ = true;
+//   }
+// }
+
+// void FpgaIO::unlock() {
+//   if (mtx_ != nullptr) {
+//     mtx_->unlock();
+//     condition_->notify_one();
+//   }
+//   locked_ = false;
+// }
+
+}  // namespace zynqmp
+}  // namespace paddle
diff --git a/lite/backends/fpga/KD/io.hpp b/lite/backends/fpga/KD/io.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ef828da41c50c9d5085a6696a89c76334a73a21f
--- /dev/null
+++ b/lite/backends/fpga/KD/io.hpp
@@ -0,0 +1,50 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <cstddef>
+// #include <condition_variable>
+// #include <mutex>
+
+namespace paddle {
+namespace zynqmp {
+
+class FpgaIO {
+ public:
+  static FpgaIO& get_instance() {
+    static FpgaIO s_instance;
+    return s_instance;
+  }
+
+  void allocData(size_t s) { data_ = new float[s]; }
+
+  float* getData() { return data_; }
+
+  // void setMutex(std::mutex* mtx);
+  // void setConditionVariable(std::condition_variable* condition);
+  // void lock();
+  // void unlock();
+
+ private:
+  // std::mutex* mtx_ = nullptr;
+  // std::condition_variable* condition_ = nullptr;
+  // bool locked_ = false;
+
+  float* data_ = nullptr;
+
+  FpgaIO();
+};
+}  // namespace zynqmp
+}  // namespace paddle
diff --git a/lite/backends/fpga/KD/llapi/filter.cpp b/lite/backends/fpga/KD/llapi/filter.cpp
index da81565cf5ca152a54b6cc1514cb660589428439..7727345b1c138ba7d84bcbcd078badb2e2fb98d5 100755
--- a/lite/backends/fpga/KD/llapi/filter.cpp
+++ b/lite/backends/fpga/KD/llapi/filter.cpp
@@ -240,8 +240,8 @@ int8_t* format_filter(float* data_in,
   for (int n = 0; n < num; n++) {
     float* filter_start = data_in + n * chw;
     int8_t* quantized_start = quantized_data + n * chw;
-    quantize(filter_start, quantized_start, chw, max);
-    filter_max.push_back(1);
+    quantize(filter_start, quantized_start, chw, f_max);
+    filter_max.push_back(f_max);
   }
 
   int8_t* hwc_data =
diff --git a/lite/backends/fpga/KD/llapi/zynqmp_api.cpp b/lite/backends/fpga/KD/llapi/zynqmp_api.cpp
index 1408a034cb6a975e32d92da0406f98df7f2409c1..7a2c92335788364426b82d60b6a1ad85e633021c 100755
--- a/lite/backends/fpga/KD/llapi/zynqmp_api.cpp
+++ b/lite/backends/fpga/KD/llapi/zynqmp_api.cpp
@@ -205,7 +205,7 @@ int get_device_info(const struct DeviceInfo &args) {
 int perform_bypass(const struct BypassArgs &args) {
   int ret = -1;
   int size = args.image.channels * args.image.width * args.image.height;
-  int max_size = 1 << 21;
+  int max_size = 1 << 22;
 
   float times = 1.0 * size / max_size;
   int count = static_cast<int>(times);
diff --git a/lite/backends/fpga/KD/pes/fully_connected_pe.hpp b/lite/backends/fpga/KD/pes/fully_connected_pe.hpp
index a2b184e383aa600b1279197a115c58309e204a95..56433ac19f192ef65d2b3b10a10402e6c64624a2 100644
--- a/lite/backends/fpga/KD/pes/fully_connected_pe.hpp
+++ b/lite/backends/fpga/KD/pes/fully_connected_pe.hpp
@@ -14,6 +14,8 @@ limitations under the License.
 */
 
 #pragma once
+#include <string.h>
+#include <vector>
 
 #include "lite/backends/fpga/KD/pe.hpp"
@@ -38,7 +40,6 @@ class FullyConnectedPE : public PE {
     Tensor* input = param_.input;
     convParam_.input = param_.input;
    convParam_.output = param_.output;
-    // convParam_.relu = param_.relu;
     convParam_.activeParam.type = param_.activeParam.type;
     convParam_.groups = 1;
     convParam_.strides = {1, 1};
@@ -54,32 +55,40 @@ class FullyConnectedPE : public PE {
     int height = param_.input->shape().height();
     int width = param_.input->shape().width();
-    int filter_channel = chw / height / width;
+    // int filter_channel = chw / height / width;
 
     int channel = param_.output->shape().channel();
-    Shape shape(NCHW, {num, filter_channel, height, width});
-    Tensor* conv_filter = new Tensor();
-    float* new_filter_data = conv_filter->mutableData<float>(FP32, shape);
+
+    int align = 32;
+    int chw_aligned = ((chw + align - 1) / align) * align;
+    Shape shape(NCHW, {num, chw_aligned, 1, 1});
+    float* new_filter_data = conv_filter_.mutableData<float>(FP32, shape);
     float* filter_data = param_.filter->data<float>();
+    memset(new_filter_data, 0, num * chw_aligned * sizeof(float));
+
     for (int i = 0; i < num; i++) {
       for (int j = 0; j < chw; j++) {
         float scale = filter_data[j * num + i];
-        new_filter_data[i * chw + j] = scale;
+        new_filter_data[i * chw_aligned + j] = scale;
       }
     }
-    conv_filter->flush();
-    convParam_.filter = conv_filter;
+    conv_filter_.flush();
+    convParam_.filter = &conv_filter_;
+    // param_.filter->saveToFile("param_filter", true);
+    // conv_filter_.saveToFile("conv_filter", true);
+    // exit(-1);
 
-    Shape sb_shape(N, {channel});
+    Shape sb_shape(N, {num});
     float* scale_data = convParam_.scale()->mutableData<float>(FP32, sb_shape);
     float* bias_data = convParam_.bias()->mutableData<float>(FP32, sb_shape);
 
-    for (int i = 0; i < channel; i++) {
+    for (int i = 0; i < num; i++) {
       scale_data[i] = 1.0f;
       bias_data[i] = param_.bias->data<float>()[i];
     }
     convParam_.scale()->flush();
     convParam_.bias()->flush();
@@ -115,14 +124,41 @@ class FullyConnectedPE : public PE {
     output->flush();
     output->scale()[0] = max / 127.0f;
     output->scale()[1] = 127.0f / max;
+    output->saveToFile("cpu_compute", true);
+    // exit(-1);
+  }
+
+  void batch_to_w() {
+    ConvParam& convParam_ = convPE_.param();
+
+    int channel = param_.input->shape().channel();
+    param_.input->invalidate();
+
+    int remainder =
+        aligned_input_.shape().channel() - param_.input->shape().channel();
+
+    float max = 0;
+    for (int n = 0; n < param_.input->shape().num(); n++) {
+      memset(aligned_input_.data<float16>(),
+             0,
+             aligned_input_.shape().channel() * sizeof(float16));
+      memcpy(aligned_input_.data<float16>() +
+                 n * aligned_input_.shape().channel(),
+             param_.input->data<float16>() + n * channel,
+             channel * sizeof(float16));
+      aligned_input_.copyScaleFrom(param_.input);
+      aligned_input_.flush();
+    }
+
+    convPE_.dispatch();
   }
 
   bool dispatch() {
-    // int num = param_.filter->shape().channel();
-    // if (num == 2) {
-    //   cpu_compute();
-    //   return 1;
-    // } else {
+    // batch_to_w();
+    // return 1;
+    // cpu_compute1();
+    // return 1;
+
     return convPE_.dispatch();
     // }
   }
@@ -131,7 +167,10 @@ class FullyConnectedPE : public PE {
 
  private:
   FullyConnectedParam param_;
+  Tensor aligned_input_;
+  Tensor aligned_output_;
   ConvPE convPE_;
+  Tensor conv_filter_;
 };
 
 }  // namespace zynqmp
 }  // namespace paddle
diff --git a/lite/backends/fpga/KD/pes/input_pe.hpp b/lite/backends/fpga/KD/pes/input_pe.hpp
index 380c85e17e7de63486bbc93bb62ae8728286dd7a..d8f9a15c6a94f6869a588f758982800b35eecc40 100755
--- a/lite/backends/fpga/KD/pes/input_pe.hpp
+++ b/lite/backends/fpga/KD/pes/input_pe.hpp
@@ -29,6 +29,7 @@ class InputPE : public PE {
   }
 
   bool dispatch() {
+    // std::cout << "input_dispatch()\n";
     Tensor* input = param_.input;
     Tensor* output = param_.output;
 
diff --git a/lite/backends/fpga/KD/pes/output_pe.hpp b/lite/backends/fpga/KD/pes/output_pe.hpp
index 2944691693b135a2d2df7b91ecbe0ef249b015d8..2d02d30fbae12efc372e58c2ad80348356a8f22d 100755
--- a/lite/backends/fpga/KD/pes/output_pe.hpp
+++ b/lite/backends/fpga/KD/pes/output_pe.hpp
@@ -14,6 +14,8 @@ limitations under the License. */
 
 #pragma once
 
+#include <iostream>
+#include "lite/backends/fpga/KD/llapi/zynqmp_api.h"
 #include "lite/backends/fpga/KD/pe.hpp"
 #include "lite/backends/fpga/KD/pe_params.hpp"
 
@@ -52,6 +54,12 @@ class OutputPE : public PE {
     memcpy(DLEngine::get_instance().out_data,
            output->data<float>(),
            output->shape().numel() * sizeof(float));
+
+    fpga_reset();
+
+    auto max = fpga_get_memory_size_max();
+    std::cout << "PL max memory size: " << max << std::endl;
+
     return true;
   }
 
diff --git a/lite/backends/fpga/KD/tensor.hpp b/lite/backends/fpga/KD/tensor.hpp
index 988bc1bb507036de8f13a6c6549c549718bd1256..a19d55a64dede6e6e929d605359fcbab826c13e2 100644
--- a/lite/backends/fpga/KD/tensor.hpp
+++ b/lite/backends/fpga/KD/tensor.hpp
@@ -103,12 +103,17 @@ class Tensor {
     return reinterpret_cast<Dtype*>(ptr);
   }
 
+  void releaseData() {
+    released = true;
+    placeHolder_.reset();
+  }
+
   template <typename Dtype>
   Dtype* mutableData(DataType dataType, const Shape& shape) {
-    if (this->shape_ != nullptr) {
-      delete shape_;
-    }
-    this->shape_ = new Shape(shape);
+    // std::cout << "before new shape\n";
+    this->shape_.reset(new Shape(shape));
+    // std::cout << "new shape\n";
     this->dataType_ = dataType;
     return mutableData<Dtype>();
   }
 
@@ -117,11 +122,14 @@ class Tensor {
   Dtype* mutableData() {
     size_t memorySize =
         shape_->memorySize(CellSize(dataType_)) * mem_scale_factor_;
+    // std::cout << "mem_size:" << memorySize << std::endl;
     if (placeHolder_ != nullptr) {
+      // std::cout << "placeHolder_ not null" << std::endl;
       if (memorySize > placeHolder_->memorySize()) {
         placeHolder_.reset(new PlaceHolder(memorySize));
       }
     } else {
+      // std::cout << "placeHolder_ null" << std::endl;
       placeHolder_.reset(new PlaceHolder(memorySize));
     }
     return data<Dtype>();
 
@@ -138,7 +146,7 @@ class Tensor {
 
   DataType dataType() { return this->dataType_; }
 
-  Shape& shape() { return *shape_; }
+  Shape& shape() { return *(shape_.get()); }
 
   bool aligned() { return this->aligned_; }
 
@@ -247,15 +255,13 @@ class Tensor {
   void shareDataWith(Tensor* src) { shareDataWith(src, src->shape()); }
 
   void shareDataWith(Tensor* src, const Shape& shape, int offset = 0) {
-    if (shape_ != nullptr) {
-      delete shape_;
-    }
+    this->placeHolder_ = src->placeHolder_;
     this->dataType_ = src->dataType_;
     this->aligned_ = src->aligned_;
     this->dateLocation_ = src->dateLocation_;
     this->offset = offset;
-    shape_ = new Shape(const_cast<Shape&>(shape));
+    shape_.reset(new Shape(shape));
   }
 
   void copyFrom(Tensor* src) {
 
@@ -300,7 +306,13 @@ class Tensor {
   }
 
   void flush() {
-    size_t memorySize = placeHolder_->memorySize();
+    if (released) {
+      // std::cout << "flush::" << this << std::endl;
+      return;
+    }
+
+    size_t memorySize =
+        shape_->memorySize(CellSize(dataType_)) * mem_scale_factor_;
     fpga_flush(placeHolder_->data(), memorySize);
   }
 
@@ -451,18 +461,12 @@ class Tensor {
     return os;
   }
 
-  ~Tensor() {
-    if (shape_ != nullptr) {
-      delete shape_;
-      shape_ = nullptr;
-    }
-  }
-
  private:
+  bool released = false;
   int offset = 0;
   float mem_scale_factor_ = 1.0f;
   std::shared_ptr<PlaceHolder> placeHolder_;
-  Shape* shape_ = nullptr;
+  std::shared_ptr<Shape> shape_;
   DataType dataType_ = FP32;
   bool aligned_ = false;
   DataSyncStatus synchedStatus_ = Synched;
diff --git a/lite/backends/fpga/lite_tensor.cc b/lite/backends/fpga/lite_tensor.cc
index 7f1e8d3e17f97315e77532b77bbcfcc8331edd4f..6ec9f6866ab859460ebfa56a0e13800d45ba5d52 100755
--- a/lite/backends/fpga/lite_tensor.cc
+++ b/lite/backends/fpga/lite_tensor.cc
@@ -69,7 +69,7 @@ std::string DDimLite::repr() const {
 }
 
 void TensorLite::ShareDataWith(const TensorLite &other) {
-  buffer_ = other.buffer_;
+  buffer_ = other.buffer_;  // TODO(chonwhite) delete buffer;
   dims_ = other.dims_;
   zynq_tensor_ = other.zynq_tensor_;
   target_ = other.target_;
@@ -79,10 +79,10 @@ void TensorLite::ShareDataWith(const TensorLite &other) {
 }
 
 void *TensorLite::mutable_data(size_t memory_size) {
-  memory_size_ = memory_size;
+  memory_size_ = memory_size;  // TODO(chonwhite) delete buffer;
   buffer_->ResetLazy(target_, memory_size_);
   // throw -1;
-  std::cout << memory_size << std::endl;
+  // std::cout << memory_size << std::endl;
   return buffer_->data();
 }
 
@@ -92,16 +92,33 @@ void *TensorLite::mutable_data(TargetType target, size_t memory_size) {
 }
 
 void TensorLite::CopyDataFrom(const TensorLite &other) {
+  // std::cout << "other:: " << &other << std::endl;
   dims_ = other.dims_;
   target_ = other.target_;
   lod_ = other.lod_;
-  auto dt = zynq_tensor_->dataType();
 
-  auto shape = other.zynq_tensor_->shape();
+  // std::cout << "before dataType\n";
+  if (zynq_tensor_.get() == nullptr) {
+    zynq_tensor_.reset(new zynqmp::Tensor());
+  }
+  auto dt = zynq_tensor_->dataType();
+  // std::cout << "after dataType\n";
 
   Resize(other.dims());
+  auto shape = other.zynq_tensor_->shape();
+  // std::cout << "after resize\n";
   zynq_tensor_->mutableData<void>(dt, shape);
-  this->ZynqTensor()->copyFrom(other.ZynqTensor());
+  // std::cout << "ZynqTensor():" << this->ZynqTensor() << std::endl;
+  // std::cout << "other Tensor():" << other.ZynqTensor() << std::endl;
+
+  // this->ZynqTensor()->copyFrom(other.ZynqTensor());
+  memcpy(this->ZynqTensor()->data<float>(),
+         other.ZynqTensor()->data<float>(),
+         other.ZynqTensor()->shape().numel() * sizeof(float));
+  // std::cout << "after copyFrom\n";
 }
 
 }  // namespace lite
diff --git a/lite/backends/fpga/lite_tensor.h b/lite/backends/fpga/lite_tensor.h
index 311fc8a98400e5a6916ba1b9c8de1e6e0bcec4c0..f83bed541e59639daed83eefc503f8de6f05aef4 100644
--- a/lite/backends/fpga/lite_tensor.h
+++ b/lite/backends/fpga/lite_tensor.h
@@ -81,6 +81,10 @@ class DDimLite {
     return !(a == b);
   }
 
+  ~DDimLite() {
+    // std::cout << "free DDimLite\n";
+  }
+
  private:
   std::vector<value_type> data_;
 };
@@ -109,7 +113,12 @@ class TensorLite {
     return zynq_tensor_->data<R>() + offset_;
   }
 
-  void Resize(const DDimLite &ddim) { dims_ = ddim; }
+  void Resize(const DDimLite &ddim) {
+    // std::cout << "Resize\n";
+    // std::cout << "ddim:" << &ddim << std::endl;
+    dims_ = ddim;
+    // std::cout << "after Resize\n";
+  }
 
   void Resize(const std::vector<int64_t> &x) { dims_ = DDimLite(x); }
 
   const DDimLite &dims() const { return dims_; }
@@ -142,7 +151,9 @@ class TensorLite {
   void *mutable_data(size_t memory_size);
   void *mutable_data(TargetType target, size_t memory_size);
 
-  const void *raw_data() const { return buffer_->data(); }
+  const void *raw_data() const {
+    return buffer_->data();
+  }  // TODO(chonwhite) delete buffer;
 
   size_t data_size() const { return this->dims().production(); }
 
@@ -150,7 +161,9 @@ class TensorLite {
 
   size_t offset() const { return offset_; }
 
-  bool IsInitialized() const { return buffer_->data(); }
+  bool IsInitialized() const {
+    return buffer_->data();
+  }  // TODO(chonwhite) delete buffer;
 
   // Other share data to this.
   void ShareDataWith(const TensorLite &other);
@@ -165,7 +178,10 @@ class TensorLite {
 
   TargetType target() const { return target_; }
 
-  zynqmp::Tensor *ZynqTensor() const { return zynq_tensor_; }
+  // template <typename T>
+  // TensorLite Slice(int64_t begin, int64_t end) const;
+
+  zynqmp::Tensor *ZynqTensor() const { return zynq_tensor_.get(); }
 
   friend std::ostream &operator<<(std::ostream &os, const TensorLite &tensor) {
     os << "Tensor:" << '\n';
@@ -194,7 +210,7 @@ class TensorLite {
   size_t memory_size_{};
   size_t offset_{0};
 
-  zynqmp::Tensor *zynq_tensor_ = new zynqmp::Tensor();
+  std::shared_ptr<zynqmp::Tensor> zynq_tensor_;
 
   template <typename T>
   void mutable_data_internal();
 };
 
@@ -203,6 +219,7 @@ class TensorLite {
 template <typename T, typename R>
 R *TensorLite::mutable_data() {
   std::vector<int64_t> v;
+  // std::cout << "mutable_data\n";
   for (int i = 0; i < dims_.size(); i++) {
     v.push_back(dims_[i]);
   }
@@ -225,7 +242,7 @@ R *TensorLite::mutable_data() {
       break;
   }
   zynqmp::Shape input_shape(layout_type, v);
-
+  // std::cout << "input_shape\n";
   zynqmp::DataType data_type = zynqmp::FP32;
   if (typeid(T) == typeid(float)) {
     data_type = zynqmp::FP32;
@@ -233,6 +250,13 @@ R *TensorLite::mutable_data() {
   if (typeid(T) == typeid(zynqmp::float16)) {
     data_type = zynqmp::FP16;
   }
+  // std::cout << "zynq_tensor_:" << zynq_tensor_.get() << std::endl;
+
+  if (zynq_tensor_.get() == nullptr) {
+    zynq_tensor_.reset(new zynqmp::Tensor());
+  }
+
   return zynq_tensor_->mutableData<R>(data_type, input_shape);
 }
 
@@ -272,6 +296,7 @@ TensorLite TensorLite::Slice(int64_t begin, int64_t end) const {
 
 template <typename T>
 void TensorLite::Slice(TensorLite &dst, int64_t begin, int64_t end) const {
+  // TODO(chonwhite) delete this function;
   CHECK_GE(begin, 0);
   CHECK_LE(end, dims_[0]);
   CHECK_LT(begin, end);
diff --git a/lite/kernels/arm/sequence_pool_compute.cc b/lite/kernels/arm/sequence_pool_compute.cc
index 8fcbb8cffe72935e4df503c3c1748ddb68247fb7..93072fe499eed296d6e31d87ee9b74494de07aa1 100644
--- a/lite/kernels/arm/sequence_pool_compute.cc
+++ b/lite/kernels/arm/sequence_pool_compute.cc
@@ -59,6 +59,7 @@ void SequencePoolCompute::Run() {
   for (int i = 0; i <= batch_size; i++) {
     offset_new[i] = i;
   }
+  (output->mutable_lod())->clear();
   (output->mutable_lod())->push_back(offset_new);
 }
 
diff --git a/lite/kernels/fpga/CMakeLists.txt b/lite/kernels/fpga/CMakeLists.txt
index a017487b48b23b8b6096c901dc00d277f42d6aac..34125a0f47d373022f0e5e7828a2b11dc629cd48 100755
--- a/lite/kernels/fpga/CMakeLists.txt
+++ b/lite/kernels/fpga/CMakeLists.txt
@@ -14,7 +14,6 @@ add_kernel(conv_compute_fpga FPGA basic SRCS conv_compute.cc DEPS ${fpga_deps})
 # add_kernel(density_prior_box_compute_fpga FPGA basic SRCS density_prior_box_compute.cc DEPS ${fpga_deps})
 add_kernel(dropout_compute_fpga FPGA basic SRCS dropout_compute.cc DEPS ${fpga_deps})
 add_kernel(elementwise_compute_fpga FPGA basic SRCS elementwise_compute.cc DEPS ${fpga_deps})
-# add_kernel(feed_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps})
 add_kernel(fc_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps})
 add_kernel(gru_compute_fpga FPGA extra SRCS gru_compute.cc DEPS ${fpga_deps})
diff --git a/lite/kernels/fpga/feed_compute.cc b/lite/kernels/fpga/feed_compute.cc
index 79329e99a3e5e812dca487c17452f3f5d1e96449..9ca2424bc2f8a748c348cac4aafd219e538c7a17 100755
--- a/lite/kernels/fpga/feed_compute.cc
+++ b/lite/kernels/fpga/feed_compute.cc
@@ -40,8 +40,8 @@ void FeedCompute::PrepareForRun() {
 
 void FeedCompute::Run() {
   auto& param = this->Param<param_t>();
   Tensor& x = param.feed_list->at(param.col);
+  pe_.param().input = x.ZynqTensor();
   pe_.dispatch();
-
   auto out_lod = param.out->mutable_lod();
   *out_lod = x.lod();
 
diff --git a/lite/kernels/fpga/fetch_compute.cc b/lite/kernels/fpga/fetch_compute.cc
index 2d296f4d4a89b1fd86e5b2330d3caf44fbad0903..71ec37a64d94bcbef00d7e3c2a187bdb28c47935 100755
--- a/lite/kernels/fpga/fetch_compute.cc
+++ b/lite/kernels/fpga/fetch_compute.cc
@@ -82,6 +82,6 @@ REGISTER_LITE_KERNEL(fetch,
                      kNHWC,
                      paddle::lite::kernels::fpga::FetchCompute,
                      host_host)
-    .BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
     .Finalize();
diff --git a/lite/kernels/fpga/mul_compute.cc b/lite/kernels/fpga/mul_compute.cc
index c27600d9f773ff0aae04a2ee519905bc0e58785c..659c8dfb653c1707105a7337493ee4f3b3357b76 100755
--- a/lite/kernels/fpga/mul_compute.cc
+++ b/lite/kernels/fpga/mul_compute.cc
@@ -80,7 +80,8 @@ void mul(MulCompute* k) {
 }
 
 void MulCompute::Run() {
-  pe_.dispatch();
+  // pe_.dispatch();
+  mul(this);
 #ifdef FPGA_PRINT_TENSOR
   zynqmp::FullyConnectedParam& fc_param = pe_.param();
   Debugger::get_instance().registerOutput("mul", fc_param.output);
diff --git a/lite/kernels/host/one_hot_compute.cc b/lite/kernels/host/one_hot_compute.cc
index e0af6f5173f367bb9b2e06de10499ee36806379c..e1bf4c103bcb59c59617d8c5d1ce10ae8780e403 100755
--- a/lite/kernels/host/one_hot_compute.cc
+++ b/lite/kernels/host/one_hot_compute.cc
@@ -16,7 +16,7 @@
 
 #include <algorithm>
 #include <vector>
-#include "lite/backends/fpga/KD/debugger.hpp"
+// #include "lite/backends/fpga/KD/debugger.hpp"
 #include "lite/kernels/host/one_hot_compute.h"
 #include "lite/utils/paddle_enforce.h"
 
diff --git a/lite/operators/one_hot_op.cc b/lite/operators/one_hot_op.cc
index 023cdc23aeb8329736b7438af2c52cbfa899c75c..ebab9e20679038546611b8dc3221f4ecba1bbe21 100644
--- a/lite/operators/one_hot_op.cc
+++ b/lite/operators/one_hot_op.cc
@@ -15,7 +15,7 @@
 #include "lite/operators/one_hot_op.h"
 #include "lite/core/op_registry.h"
 
-#include "lite/backends/fpga/KD/debugger.hpp"
+// #include "lite/backends/fpga/KD/debugger.hpp"
 
 namespace paddle {
 namespace lite {
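
---
Review notes (editor commentary; the sketches below are illustrative, not part of the patch):

1) lite/backends/fpga/KD/debugger.hpp: tick() currently only round-trips
   tick_tock_map, and tock() is an empty stub. A minimal sketch of how the
   pair could be completed with std::chrono; the class name, the separate
   start-time map, and the millisecond granularity are assumptions, not
   taken from the patch:

    #include <chrono>
    #include <string>
    #include <unordered_map>

    class TickTock {
     public:
      // tick() records a start point for `key`.
      void tick(const std::string& key) {
        starts_[key] = std::chrono::steady_clock::now();
      }

      // tock() accumulates the elapsed milliseconds since the matching tick().
      void tock(const std::string& key) {
        auto it = starts_.find(key);
        if (it == starts_.end()) return;
        auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                      std::chrono::steady_clock::now() - it->second)
                      .count();
        elapsed_ms_[key] += us / 1000.0f;  // float map, like tick_tock_map
      }

     private:
      std::unordered_map<std::string,
                         std::chrono::steady_clock::time_point> starts_;
      std::unordered_map<std::string, float> elapsed_ms_;
    };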
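2) lite/backends/fpga/KD/llapi/zynqmp_api.cpp: the per-call bypass limit
   grows from 1 << 21 to 1 << 22 elements. perform_bypass() splits an image
   into full chunks plus a remainder pass, per the
   `count = static_cast<int>(1.0 * size / max_size)` arithmetic in the hunk.
   A small worked example of that chunking, with an assumed image size:

    #include <cstdio>

    int main() {
      int size = 3 * 1920 * 1080;   // channels * width * height (example)
      int max_size = 1 << 22;       // new per-call limit
      int count = size / max_size;  // number of full chunks
      int remainder = size - max_size * count;
      std::printf("%d full chunks + %d leftover elements\n", count, remainder);
      return 0;
    }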
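3) lite/backends/fpga/KD/pes/fully_connected_pe.hpp: apply() now rewrites the
   FC weight, stored as data[j * num + i], into one zero-padded row per output
   channel so the FC can run as a 1x1 convolution. A standalone sketch of the
   same transpose-and-pad loop (the helper name is ours; the 32-element
   alignment mirrors chw_aligned in the patch):

    #include <vector>

    std::vector<float> repack_fc_filter(const float* data,
                                        int num,   // output channels
                                        int chw,   // input features
                                        int align = 32) {
      int chw_aligned = ((chw + align - 1) / align) * align;
      std::vector<float> out(static_cast<size_t>(num) * chw_aligned, 0.0f);
      for (int i = 0; i < num; i++) {
        for (int j = 0; j < chw; j++) {
          out[i * chw_aligned + j] = data[j * num + i];  // transpose + pad
        }
      }
      return out;
    }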
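4) lite/backends/fpga/KD/tensor.hpp and lite/backends/fpga/lite_tensor.cc:
   moving shape_ and zynq_tensor_ from owning raw pointers to std::shared_ptr
   lets shareDataWith()/ShareDataWith() alias tensors without the
   double-delete risk the removed ~Tensor() carried. One caveat worth
   checking: the new memcpy in CopyDataFrom() always copies
   numel() * sizeof(float) bytes, which over-reads for FP16 tensors; sizing
   the copy from the element type would be safer. A sketch of that idea,
   with a stand-in for the CellSize() helper the patch already uses:

    #include <cstddef>

    enum DataType { FP32, FP16 };  // stand-in for zynqmp::DataType

    // Bytes per element; CellSize() in tensor.hpp plays this role.
    inline size_t cell_size(DataType dt) { return dt == FP32 ? 4 : 2; }

    inline size_t copy_bytes(size_t numel, DataType dt) {
      return numel * cell_size(dt);  // instead of numel * sizeof(float)
    }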