// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #ifdef LITE_WITH_FPGA #include "lite/backends/fpga/lite_tensor.h" #endif #ifndef LITE_WITH_FPGA #include #include // for multiplies #include #include #include #include #include "lite/core/memory.h" #include "lite/utils/replace_stl/stream.h" namespace paddle { namespace lite { class DDimLite; class TensorLite; using DDim = lite::DDimLite; using Tensor = lite::TensorLite; template class SmallVector { public: SmallVector() { // VLOG(3)<<"call constructor"; data_ = new ValueType[initLength](); // data_ = static_cast(malloc(DimLength * // sizeof(ValueType))); // data_.resize(DimLength); // memset(data_, 0, DimLength * sizeof(ValueType)); size_ = 0U; memory_size = initLength; } ~SmallVector() { // VLOG(3)<<"call deconstructor"; if (data_ != nullptr) { delete[] data_; // free(data_); } data_ = nullptr; size_ = 0U; memory_size = 0U; } size_t size() const { // VLOG(3)<<"call size()"; return size_; } void resize(size_t new_size) { // VLOG(3)<<"call resize()"; if (new_size > memory_size) { if (data_ != nullptr) { delete[] data_; } data_ = new ValueType[new_size](); memory_size = new_size; } size_ = new_size; } ValueType *mutable_data() { return data_; } const ValueType *data() const { // VLOG(3)<<"call data()"; return data_; } ValueType operator[](int offset) const { // VLOG(3)<<"call operator[]"; return data_[offset]; } ValueType &operator[](int offset) { // VLOG(3)<<"call &operator[]"; return data_[offset]; } private: // ValueType data_[DimLength]; // ValueType* data_{nullptr}; ValueType *data_{nullptr}; size_t size_{0U}; size_t memory_size{0U}; }; class DDimLite { public: constexpr static size_t init_length = 4; using value_type = int64_t; using DDimVector = SmallVector; DDimLite() = default; DDimLite(const DDimLite &a) { data_.resize(a.size()); if (a.size() > 0U) { memcpy( data_.mutable_data(), a.data().data(), a.size() * sizeof(value_type)); } // deep copy } explicit DDimLite(const std::vector &x) { ConstructFrom(x); } // DDimLite(std::initializer_list init_list) : // DDimLite(std::vector(init_list)) {} void ConstructFrom(const std::vector &x) { data_.resize(x.size()); if (x.size() > 0U) { memcpy(data_.mutable_data(), x.data(), x.size() * sizeof(value_type)); // std::copy(x.data(), x.data() + x.size(), data_.mutable_data()); } } value_type operator[](int offset) const { return data_[offset]; } value_type &operator[](int offset) { return data_[offset]; } std::vector Vectorize() const { std::vector vec; vec.resize(data_.size()); if (data_.size() > 0U) { memcpy(vec.data(), data_.data(), data_.size() * sizeof(value_type)); // std::copy(data_.data(), data_.data() + data_.size(), vec.data()); } return vec; } size_t size() const { return data_.size(); } void resize(size_t size) { data_.resize(size); } bool empty() const { return data_.size() == 0U; } value_type production() const; const std::vector data() const { std::vector vec; vec.resize(data_.size()); if (data_.size() > 0U) { memcpy(vec.data(), data_.data(), data_.size() * sizeof(value_type)); // std::copy(data_.data(), data_.data() + data_.size(), vec.data()); } return vec; } value_type count(int start, int end) const; DDimLite Slice(int start, int end) const; DDimLite Flatten2D(int col) const { return DDimLite(std::vector( {Slice(0, col).production(), Slice(col, size()).production()})); } std::string repr() const; friend STL::ostream &operator<<(STL::ostream &os, const DDimLite &dims) { os << dims.repr(); return os; } DDimLite &operator=(const DDimLite &a) { this->data_.resize(a.size()); if (a.size() > 0U) { // std::copy(a.data().data(), a.data().data() + a.data().size(), // this->data_.mutable_data()); memcpy(this->data_.mutable_data(), a.data().data(), a.size() * sizeof(value_type)); } return *this; } friend bool operator==(const DDimLite &a, const DDimLite &b) { if (a.size() != b.size()) return false; for (size_t i = 0; i < a.size(); i++) { if (a[i] != b[i]) return false; } return true; } friend bool operator!=(const DDimLite &a, const DDimLite &b) { if (a.size() != b.size()) return true; for (size_t i = 0; i < a.size(); i++) { if (a[i] != b[i]) return true; } return false; } private: DDimVector data_; }; using LoD = std::vector>; // A light-weight tensor implementation. class TensorLite { public: TensorLite() : buffer_(std::make_shared()) {} explicit TensorLite(std::shared_ptr buffer) : buffer_(buffer) {} template void Assign(const DType *data, const DimT &dim) { Resize(dim); auto *dst = mutable_data(Target); CopySync( dst, data, dim.production() * sizeof(DType), IoDirection::HtoD); } // T is the data type and R is the return type // For OpenCL, the return type can be cl::Buffer // and the data type can be float/int8_t. // For other devices, T and R may be the same type. template const R *data() const { return reinterpret_cast(static_cast(buffer_->data()) + offset_); } void Resize(const DDimLite &ddim) { dims_ = ddim; } void Resize(const std::vector &x) { dims_.ConstructFrom(x); } const DDimLite &dims() const { return dims_; } int64_t numel() const { return dims_.production(); } const LoD &lod() const { return lod_; } LoD *mutable_lod() { return &lod_; } void set_lod(const LoD &lod) { lod_ = lod; } PrecisionType precision() const { return precision_; } void set_precision(PrecisionType precision) { precision_ = precision; } bool persistable() const { return persistable_; } void set_persistable(bool persistable) { persistable_ = persistable; } // T is the data type and R is the return type // For OpenCL, the return type can be cl::Buffer // and the data type can be float/int8_t. // For other devices, T and R may be the same type. template R *mutable_data() { precision_ = lite_api::PrecisionTypeTrait::Type(); memory_size_ = dims_.production() * sizeof(T); buffer_->ResetLazy(target_, memory_size_); return reinterpret_cast(static_cast(buffer_->data()) + offset_); } #ifdef LITE_WITH_OPENCL template R *mutable_data(const size_t img_w, const size_t img_h, void *host_ptr = nullptr) { target_ = TARGET(kOpenCL); buffer_->ResetLazyImage2D(target_, img_w, img_h, host_ptr); return static_cast(buffer_->data()); } #endif // T is the data type and R is the return type // For OpenCL, the return type can be cl::Buffer // and the data type can be float/int8_t. // For other devices, T and R may be the same type. template R *mutable_data(TargetType target) { target_ = target; return mutable_data(); } void *mutable_data(size_t memory_size); void *mutable_data(TargetType target, size_t memory_size); const void *raw_data() const { return static_cast( (static_cast(buffer_->data()) + offset_)); } void *raw_data() { return static_cast( (static_cast(buffer_->data()) + offset_)); } void clear() { buffer_->Free(); offset_ = 0; } size_t data_size() const { return this->dims().production(); } size_t memory_size() const { return memory_size_; } size_t offset() const { return offset_; } bool IsInitialized() const { return buffer_->data(); } // Other share data to this. void ShareDataWith(const TensorLite &other); void CopyDataFrom(const TensorLite &other); void ResetBuffer(std::shared_ptr buffer, size_t memory_size); TargetType target() const { return target_; } template TensorLite Slice(int64_t begin, int64_t end) const; friend STL::ostream &operator<<(STL::ostream &os, const TensorLite &tensor) { os << "Tensor:" << '\n'; os << "dim: " << tensor.dims() << '\n'; for (int i = 0; i < tensor.dims().production(); i++) { os << tensor.template data()[i] << " "; } os << "\n"; return os; } private: TargetType target_{TargetType::kHost}; // precision_ and persistable_ are only used for persistable vars. // If your tensor wants to be saved and loaded correctly, you must // set values of precision_ and persistable_ after updating it. // If your tensor is just a temp tensor, such as activations, // you can ignore these two attributes. PrecisionType precision_{PrecisionType::kUnk}; bool persistable_{false}; DDimLite dims_; std::shared_ptr buffer_; LoD lod_; size_t memory_size_{}; /// @brief Buffer may be shared with other tensors size_t offset_{0}; }; template TensorLite TensorLite::Slice(int64_t begin, int64_t end) const { CHECK_GE(begin, 0); CHECK_LE(end, dims_[0]); CHECK_LT(begin, end); if (dims_[0] == 1) { return *this; } else { int64_t base = numel() / dims_[0]; TensorLite dst; dst.buffer_ = buffer_; dst.target_ = target_; auto dst_dims = dims_; dst_dims[0] = end - begin; dst.Resize(dst_dims); dst.offset_ = offset_ + static_cast(begin * base) * sizeof(T); return dst; } } template bool TensorCompareWith(const TensorT &a, const TensorT &b) { if (a.dims() != b.dims()) return false; if (memcmp(a.raw_data(), b.raw_data(), a.data_size()) != 0) return false; return true; } #ifdef LITE_WITH_OPENCL template <> const cl::Image2D *TensorLite::data() const; template <> // use uint16_t represent half float const cl::Image2D *TensorLite::data() const; #endif } // namespace lite } // namespace paddle #endif // #ifndef LITE_WITH_FPGA