From bf77b2fecd817ab27f3be307eddca7a06b390911 Mon Sep 17 00:00:00 2001
From: liuruilong
Date: Thu, 27 Sep 2018 12:17:18 +0800
Subject: [PATCH] add cl files

---
 src/framework/cl/cl_engine.cpp | 131 ++++++++++++++++++++++
 src/framework/cl/cl_engine.h   | 103 +++++++++++++++++
 src/framework/cl/cl_tensor.h   | 127 +++++++++++++++++++++
 src/framework/tensor.h         | 198 ++++++---------------------------
 src/framework/tensor_base.h    | 172 ++++++++++++++++++++++++++++
 5 files changed, 566 insertions(+), 165 deletions(-)
 create mode 100644 src/framework/cl/cl_engine.cpp
 create mode 100644 src/framework/cl/cl_engine.h
 create mode 100644 src/framework/cl/cl_tensor.h
 create mode 100644 src/framework/tensor_base.h

diff --git a/src/framework/cl/cl_engine.cpp b/src/framework/cl/cl_engine.cpp
new file mode 100644
index 0000000000..8cfa24c9c4
--- /dev/null
+++ b/src/framework/cl/cl_engine.cpp
@@ -0,0 +1,131 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "framework/cl/cl_engine.h"
+
+#include <cstring>
+#include <fstream>
+#include <iostream>
+
+namespace paddle_mobile {
+namespace framework {
+
+bool CLEngine::Init() {
+  SetPlatform();
+  SetClDeviceId();
+  SetClContext();
+  SetClCommandQueue();
+  std::string filename = "./HelloWorld_Kernel.cl";
+  LoadKernelFromFile(filename.c_str());
+  BuildProgram();
+  initialized_ = true;
+  return initialized_;
+}
+
+CLEngine *CLEngine::Instance() {
+  static CLEngine cl_engine_;
+  return &cl_engine_;
+}
+
+std::unique_ptr<_cl_kernel, CLKernelDeleter> CLEngine::GetKernel(
+    const std::string &kernel_name) {
+  std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel(
+      clCreateKernel(program_.get(), kernel_name.c_str(), NULL));
+  return kernel;
+}
+
+bool CLEngine::SetClCommandQueue() {
+  cl_int status;
+  command_queue_.reset(
+      clCreateCommandQueue(context_.get(), devices_[0], 0, &status));
+  return true;
+}
+
+bool CLEngine::SetPlatform() {
+  platform_ = NULL;      // the chosen platform
+  cl_uint numPlatforms;  // the NO. of platforms
+  cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
+
+  /* For clarity, choose the first available platform. */
+  if (numPlatforms > 0) {
+    cl_platform_id *platforms = reinterpret_cast<cl_platform_id *>(
+        malloc(numPlatforms * sizeof(cl_platform_id)));
+    status = clGetPlatformIDs(numPlatforms, platforms, NULL);
+    platform_ = platforms[0];
+    free(platforms);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool CLEngine::SetClContext() {
+  context_.reset(clCreateContext(NULL, 1, devices_, NULL, NULL, NULL));
+  return true;
+}
+
+bool CLEngine::SetClDeviceId() {
+  cl_uint numDevices = 0;
+  devices_ = NULL;
+  cl_int status =
+      clGetDeviceIDs(platform_, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
+
+  if (numDevices > 0) {
+    std::cout << numDevices << std::endl;
+    devices_ = reinterpret_cast<cl_device_id *>(
+        malloc(numDevices * sizeof(cl_device_id)));
+    status = clGetDeviceIDs(platform_, CL_DEVICE_TYPE_GPU, numDevices, devices_,
+                            NULL);
+    return true;
+  }
+  return false;
+}
+
+bool CLEngine::LoadKernelFromFile(const char *kernel_file) {
+  size_t size;
+  char *str;
+  std::fstream f(kernel_file, (std::fstream::in | std::fstream::binary));
+
+  if (!f.is_open()) {
+    return false;
+  }
+
+  size_t fileSize;
+  f.seekg(0, std::fstream::end);
+  size = fileSize = (size_t)f.tellg();
+  f.seekg(0, std::fstream::beg);
+  str = new char[size + 1];
+  if (!str) {
+    f.close();
+    return false;
+  }
+
+  f.read(str, fileSize);
+  f.close();
+  str[size] = '\0';
+  const char *source = str;
+  size_t sourceSize[] = {strlen(source)};
+  program_.reset(
+      clCreateProgramWithSource(context_.get(), 1, &source, sourceSize, NULL));
+  return true;
+}
+
+bool CLEngine::BuildProgram() {
+  cl_int status;
+  status = clBuildProgram(program_.get(), 0, 0, "-cl-fast-relaxed-math", 0, 0);
+  return true;
+}
+
+}  // namespace framework
+}  // namespace paddle_mobile
diff --git a/src/framework/cl/cl_engine.h b/src/framework/cl/cl_engine.h
new file mode 100644
index 0000000000..77d5eee2af
--- /dev/null
+++ b/src/framework/cl/cl_engine.h
@@ -0,0 +1,103 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <CL/cl.h>
+// #include "CL/cl.h"
+#include <memory>
+#include <string>
+
+namespace paddle_mobile {
+namespace framework {
+
+struct CLContext {};
+
+struct CLKernelDeleter {
+  template <class T>
+  void operator()(T *clKernelObj) {
+    clReleaseKernel(clKernelObj);
+  }
+};
+
+struct CLMemDeleter {
+  template <class T>
+  void operator()(T *clMemObj) {
+    clReleaseMemObject(clMemObj);
+  }
+};
+
+struct CLCommQueueDeleter {
+  template <class T>
+  void operator()(T *clQueueObj) {
+    clReleaseCommandQueue(clQueueObj);
+  }
+};
+
+struct CLContextDeleter {
+  template <class T>
+  void operator()(T *clContextObj) {
+    clReleaseContext(clContextObj);
+  }
+};
+
+struct CLProgramDeleter {
+  template <class T>
+  void operator()(T *clProgramObj) {
+    clReleaseProgram(clProgramObj);
+  }
+};
+
+class CLEngine {
+ public:
+  static CLEngine *Instance();
+
+  bool Init();
+
+  std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(
+      const std::string &kernel_name);
+
+  const cl_context GetContext() { return context_.get(); }
+
+  const cl_program GetProgram() { return program_.get(); }
+
+  const cl_command_queue GetCommandQueue() { return command_queue_.get(); }
+
+ private:
+  CLEngine() { initialized_ = false; }
+
+  bool SetPlatform();
+
+  bool SetClDeviceId();
+
+  bool SetClContext();
+
+  bool SetClCommandQueue();
+
+  bool LoadKernelFromFile(const char *kernel_file);
+
+  bool BuildProgram();
+
+  bool initialized_;
+  cl_platform_id platform_;
+  cl_device_id *devices_;
+  std::unique_ptr<_cl_context, CLContextDeleter> context_;
+  std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_;
+  std::unique_ptr<_cl_program, CLProgramDeleter> program_;
+};
+
+}  // namespace framework
+}  // namespace paddle_mobile
diff --git a/src/framework/cl/cl_tensor.h b/src/framework/cl/cl_tensor.h
new file mode 100644
index 0000000000..8ec19b289f
--- /dev/null
+++ b/src/framework/cl/cl_tensor.h
@@ -0,0 +1,127 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <memory>
+#include <type_traits>
+#include <typeindex>
+
+#include "framework/cl/cl_engine.h"
+#include "framework/tensor_base.h"
+
+namespace paddle_mobile {
+namespace framework {
+
+class CLTensor : public TensorBase {
+ public:
+  CLTensor() {}
+
+  /*! Resize the dimensions of the memory block. */
+  inline CLTensor &Resize(const DDim &dims) {
+    dims_ = dims;
+    return *this;
+  }
+
+  // template <typename T>
+  // inline T *mutable_with_data(void *data) {
+  //   int64_t size = numel() * sizeof(float);
+  //   holder_.reset(new PlaceholderImpl(size, data, typeid(T)));
+  //   return reinterpret_cast<T *>(reinterpret_cast<void *>(
+  //       reinterpret_cast<uintptr_t>(holder_->ptr())));
+  // }
+
+  inline void *mutable_data(std::type_index type) {
+    if (holder_ != nullptr) {
+      holder_->set_type(type);
+    }
+    PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor's numel must >=0.")
+    int64_t size = numel() * SizeOfType(type);
+    if (holder_ == nullptr || holder_->size() < size + offset_) {
+      holder_.reset(new PlaceholderImpl(size, type));
+      offset_ = 0;
+    }
+    return reinterpret_cast<void *>(
+        reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
+  }
+
+  /**
+   * @brief   Return a pointer to mutable memory block.
+   * @note    If not exist, then allocation.
+   */
+  template <typename T>
+  inline T *mutable_data() {
+    static_assert(std::is_pod<T>::value, "T must be POD");
+    return reinterpret_cast<T *>(mutable_data(typeid(T)));
+  }
+
+  /**
+   * @brief   Return a pointer to mutable memory block.
+   *
+   * @param[in] dims    The dimensions of the memory block.
+   * @param[in] place   The place of the memory block.
+   *
+   * @note    If not exist, then allocation.
+   */
+  template <typename T>
+  inline T *mutable_data(DDim dims) {
+    static_assert(std::is_pod<T>::value, "T must be POD");
+    Resize(dims);
+    return mutable_data<T>();
+  }
+
+ private:
+  /*
+   * virtual ~Placeholder() = default;
+   *
+   * virtual void *ptr() const = 0;
+   *
+   * virtual size_t size() const = 0;
+   *
+   * virtual std::type_index type() const = 0;
+   *
+   * virtual void set_type(std::type_index type) = 0;
+   * */
+  struct PlaceholderImpl : public Placeholder {
+    PlaceholderImpl(size_t size, void *input, std::type_index type)
+        : ptr_(clCreateBuffer(CLEngine::Instance()->GetContext(),
+                              CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, size,
+                              reinterpret_cast<void *>(input), NULL)),
+          size_(size),
+          type_(type) {}
+
+    PlaceholderImpl(size_t size, std::type_index type)
+        : ptr_(clCreateBuffer(CLEngine::Instance()->GetContext(),
+                              CL_MEM_READ_WRITE, size, NULL, NULL)),
+          size_(size),
+          type_(type) {}
+
+    virtual size_t size() const { return size_; }
+
+    virtual void *ptr() const { return static_cast<void *>(ptr_.get()); }
+
+    virtual std::type_index type() const { return type_; }
+
+    virtual void set_type(std::type_index type) { type_ = type; }
+
+    std::unique_ptr<_cl_mem, CLMemDeleter> ptr_;
+
+    size_t size_;
+
+    /* the current type of memory */
+    std::type_index type_;
+  };
+};
+
+}  // namespace framework
+}  // namespace paddle_mobile
diff --git a/src/framework/tensor.h b/src/framework/tensor.h
index ba8e3d3402..b6990a07d8 100644
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -24,65 +24,24 @@ limitations under the License. */
 #include <vector>
 #include "common/enforce.h"
-#include "common/types.h"
 #include "framework/data_layout.h"
-#include "framework/ddim.h"
+#include "framework/tensor_base.h"
 #include "memory/t_malloc.h"
 
 namespace paddle_mobile {
 namespace framework {
 
-template <typename... T>
-struct SizeOfTypeFunctor;
-
-template <typename T>
-struct SizeOfTypeFunctor<T> {
-  size_t operator()(std::type_index type) const {
-    if (typeid(T).hash_code() == type.hash_code()) {
-      return sizeof(T);
-    } else {
-      return 0UL;
-    }
-  }
-};
-
-template <>
-struct SizeOfTypeFunctor<> {
-  size_t operator()(std::type_index type) const { return 0UL; }
-};
-
-template <typename HEAD, typename... TAIL>
-struct SizeOfTypeFunctor<HEAD, TAIL...> {
-  size_t operator()(std::type_index type) const {
-    SizeOfTypeFunctor<HEAD> head;
-    size_t head_size = head(type);
-    if (head_size != 0) {
-      return head_size;
-    }
-    SizeOfTypeFunctor<TAIL...> tail;
-    return tail(type);
-  }
-};
-
-static inline size_t SizeOfType(std::type_index type) {
-  SizeOfTypeFunctor<int, float, double, int16_t, int64_t, bool, size_t, int8_t>
-      functor;
-  size_t size = functor(type);
-
-  PADDLE_MOBILE_ENFORCE(size != 0UL, "Cannot get size of type %s", type.name());
-  return size;
-}
 
 class LoDTensor;
 
-class Tensor {
+class Tensor : public TensorBase {
  public:
-  Tensor() : offset_(0) {}
+  Tensor() {}
 
   template <typename T>
-  Tensor(std::vector<T> input, DDim ddim) : offset_(0) {
+  Tensor(std::vector<T> input, DDim ddim) {
     PADDLE_MOBILE_ENFORCE(
         input.size() == framework::product(ddim),
         "input vector'length should be equal to tensor's length");
+
     auto input_ptr = mutable_data<T>(ddim);
     for (int i = 0; i < input.size(); ++i) {
       input_ptr[i] = input[i];
@@ -95,46 +54,6 @@ class Tensor {
     this->offset_ = inTensor.offset_;
   }
 
-  /*! Return a pointer to mutable memory block. */
-  template <typename T>
-  inline T *data() {
-    check_memory_size();
-    PADDLE_MOBILE_ENFORCE(
-        (std::is_same<T, void>::value ||
-         holder_->type().hash_code() == typeid(T).hash_code()),
-        "Tensor holds the wrong type, it holds %s",
-        this->holder_->type().name());
-
-    return reinterpret_cast<T *>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
-                                 offset_);
-  }
-
-  /*! Return a pointer to constant memory block. */
-  template <typename T>
-  inline const T *data() const {
-    check_memory_size();
-    PADDLE_MOBILE_ENFORCE(
-        (std::is_same<T, void>::value ||
-         holder_->type().hash_code() == typeid(T).hash_code()),
-        "Tensor holds the wrong type, it holds %s ,requested:%s",
-        this->holder_->type().name(), typeid(T).name());
-
-    return reinterpret_cast<const T *>(
-        reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
-  }
-
-  inline bool IsInitialized() const { return holder_ != nullptr; }
-
-  /**
-   * @brief   Return a pointer to mutable memory block.
-   * @note    If not exist, then allocation.
-   */
-  template <typename T>
-  inline T *mutable_data() {
-    static_assert(std::is_pod<T>::value, "T must be POD");
-    return reinterpret_cast<T *>(mutable_data(typeid(T)));
-  }
-
 #ifdef PADDLE_MOBILE_DEBUG
   template <typename T>
   inline void dump(std::string filename) const {
@@ -151,6 +70,21 @@ class Tensor {
   }
 #endif
 
+  /*! Resize the dimensions of the memory block. */
+  inline Tensor &Resize(const DDim &dims) {
+    dims_ = dims;
+    return *this;
+  }
+
+  /*! The internal of two tensors share the same memory block. */
+  inline Tensor &ShareDataWith(const Tensor &src) {
+    src.check_memory_size();
+    if (holder_.get() != src.holder_.get()) {
+      *this = src;
+    }
+    return *this;
+  }
+
   inline void *mutable_data(std::type_index type) {
     if (holder_ != nullptr) {
       holder_->set_type(type);
@@ -165,6 +99,16 @@ class Tensor {
         reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
   }
 
+  /**
+   * @brief   Return a pointer to mutable memory block.
+   * @note    If not exist, then allocation.
+   */
+  template <typename T>
+  inline T *mutable_data() {
+    static_assert(std::is_pod<T>::value, "T must be POD");
+    return reinterpret_cast<T *>(mutable_data(typeid(T)));
+  }
+
   /**
    * @brief   Return a pointer to mutable memory block.
    *
    * @param[in] dims    The dimensions of the memory block.
    * @param[in] place   The place of the memory block.
    *
    * @note    If not exist, then allocation.
    */
@@ -180,27 +124,6 @@ class Tensor {
     return mutable_data<T>();
   }
 
-  /*! Return the dimensions of the memory block. */
-  inline const DDim &dims() const { return dims_; }
-
-  /*! Return the numel of the memory block. */
-  inline int64_t numel() const { return product(dims_); }
-
-  /*! Resize the dimensions of the memory block. */
-  inline Tensor &Resize(const DDim &dims) {
-    dims_ = dims;
-    return *this;
-  }
-
-  /*! The internal of two tensors share the same memory block. */
-  inline Tensor &ShareDataWith(const Tensor &src) {
-    src.check_memory_size();
-    if (holder_.get() != src.holder_.get()) {
-      *this = src;
-    }
-    return *this;
-  }
-
   /**
    * @brief  Return a sub-tensor of the given tensor.
    *
@@ -234,44 +157,7 @@ class Tensor {
     }
   }
 
-  std::type_index type() const {
-    PADDLE_MOBILE_ENFORCE(
-        holder_ != nullptr,
-        "Tensor not initialized yet when Tensor::type() is called.")
-    return holder_->type();
-  }
-
-  // memory size returns the holding memory size in byte.
-  size_t memory_size() const {
-    return holder_ == nullptr ? 0UL : holder_->size() - offset_;
-  }
-
-  inline void check_memory_size() const {
-    PADDLE_MOBILE_ENFORCE(
-        holder_ != nullptr,
-        "Tensor holds no memory. Call Tensor::mutable_data first.");
-    PADDLE_MOBILE_ENFORCE(numel() * SizeOfType(type()) <= memory_size(),
-                          "Tensor's dims_ is out of bound. ");
-  }
-
  private:
-  /**
-   * @note    Placeholder hides type T, so it doesn't appear as a
-   * template
-   *          parameter of Variable.
-   */
-  struct Placeholder {
-    virtual ~Placeholder() = default;
-
-    virtual void *ptr() const = 0;
-
-    virtual size_t size() const = 0;
-
-    virtual std::type_index type() const = 0;
-
-    virtual void set_type(std::type_index type) = 0;
-  };
-
   struct PlaceholderImpl : public Placeholder {
     PlaceholderImpl(size_t size, std::type_index type)
         : ptr_(static_cast<uint8_t *>(memory::Alloc(size)),
@@ -299,30 +185,12 @@ class Tensor {
     std::type_index type_;
   };
 
-  /*! holds the memory block if allocated. */
-  std::shared_ptr<Placeholder> holder_;
-
-  /**
-   * @brief points to elements dimensions.
-   *
-   * @note dims_ do not indicate the memory block size.
-   */
-
-  DDim dims_;
-
-  /**
-   * @brief   A PlaceHolder may be shared by more than one tensor.
-   *
-   * @note    Some of them may be slices of the others. So the offset_
-   *          is introduced here to indicate the byte offset between
-   *          PlaceHolder::ptr_ and where the tensor data really
-   *          begins.
-   */
-  size_t offset_;
-
 #ifdef PADDLE_MOBILE_FPGA
+ public:
   inline void reset_data_ptr(void *p) {
-    ((PlaceholderImpl *)(holder_.get()))->ptr_.reset((uint8_t *)p);
+    (reinterpret_cast<PlaceholderImpl *>(holder_.get()))
+        ->ptr_.reset(reinterpret_cast<uint8_t *>(p));
   }
   float scale[2];  // scale[0]= MAX/127.0, scale[1]= 127.0/MAX
 #endif
diff --git a/src/framework/tensor_base.h b/src/framework/tensor_base.h
new file mode 100644
index 0000000000..fe0c9116d4
--- /dev/null
+++ b/src/framework/tensor_base.h
@@ -0,0 +1,172 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <memory>
+#include <typeindex>
+
+#include "common/enforce.h"
+#include "common/types.h"
+#include "framework/ddim.h"
+
+namespace paddle_mobile {
+namespace framework {
+
+template <typename... T>
+struct SizeOfTypeFunctor;
+
+template <typename T>
+struct SizeOfTypeFunctor<T> {
+  size_t operator()(std::type_index type) const {
+    if (typeid(T).hash_code() == type.hash_code()) {
+      return sizeof(T);
+    } else {
+      return 0UL;
+    }
+  }
+};
+
+template <>
+struct SizeOfTypeFunctor<> {
+  size_t operator()(std::type_index type) const { return 0UL; }
+};
+
+template <typename HEAD, typename... TAIL>
+struct SizeOfTypeFunctor<HEAD, TAIL...> {
+  size_t operator()(std::type_index type) const {
+    SizeOfTypeFunctor<HEAD> head;
+    size_t head_size = head(type);
+    if (head_size != 0) {
+      return head_size;
+    }
+    SizeOfTypeFunctor<TAIL...> tail;
+    return tail(type);
+  }
+};
+
+static inline size_t SizeOfType(std::type_index type) {
+  SizeOfTypeFunctor<int, float, double, int16_t, int64_t, bool, size_t, int8_t>
+      functor;
+  size_t size = functor(type);
+
+  PADDLE_MOBILE_ENFORCE(size != 0UL, "Cannot get size of type %s", type.name());
+  return size;
+}
+
+class TensorBase {
+ public:
+  virtual inline TensorBase &Resize(const DDim &dims) = 0;
+
+  inline bool IsInitialized() const { return holder_ != nullptr; }
+
+  virtual inline void *mutable_data(std::type_index type) = 0;
+
+  /*! Return a pointer to mutable memory block. */
+  template <typename T>
+  inline T *data() {
+    check_memory_size();
+    PADDLE_MOBILE_ENFORCE(
+        (std::is_same<T, void>::value ||
+         holder_->type().hash_code() == typeid(T).hash_code()),
+        "Tensor holds the wrong type, it holds %s",
+        this->holder_->type().name());
+
+    return reinterpret_cast<T *>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
+                                 offset_);
+  }
+
+  /*! Return a pointer to constant memory block. */
+  template <typename T>
+  inline const T *data() const {
+    check_memory_size();
+    PADDLE_MOBILE_ENFORCE(
+        (std::is_same<T, void>::value ||
+         holder_->type().hash_code() == typeid(T).hash_code()),
+        "Tensor holds the wrong type, it holds %s ,requested:%s",
+        this->holder_->type().name(), typeid(T).name());
+
+    return reinterpret_cast<const T *>(
+        reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
+  }
+
+  /*! Return the dimensions of the memory block. */
+  inline const DDim &dims() const { return dims_; }
+
+  /*! Return the numel of the memory block. */
+  inline int64_t numel() const { return product(dims_); }
+
+  std::type_index type() const {
+    PADDLE_MOBILE_ENFORCE(
+        holder_ != nullptr,
+        "Tensor not initialized yet when Tensor::type() is called.")
+    return holder_->type();
+  }
+
+  // memory size returns the holding memory size in byte.
+  size_t memory_size() const {
+    return holder_ == nullptr ? 0UL : holder_->size() - offset_;
+  }
+
+  inline void check_memory_size() const {
+    PADDLE_MOBILE_ENFORCE(
+        holder_ != nullptr,
+        "Tensor holds no memory. Call Tensor::mutable_data first.");
+    PADDLE_MOBILE_ENFORCE(numel() * SizeOfType(type()) <= memory_size(),
+                          "Tensor's dims_ is out of bound. ");
+  }
+
+ protected:
+  /**
+   * @note    Placeholder hides type T, so it doesn't appear as a
+   * template
+   *          parameter of Variable.
+ */ + struct Placeholder { + virtual ~Placeholder() = default; + + virtual void *ptr() const = 0; + + virtual size_t size() const = 0; + + virtual std::type_index type() const = 0; + + virtual void set_type(std::type_index type) = 0; + }; + + /** + * @brief points to elements dimensions. + * + * @note dims_ do not indicate the memory block size. + */ + + DDim dims_; + + /*! holds the memory block if allocated. */ + std::shared_ptr holder_; + + /** + * @brief A PlaceHolder may be shared by more than one tensor. + * + * @note Some of them may be slices of the others. So the offset_ + * is introduced here to indicate the byte offset between + * PlaceHolder::ptr_ and where the tensor data really + * begins. + */ + size_t offset_ = 0; +}; + +} // namespace framework +} // namespace paddle_mobile -- GitLab