diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 4cb73b35646fca63485a5374678aac153a54d309..2a1dacedca8f1b81d155841561bd5d5a16ca9344 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1195,20 +1195,6 @@ USE_TRT_CONVERTER(clip);
 
 namespace paddle_infer {
 
-void Tensor::Reshape(const std::vector<int> &shape) { tensor_->Reshape(shape); }
-
-std::vector<int> Tensor::shape() const { return tensor_->shape(); }
-
-void Tensor::SetLoD(const std::vector<std::vector<size_t>> &x) {
-  return tensor_->SetLoD(x);
-}
-
-std::vector<std::vector<size_t>> Tensor::lod() const { return tensor_->lod(); }
-
-const std::string &Tensor::name() const { return tensor_->name(); }
-
-DataType Tensor::type() const { return tensor_->type(); }
-
 Predictor::Predictor(const Config &config) {
   const_cast<Config *>(&config)->SwitchUseFeedFetchOps(false);
   // The second parameter indicates that the discard log is not printed
@@ -1221,9 +1207,7 @@ std::vector<std::string> Predictor::GetInputNames() {
 }
 
 std::unique_ptr<Tensor> Predictor::GetInputHandle(const std::string &name) {
-  auto zero_copy_tensor = predictor_->GetInputTensor(name);
-  std::unique_ptr<Tensor> tensor(new Tensor(std::move(zero_copy_tensor)));
-  return tensor;
+  return predictor_->GetInputTensor(name);
 }
 
 std::vector<std::string> Predictor::GetOutputNames() {
@@ -1231,9 +1215,7 @@ std::vector<std::string> Predictor::GetOutputNames() {
 }
 
 std::unique_ptr<Tensor> Predictor::GetOutputHandle(const std::string &name) {
-  auto zero_copy_tensor = predictor_->GetOutputTensor(name);
-  std::unique_ptr<Tensor> tensor(new Tensor(std::move(zero_copy_tensor)));
-  return tensor;
+  return predictor_->GetOutputTensor(name);
 }
 
 bool Predictor::Run() { return predictor_->ZeroCopyRun(); }
diff --git a/paddle/fluid/inference/api/details/CMakeLists.txt b/paddle/fluid/inference/api/details/CMakeLists.txt
index 80b53b32a8607b4e67f42ba30bd1a283c93ebed1..4341fb0a9ccd8822151d4660f5a0c22901e47122 100644
--- a/paddle/fluid/inference/api/details/CMakeLists.txt
+++ b/paddle/fluid/inference/api/details/CMakeLists.txt
@@ -16,3 +16,5 @@ cc_library(reset_tensor_array SRCS reset_tensor_array.cc DEPS lod_tensor scope)
 cc_library(zero_copy_tensor SRCS zero_copy_tensor.cc DEPS scope lod_tensor enforce)
 cc_library(zero_copy_tensor_dummy SRCS zero_copy_tensor_dummy.cc)
+
+cc_test(zero_copy_tensor_test SRCS zero_copy_tensor_test.cc DEPS paddle_inference_api)
diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
index 0ed7476bb61fe1985b1d5bc7204c1becd894b3d0..f7dbfd39cd26e6af40d7536d76fd031bee5a331c 100644
--- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -18,126 +18,135 @@
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/enforce.h"
 
-namespace paddle {
+namespace paddle_infer {
 
-void ZeroCopyTensor::Reshape(const std::vector<int> &shape) {
+void Tensor::Reshape(const std::vector<int> &shape) {
   PADDLE_ENFORCE_EQ(
       name_.empty(), false,
-      platform::errors::PreconditionNotMet(
+      paddle::platform::errors::PreconditionNotMet(
          "Need to SetName first, so that the corresponding tensor can "
          "be retrieved."));
   PADDLE_ENFORCE_EQ(input_or_output_, true,
-                    platform::errors::PermissionDenied(
+                    paddle::platform::errors::PermissionDenied(
                        "Can't reshape the output tensor, it is readonly"));
-  PADDLE_ENFORCE_NOT_NULL(scope_, platform::errors::PreconditionNotMet(
-                                      "The scope should not be nullptr."));
-  auto *scope = static_cast<framework::Scope *>(scope_);
+  auto *scope = static_cast<paddle::framework::Scope *>(scope_);
   auto *var = scope->FindVar(name_);
   PADDLE_ENFORCE_NOT_NULL(
-      var, platform::errors::PreconditionNotMet(
+      var, paddle::platform::errors::PreconditionNotMet(
               "No tensor called [%s] in the runtime scope", name_));
-  auto *tensor = var->GetMutable<framework::LoDTensor>();
-  tensor->Resize(framework::make_ddim(shape));
+  auto *tensor = var->GetMutable<paddle::framework::LoDTensor>();
+  tensor->Resize(paddle::framework::make_ddim(shape));
 }
 
 #define EAGER_GET_TENSOR    \
   if (!tensor_) {           \
     tensor_ = FindTensor(); \
   }                         \
-  auto *tensor = static_cast<framework::LoDTensor *>(tensor_);
+  auto *tensor = static_cast<paddle::framework::LoDTensor *>(tensor_);
 
 template <typename T>
-T *ZeroCopyTensor::mutable_data(PaddlePlace place) {
+T *Tensor::mutable_data(PlaceType place) {
   EAGER_GET_TENSOR;
   PADDLE_ENFORCE_GT(
      tensor->numel(), 0,
-      platform::errors::PreconditionNotMet(
-          "You should call ZeroCopyTensor::Reshape(const std::vector<int> "
+      paddle::platform::errors::PreconditionNotMet(
+          "You should call Tensor::Reshape(const std::vector<int> "
          "&shape)"
          "function before retrieving mutable_data from input tensor."));
   switch (static_cast<int>(place)) {
-    case static_cast<int>(PaddlePlace::kCPU): {
-      return tensor->mutable_data<T>(platform::CPUPlace());
+    case static_cast<int>(PlaceType::kCPU): {
+      return tensor->mutable_data<T>(paddle::platform::CPUPlace());
     }
-    case static_cast<int>(PaddlePlace::kGPU): {
-      return tensor->mutable_data<T>(platform::CUDAPlace(device_));
+    case static_cast<int>(PlaceType::kGPU): {
+      return tensor->mutable_data<T>(paddle::platform::CUDAPlace(device_));
+    }
+    case static_cast<int>(PlaceType::kXPU): {
+      return tensor->mutable_data<T>(paddle::platform::XPUPlace(device_));
     }
     default:
-      PADDLE_THROW(platform::errors::Unavailable("Unsupported place: %d",
-                                                 static_cast<int>(place)));
+      PADDLE_THROW(paddle::platform::errors::Unavailable(
+          "Only CPU / CUDA / XPU places are supported. The place `%d` is not "
+          "supported.",
+          static_cast<int>(place)));
      break;
   }
   return nullptr;
 }
 
 template <typename T>
-T *ZeroCopyTensor::data(PaddlePlace *place, int *size) const {
+T *Tensor::data(PlaceType *place, int *size) const {
   EAGER_GET_TENSOR;
   auto *res = tensor->data<T>();
 
-  if (platform::is_cpu_place(tensor->place())) {
-    *place = PaddlePlace::kCPU;
-  } else if (platform::is_gpu_place(tensor->place())) {
-    *place = PaddlePlace::kGPU;
+  if (paddle::platform::is_cpu_place(tensor->place())) {
+    *place = PlaceType::kCPU;
+  } else if (paddle::platform::is_gpu_place(tensor->place())) {
+    *place = PlaceType::kGPU;
+  } else if (paddle::platform::is_xpu_place(tensor->place())) {
+    *place = PlaceType::kXPU;
   } else {
-    *place = PaddlePlace::kUNK;
+    *place = PlaceType::kUNK;
   }
   *size = tensor->numel();
   return res;
 }
 
-PaddleDType ZeroCopyTensor::type() const {
+DataType Tensor::type() const {
   EAGER_GET_TENSOR;
   auto type = tensor->type();
-  if (type == framework::proto::VarType::FP32) {
-    return PaddleDType::FLOAT32;
-  } else if (type == framework::proto::VarType::INT64) {
-    return PaddleDType::INT64;
-  } else if (type == framework::proto::VarType::INT32) {
-    return PaddleDType::INT32;
-  } else if (type == framework::proto::VarType::UINT8) {
-    return PaddleDType::UINT8;
+  if (type == paddle::framework::proto::VarType::FP32) {
+    return DataType::FLOAT32;
+  } else if (type == paddle::framework::proto::VarType::INT64) {
+    return DataType::INT64;
+  } else if (type == paddle::framework::proto::VarType::INT32) {
+    return DataType::INT32;
+  } else if (type == paddle::framework::proto::VarType::UINT8) {
+    return DataType::UINT8;
   }
-  return PaddleDType::FLOAT32;
+  return DataType::FLOAT32;
 }
 
 template <typename T>
-void ZeroCopyTensor::copy_from_cpu(const T *data) {
+void Tensor::CopyFromCpu(const T *data) {
   EAGER_GET_TENSOR;
   PADDLE_ENFORCE_GE(tensor->numel(), 0,
-                    platform::errors::PreconditionNotMet(
-                        "You should call ZeroCopyTensor::Reshape(const "
+                    paddle::platform::errors::PreconditionNotMet(
+                        "You should call Tensor::Reshape(const "
                        "std::vector<int> &shape)"
                        "function before copying data from cpu."));
   size_t ele_size = tensor->numel() * sizeof(T);
 
-  if (place_ == PaddlePlace::kCPU) {
-    auto *t_data = tensor->mutable_data<T>(platform::CPUPlace());
+  if (place_ == PlaceType::kCPU) {
+    auto *t_data = tensor->mutable_data<T>(paddle::platform::CPUPlace());
     std::memcpy(static_cast<void *>(t_data), data, ele_size);
-  } else if (place_ == PaddlePlace::kGPU) {
+  } else if (place_ == PlaceType::kGPU) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
-    platform::CUDAPlace gpu_place(device_);
+    paddle::platform::DeviceContextPool &pool =
+        paddle::platform::DeviceContextPool::Instance();
+    paddle::platform::CUDAPlace gpu_place(device_);
     auto *t_data = tensor->mutable_data<T>(gpu_place);
-    auto *dev_ctx =
-        static_cast<const platform::CUDADeviceContext *>(pool.Get(gpu_place));
+    auto *dev_ctx = static_cast<const paddle::platform::CUDADeviceContext *>(
+        pool.Get(gpu_place));
 
-    memory::Copy(gpu_place, static_cast<void *>(t_data), platform::CPUPlace(),
-                 data, ele_size, dev_ctx->stream());
+    paddle::memory::Copy(gpu_place, static_cast<void *>(t_data),
+                         paddle::platform::CPUPlace(), data, ele_size,
+                         dev_ctx->stream());
 #else
-    PADDLE_THROW(platform::errors::Unavailable(
-        "Not compiled with CUDA, should not reach here."));
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "Can not create tensor with CUDA place because paddle is not compiled "
+        "with CUDA."));
 #endif
-  } else if (place_ == PaddlePlace::kXPU) {
+  } else if (place_ == PlaceType::kXPU) {
 #ifdef PADDLE_WITH_XPU
-    platform::XPUPlace xpu_place(device_);
+    paddle::platform::XPUPlace xpu_place(device_);
     auto *t_data = tensor->mutable_data<T>(xpu_place);
-    memory::Copy(xpu_place, static_cast<void *>(t_data), platform::CPUPlace(),
-                 data, ele_size);
+    paddle::memory::Copy(xpu_place, static_cast<void *>(t_data),
+                         paddle::platform::CPUPlace(), data, ele_size);
 #else
-    PADDLE_THROW(platform::errors::Unavailable(
-        "Not compiled with XPU, should not reach here."));
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "Can not create tensor with XPU place because paddle is not compiled "
+        "with XPU."));
 #endif
   } else {
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
@@ -146,119 +155,119 @@ void ZeroCopyTensor::copy_from_cpu(const T *data) {
 }
 
 template <typename T>
-void ZeroCopyTensor::copy_to_cpu(T *data) {
+void Tensor::CopyToCpu(T *data) {
   EAGER_GET_TENSOR;
   auto ele_num = tensor->numel();
   auto *t_data = tensor->data<T>();
   auto t_place = tensor->place();
 
-  if (platform::is_cpu_place(t_place)) {
+  if (paddle::platform::is_cpu_place(t_place)) {
     std::memcpy(static_cast<void *>(data), t_data, ele_num * sizeof(T));
-  } else if (place_ == PaddlePlace::kGPU) {
+  } else if (place_ == PlaceType::kGPU) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
-    auto gpu_place = BOOST_GET_CONST(platform::CUDAPlace, t_place);
-    auto *dev_ctx =
-        static_cast<const platform::CUDADeviceContext *>(pool.Get(gpu_place));
-    memory::Copy(platform::CPUPlace(), static_cast<void *>(data), gpu_place,
-                 t_data, ele_num * sizeof(T), dev_ctx->stream());
+    paddle::platform::DeviceContextPool &pool =
+        paddle::platform::DeviceContextPool::Instance();
+    auto gpu_place = BOOST_GET_CONST(paddle::platform::CUDAPlace, t_place);
+    auto *dev_ctx = static_cast<const paddle::platform::CUDADeviceContext *>(
+        pool.Get(gpu_place));
+    paddle::memory::Copy(paddle::platform::CPUPlace(),
+                         static_cast<void *>(data), gpu_place, t_data,
+                         ele_num * sizeof(T), dev_ctx->stream());
 #ifdef PADDLE_WITH_HIP
     hipStreamSynchronize(dev_ctx->stream());
 #else
     cudaStreamSynchronize(dev_ctx->stream());
 #endif
 #else
-    PADDLE_THROW(platform::errors::Unavailable(
-        "Not compile with CUDA, should not reach here."));
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "Can not create tensor with CUDA place because paddle is not compiled "
+        "with CUDA."));
 #endif
-  } else if (place_ == PaddlePlace::kXPU) {
+  } else if (place_ == PlaceType::kXPU) {
 #ifdef PADDLE_WITH_XPU
-    auto xpu_place = BOOST_GET_CONST(platform::XPUPlace, t_place);
-    memory::Copy(platform::CPUPlace(), static_cast<void *>(data), xpu_place,
-                 t_data, ele_num * sizeof(T));
+    auto xpu_place = BOOST_GET_CONST(paddle::platform::XPUPlace, t_place);
+    paddle::memory::Copy(paddle::platform::CPUPlace(),
+                         static_cast<void *>(data), xpu_place, t_data,
+                         ele_num * sizeof(T));
 #else
-    PADDLE_THROW(platform::errors::Unavailable(
-        "Not compile with XPU, should not reach here."));
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "Can not create tensor with XPU place because paddle is not compiled "
+        "with XPU."));
 #endif
   } else {
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
        "The analysis predictor supports CPU, GPU and XPU now."));
   }
 }
 
-template PD_INFER_DECL void ZeroCopyTensor::copy_from_cpu<float>(
-    const float *data);
-template PD_INFER_DECL void ZeroCopyTensor::copy_from_cpu<int64_t>(
-    const int64_t *data);
-template PD_INFER_DECL void ZeroCopyTensor::copy_from_cpu<int32_t>(
-    const int32_t *data);
-template PD_INFER_DECL void ZeroCopyTensor::copy_from_cpu<uint8_t>(
-    const uint8_t *data);
-template PD_INFER_DECL void ZeroCopyTensor::copy_from_cpu<int8_t>(
-    const int8_t *data);
+template PD_INFER_DECL void Tensor::CopyFromCpu<float>(const float *data);
+template PD_INFER_DECL void Tensor::CopyFromCpu<int64_t>(const int64_t *data);
+template PD_INFER_DECL void Tensor::CopyFromCpu<int32_t>(const int32_t *data);
+template PD_INFER_DECL void Tensor::CopyFromCpu<uint8_t>(const uint8_t *data);
+template PD_INFER_DECL void Tensor::CopyFromCpu<int8_t>(const int8_t *data);
+
+template PD_INFER_DECL void Tensor::CopyToCpu<float>(float *data);
+template PD_INFER_DECL void Tensor::CopyToCpu<int64_t>(int64_t *data);
+template PD_INFER_DECL void Tensor::CopyToCpu<int32_t>(int32_t *data);
+template PD_INFER_DECL void Tensor::CopyToCpu<uint8_t>(uint8_t *data);
+template PD_INFER_DECL void Tensor::CopyToCpu<int8_t>(int8_t *data);
 
-template PD_INFER_DECL void ZeroCopyTensor::copy_to_cpu<float>(float *data);
-template PD_INFER_DECL void ZeroCopyTensor::copy_to_cpu<int64_t>(int64_t *data);
-template PD_INFER_DECL void ZeroCopyTensor::copy_to_cpu<int32_t>(int32_t *data);
-template PD_INFER_DECL void ZeroCopyTensor::copy_to_cpu<uint8_t>(uint8_t *data);
-template PD_INFER_DECL void ZeroCopyTensor::copy_to_cpu<int8_t>(int8_t *data);
+template PD_INFER_DECL float *Tensor::data<float>(PlaceType *place,
+                                                  int *size) const;
+template PD_INFER_DECL int64_t *Tensor::data<int64_t>(PlaceType *place,
+                                                      int *size) const;
+template PD_INFER_DECL int32_t *Tensor::data<int32_t>(PlaceType *place,
+                                                      int *size) const;
+template PD_INFER_DECL uint8_t *Tensor::data<uint8_t>(PlaceType *place,
+                                                      int *size) const;
+template PD_INFER_DECL int8_t *Tensor::data<int8_t>(PlaceType *place,
+                                                    int *size) const;
 
-template PD_INFER_DECL float *ZeroCopyTensor::data<float>(PaddlePlace *place,
-                                                          int *size) const;
-template PD_INFER_DECL int64_t *ZeroCopyTensor::data<int64_t>(
-    PaddlePlace *place, int *size) const;
-template PD_INFER_DECL int32_t *ZeroCopyTensor::data<int32_t>(
-    PaddlePlace *place, int *size) const;
-template PD_INFER_DECL uint8_t *ZeroCopyTensor::data<uint8_t>(
-    PaddlePlace *place, int *size) const;
-template PD_INFER_DECL int8_t *ZeroCopyTensor::data<int8_t>(PaddlePlace *place,
-                                                            int *size) const;
+template PD_INFER_DECL float *Tensor::mutable_data<float>(PlaceType place);
+template PD_INFER_DECL int64_t *Tensor::mutable_data<int64_t>(PlaceType place);
+template PD_INFER_DECL int32_t *Tensor::mutable_data<int32_t>(PlaceType place);
+template PD_INFER_DECL uint8_t *Tensor::mutable_data<uint8_t>(PlaceType place);
+template PD_INFER_DECL int8_t *Tensor::mutable_data<int8_t>(PlaceType place);
 
-template PD_INFER_DECL float *ZeroCopyTensor::mutable_data<float>(
-    PaddlePlace place);
-template PD_INFER_DECL int64_t *ZeroCopyTensor::mutable_data<int64_t>(
-    PaddlePlace place);
-template PD_INFER_DECL int32_t *ZeroCopyTensor::mutable_data<int32_t>(
-    PaddlePlace place);
-template PD_INFER_DECL uint8_t *ZeroCopyTensor::mutable_data<uint8_t>(
-    PaddlePlace place);
-template PD_INFER_DECL int8_t *ZeroCopyTensor::mutable_data<int8_t>(
-    PaddlePlace place);
+Tensor::Tensor(void *scope) : scope_{scope} {
+  PADDLE_ENFORCE_NOT_NULL(scope_,
+                          paddle::platform::errors::PreconditionNotMet(
+                              "The `scope` can not be nullptr. It should be "
+                              "set to the pointer of scope."));
+}
 
-void *ZeroCopyTensor::FindTensor() const {
+void *Tensor::FindTensor() const {
   PADDLE_ENFORCE_EQ(
      name_.empty(), false,
-      platform::errors::PreconditionNotMet(
+      paddle::platform::errors::PreconditionNotMet(
          "Need to SetName first, so that the corresponding tensor can "
          "be retrieved."));
-  PADDLE_ENFORCE_NOT_NULL(scope_, platform::errors::PreconditionNotMet(
-                                      "The scope should not be nullptr."));
-  auto *scope = static_cast<framework::Scope *>(scope_);
+  auto *scope = static_cast<paddle::framework::Scope *>(scope_);
   auto *var = scope->FindVar(name_);
   PADDLE_ENFORCE_NOT_NULL(
-      var, platform::errors::PreconditionNotMet(
+      var, paddle::platform::errors::PreconditionNotMet(
               "No tensor called [%s] in the runtime scope", name_));
-  auto *tensor = var->GetMutable<framework::LoDTensor>();
+  auto *tensor = var->GetMutable<paddle::framework::LoDTensor>();
   return tensor;
 }
 
-std::vector<int> ZeroCopyTensor::shape() const {
+std::vector<int> Tensor::shape() const {
   EAGER_GET_TENSOR;
   PADDLE_ENFORCE_NOT_NULL(
-      tensor_, platform::errors::PreconditionNotMet(
+      tensor_, paddle::platform::errors::PreconditionNotMet(
                   "Not found tensor called %s in the scope", name_));
-  return framework::vectorize<int>(tensor->dims());
+  return paddle::framework::vectorize<int>(tensor->dims());
 }
 
-void ZeroCopyTensor::SetLoD(const std::vector<std::vector<size_t>> &x) {
+void Tensor::SetLoD(const std::vector<std::vector<size_t>> &x) {
   EAGER_GET_TENSOR;
-  framework::LoD lod;
+  paddle::framework::LoD lod;
   for (auto &level : x) {
     lod.emplace_back(level);
   }
   tensor->set_lod(lod);
 }
 
-std::vector<std::vector<size_t>> ZeroCopyTensor::lod() const {
+std::vector<std::vector<size_t>> Tensor::lod() const {
   EAGER_GET_TENSOR;
   std::vector<std::vector<size_t>> res;
   for (auto &level : tensor->lod()) {
@@ -267,4 +276,13 @@ std::vector<std::vector<size_t>> ZeroCopyTensor::lod() const {
   return res;
 }
 
-}  // namespace paddle
+void Tensor::SetName(const std::string &name) { name_ = name; }
+
+const std::string &Tensor::name() const { return name_; }
+
+void Tensor::SetPlace(PlaceType place, int device) {
+  place_ = place;
+  device_ = device;
+}
+
+}  // namespace paddle_infer
diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc b/paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc
index ea90bc74533a3415eef1850d41ad38cad7c711b7..1f1be136103791bd29a4148a784007d1e9a31744 100644
--- a/paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc
@@ -15,35 +15,35 @@
 #include "paddle/fluid/inference/api/paddle_api.h"
 #include "paddle/fluid/inference/api/paddle_infer_declare.h"
 
-namespace paddle {
+namespace paddle_infer {
 
-void ZeroCopyTensor::Reshape(const std::vector<int> &shape) {}
+void Tensor::Reshape(const std::vector<int> &shape) {}
 
 template <typename T>
-T *ZeroCopyTensor::mutable_data(PaddlePlace place) {
+T *Tensor::mutable_data(PlaceType place) {
   return nullptr;
 }
 
 template <typename T>
-T *ZeroCopyTensor::data(PaddlePlace *place, int *size) const {
+T *Tensor::data(PlaceType *place, int *size) const {
   return nullptr;
 }
 
-template PD_INFER_DECL float *ZeroCopyTensor::data<float>(PaddlePlace *place,
-                                                          int *size) const;
-template PD_INFER_DECL int64_t *ZeroCopyTensor::data<int64_t>(
-    PaddlePlace *place, int *size) const;
-template float *ZeroCopyTensor::mutable_data<float>(PaddlePlace place);
-template int64_t *ZeroCopyTensor::mutable_data<int64_t>(PaddlePlace place);
+template PD_INFER_DECL float *Tensor::data<float>(PlaceType *place,
+                                                  int *size) const;
+template PD_INFER_DECL int64_t *Tensor::data<int64_t>(PlaceType *place,
+                                                      int *size) const;
+template float *Tensor::mutable_data<float>(PlaceType place);
+template int64_t *Tensor::mutable_data<int64_t>(PlaceType place);
 
-void *ZeroCopyTensor::FindTensor() const { return nullptr; }
+void *Tensor::FindTensor() const { return nullptr; }
 
-std::vector<int> ZeroCopyTensor::shape() const { return {}; }
+std::vector<int> Tensor::shape() const { return {}; }
 
-void ZeroCopyTensor::SetLoD(const std::vector<std::vector<size_t>> &x) {}
+void Tensor::SetLoD(const std::vector<std::vector<size_t>> &x) {}
 
-std::vector<std::vector<size_t>> ZeroCopyTensor::lod() const {
+std::vector<std::vector<size_t>> Tensor::lod() const {
   return std::vector<std::vector<size_t>>();
 }
 
-}  // namespace paddle
+}  // namespace paddle_infer
diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor_test.cc b/paddle/fluid/inference/api/details/zero_copy_tensor_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..42f9259c52562d23fbd403f4cca69caea8bf7be1
--- /dev/null
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor_test.cc
@@ -0,0 +1,138 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <memory>
+#include <random>
+
+#include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/inference/api/helper.h"
+#include "paddle/fluid/inference/api/paddle_tensor.h"
+#include "paddle/fluid/platform/place.h"
+
+namespace paddle_infer {
+
+struct TensorWrapper : public Tensor {
+  TensorWrapper(paddle_infer::PlaceType place, paddle::framework::Scope* scope,
+                const std::string& name)
+      : Tensor{static_cast<void*>(scope)} {
+    SetPlace(place, 0 /*device_id*/);
+    SetName(name);
+    input_or_output_ = true;
+  }
+};
+
+std::unique_ptr<Tensor> CreateTensor(paddle_infer::PlaceType place,
+                                     paddle::framework::Scope* scope,
+                                     const std::string& name) {
+  return std::unique_ptr<Tensor>(new TensorWrapper{place, scope, name});
+}
+
+template <typename T>
+struct RandomGenerator {
+  RandomGenerator(double min = (std::numeric_limits<T>::min)(),
+                  double max = (std::numeric_limits<T>::max)())
+      : dist_{static_cast<double>(min), static_cast<double>(max)} {}
+  T operator()() { return static_cast<T>(dist_(random_engine_)); }
+
+ private:
+  std::mt19937_64 random_engine_{std::random_device()()};
+  std::uniform_real_distribution<double> dist_;
+};
+
+template <typename T, template <typename> typename G>
+bool FillRandomDataAndCheck(PlaceType place, size_t length, G<T>&& generator,
+                            float threshold = 10e-5) {
+  std::vector<T> data_in(length);
+  std::generate(data_in.begin(), data_in.end(), std::forward<G<T>>(generator));
+  paddle::framework::Scope scope;
+  const std::string name{"name"};
+  scope.Var(name);
+  auto tensor = CreateTensor(place, &scope, name);
+  tensor->CopyFromCpu(data_in.data());
+  if (tensor->type() != paddle::inference::ConvertToPaddleDType(
+                            paddle::framework::DataTypeTrait<T>::DataType())) {
+    return false;
+  }
+  std::vector<T> data_out(length);
+  tensor->CopyToCpu(data_out.data());
+  for (size_t i = 0; i < length; ++i) {
+    if (std::abs(data_in[i] - data_out[i]) > threshold) {
+      return false;
+    }
+  }
+  return true;
+}
+
+template <typename T>
+bool SetPlaceAndCheck(PlaceType place, size_t length) {
+  paddle::framework::Scope scope;
+  const std::string name{"name"};
+  const std::vector<std::vector<size_t>> lod{{0, length}};
+  scope.Var(name);
+  auto tensor = CreateTensor(place, &scope, name);
+  tensor->Reshape({static_cast<int>(length)});
+  tensor->mutable_data<T>(place);
+  tensor->SetLoD(lod);
+
+  PlaceType place_out{PlaceType::kUNK};
+  int length_out{-1};
+  tensor->data<T>(&place_out, &length_out);
+  if (length_out != static_cast<int>(length) || place_out != place) {
+    return false;
+  }
+  if (tensor->name() != name || tensor->lod() != lod) {
+    return false;
+  }
+  return true;
+}
+
+bool FillRandomDataAndCheck(PlaceType place) {
+  const size_t length{RandomGenerator<size_t>{1, 1000}()};
+  VLOG(3) << "FillRandomDataAndCheck: length = " << length;
+  return FillRandomDataAndCheck<float>(place, length,
+                                       RandomGenerator<float>{}) &&
+         FillRandomDataAndCheck<int64_t>(place, length,
+                                         RandomGenerator<int64_t>{}) &&
+         FillRandomDataAndCheck<int32_t>(place, length,
+                                         RandomGenerator<int32_t>{}) &&
+         FillRandomDataAndCheck<uint8_t>(place, length,
+                                         RandomGenerator<uint8_t>{});
+}
+
+bool SetPlaceAndCheck(PlaceType place) {
+  const size_t length{RandomGenerator<size_t>{1, 1000}()};
+  VLOG(3) << "SetPlaceAndCheck: length = " << length;
+  return SetPlaceAndCheck<float>(place, length) &&
+         SetPlaceAndCheck<int64_t>(place, length) &&
+         SetPlaceAndCheck<int32_t>(place, length) &&
+         SetPlaceAndCheck<uint8_t>(place, length);
+}
+
+TEST(Tensor, FillRandomDataAndCheck) {
+  ASSERT_TRUE(FillRandomDataAndCheck(PlaceType::kCPU));
+  ASSERT_TRUE(SetPlaceAndCheck(PlaceType::kCPU));
+#ifdef PADDLE_WITH_CUDA
+  ASSERT_TRUE(FillRandomDataAndCheck(PlaceType::kGPU));
+  ASSERT_TRUE(SetPlaceAndCheck(PlaceType::kGPU));
+#endif
+}
+
+}  // namespace paddle_infer
diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h
index 061b83e1d1e4a5180b281c672cf81f6e6a0c51bd..14b968f5834da8618f6af16aa8c25e1d1baaae5e 100644
--- a/paddle/fluid/inference/api/helper.h
+++ b/paddle/fluid/inference/api/helper.h
@@ -58,6 +58,26 @@ constexpr PaddleDType PaddleTensorGetDType<float>() {
   return PaddleDType::FLOAT32;
 }
 
+inline PaddleDType ConvertToPaddleDType(
+    paddle::framework::proto::VarType::Type type) {
+  if (type == paddle::framework::proto::VarType::FP32) {
+    return PaddleDType::FLOAT32;
+  } else if (type == paddle::framework::proto::VarType::INT64) {
+    return PaddleDType::INT64;
+  } else if (type == paddle::framework::proto::VarType::INT32) {
+    return PaddleDType::INT32;
+  } else if (type == paddle::framework::proto::VarType::UINT8) {
+    return PaddleDType::UINT8;
+  } else {
+    PADDLE_THROW(paddle::platform::errors::Unimplemented(
+        "The paddle dtype convert function only supports FLOAT32, INT64, INT32 "
+        "and UINT8 now. But "
+        "we get %d here.",
+        static_cast<int>(type)));
+    return PaddleDType::FLOAT32;
+  }
+}
+
 using paddle::framework::DataTypeToString;
 
 // Timer for timer
diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h
index c5893a23a496094bcb52a3a6fc14586540824bf6..3e92ffaf9dcbc9816cac4887513bb687657c63a3 100644
--- a/paddle/fluid/inference/api/paddle_api.h
+++ b/paddle/fluid/inference/api/paddle_api.h
@@ -29,19 +29,13 @@
 #include <vector>
 #include "crypto/cipher.h"
 #include "paddle_infer_declare.h"  // NOLINT
+#include "paddle_tensor.h"         // NOLINT
 
 /*! \namespace paddle
 */
 namespace paddle {
 
-/// \brief Paddle data type.
-enum PaddleDType {
-  FLOAT32,
-  INT64,
-  INT32,
-  UINT8,
-  INT8,
-  // TODO(Superjomn) support more data types if needed.
-};
+using PaddleDType = paddle_infer::DataType;
+using PaddlePlace = paddle_infer::PlaceType;
 
 /// \brief Memory manager for PaddleTensor.
 ///
@@ -162,8 +156,6 @@ struct PD_INFER_DECL PaddleTensor {
   std::vector<std::vector<size_t>> lod;  ///< Tensor+LoD equals LoDTensor
 };
 
-enum class PaddlePlace { kUNK = -1, kCPU, kGPU, kXPU };
-
 /// \brief Represents an n-dimensional array of values.
 /// The ZeroCopyTensor is used to store the input or output of the network.
 /// Zero copy means that the tensor supports direct copy of host or device data
@@ -172,79 +164,27 @@ enum class PaddlePlace { kUNK = -1, kCPU, kGPU, kXPU };
 /// AnalysisPredictor.
 /// It is obtained through PaddlePredictor::GetinputTensor()
 /// and PaddlePredictor::GetOutputTensor() interface.
-class PD_INFER_DECL ZeroCopyTensor {
- public:
-  /// \brief Reset the shape of the tensor.
-  /// Generally it's only used for the input tensor.
-  /// Reshape must be called before calling mutable_data() or copy_from_cpu()
-  /// \param shape The shape to set.
-  void Reshape(const std::vector<int>& shape);
-
-  /// \brief Get the memory pointer in CPU or GPU with specific data type.
-  /// Please Reshape the tensor first before call this.
-  /// It's usually used to get input data pointer.
-  /// \param place The place of the tensor.
-  template <typename T>
-  T* mutable_data(PaddlePlace place);
-
-  /// \brief Get the memory pointer directly.
-  /// It's usually used to get the output data pointer.
-  /// \param[out] place To get the device type of the tensor.
-  /// \param[out] size To get the data size of the tensor.
-  /// \return The tensor data buffer pointer.
-  template <typename T>
-  T* data(PaddlePlace* place, int* size) const;
+class PD_INFER_DECL ZeroCopyTensor : public paddle_infer::Tensor {
+ public:
   /// \brief Copy the host memory to tensor data.
   /// It's usually used to set the input tensor data.
   /// \param data The pointer of the data, from which the tensor will copy.
   template <typename T>
-  void copy_from_cpu(const T* data);
-
+  void copy_from_cpu(const T* data) {
+    return CopyFromCpu(data);
+  }
   /// \brief Copy the tensor data to the host memory.
   /// It's usually used to get the output tensor data.
   /// \param[out] data The tensor will copy the data to the address.
   template <typename T>
-  void copy_to_cpu(T* data);
-
-  /// \brief Return the shape of the Tensor.
-  std::vector<int> shape() const;
-
-  /// \brief Set lod info of the tensor.
-  /// More about LOD can be seen here:
-  /// https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/basic_concept/lod_tensor.html#lodtensor
-  /// \param x the lod info.
-  void SetLoD(const std::vector<std::vector<size_t>>& x);
-  /// \brief Return the lod info of the tensor.
-  std::vector<std::vector<size_t>> lod() const;
-  /// \brief Return the name of the tensor.
-  const std::string& name() const { return name_; }
-  void SetPlace(PaddlePlace place, int device = -1) {
-    place_ = place;
-    device_ = device;
+  void copy_to_cpu(T* data) {
+    return CopyToCpu(data);
   }
-
-  /// \brief Return the data type of the tensor.
-  /// It's usually used to get the output tensor data type.
-  /// \return The data type of the tensor.
-  PaddleDType type() const;
-
- protected:
-  explicit ZeroCopyTensor(void* scope) : scope_{scope} {}
-  void SetName(const std::string& name) { name_ = name; }
-  void* FindTensor() const;
-
  private:
-  std::string name_;
-  bool input_or_output_;
   friend class AnalysisPredictor;
-  void* scope_{nullptr};
-  // The corresponding tensor pointer inside Paddle workspace is cached for
-  // performance.
-  mutable void* tensor_{nullptr};
-  PaddlePlace place_;
-  PaddleDType dtype_;
-  int device_;
+  explicit ZeroCopyTensor(void* scope) : paddle_infer::Tensor{scope} {}
 };
 
 /// \brief A Predictor for executing inference on a model.
diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h
index 2e1e3b822d164d995be95d3dad6a7752371b7636..a516abb1432ca89b9343567128faf0b1692822f8 100644
--- a/paddle/fluid/inference/api/paddle_inference_api.h
+++ b/paddle/fluid/inference/api/paddle_inference_api.h
@@ -42,97 +42,10 @@ limitations under the License. */
 ///
 namespace paddle_infer {
-using DataType = paddle::PaddleDType;
-using PlaceType = paddle::PaddlePlace;
+
 using PrecisionType = paddle::AnalysisConfig::Precision;
 using Config = paddle::AnalysisConfig;
 
-///
-/// \class Tensor
-///
-/// \brief Represents an n-dimensional array of values.
-/// The Tensor is used to store the input or output of the network.
-/// It is obtained through Predictor::GetinputHandle()
-/// and Predictor::GetOutputHandle() interface.
-///
-class PD_INFER_DECL Tensor {
- public:
-  // Can only be created by predictor->GetInputHandle(cosnt std::string& name)
-  // or predictor->GetOutputHandle(cosnt std::string& name)
-  Tensor() = delete;
-  explicit Tensor(std::unique_ptr<ZeroCopyTensor>&& tensor)
-      : tensor_(std::move(tensor)) {}
-
-  ///
-  /// \brief Reset the shape of the tensor.
-  /// Generally it's only used for the input tensor.
-  /// Reshape must be called before calling mutable_data() or CopyFromCpu()
-  /// \param shape The shape to set.
-  ///
-  void Reshape(const std::vector<int>& shape);
-
-  ///
-  /// \brief Copy the host memory to tensor data.
-  /// It's usually used to set the input tensor data.
-  /// \param data The pointer of the data, from which the tensor will copy.
-  ///
-  template <typename T>
-  void CopyFromCpu(const T* data);
-
-  ///
-  /// \brief Get the memory pointer in CPU or GPU with specific data type.
-  /// Please Reshape the tensor first before call this.
-  /// It's usually used to get input data pointer.
-  /// \param place The place of the tensor.
-  /// \return The tensor data buffer pointer.
-  ///
-  template <typename T>
-  T* mutable_data(PlaceType place);
-
-  ///
-  /// \brief Copy the tensor data to the host memory.
-  /// It's usually used to get the output tensor data.
-  /// \param[out] data The tensor will copy the data to the address.
-  ///
-  template <typename T>
-  void CopyToCpu(T* data);
-
-  ///
-  /// \brief Get the memory pointer directly.
-  /// It's usually used to get the output data pointer.
-  /// \param[out] place To get the device type of the tensor.
-  /// \param[out] size To get the data size of the tensor.
-  /// \return The tensor data buffer pointer.
-  ///
-  template <typename T>
-  T* data(PlaceType* place, int* size) const;
-
-  ///
-  /// \brief Set lod info of the tensor.
-  /// More about LOD can be seen here:
-  /// https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/basic_concept/lod_tensor.html#lodtensor
-  /// \param x the lod info.
-  ///
-  void SetLoD(const std::vector<std::vector<size_t>>& x);
-
-  /// \brief Return the lod info of the tensor.
-  std::vector<std::vector<size_t>> lod() const;
-
-  /// \brief Return the data type of the tensor.
-  /// It's usually used to get the output tensor data type.
-  /// \return The data type of the tensor.
-  DataType type() const;
-
-  /// \brief Return the shape of the Tensor.
-  std::vector<int> shape() const;
-
-  /// \brief Return the name of the tensor.
-  const std::string& name() const;
-
- private:
-  std::unique_ptr<ZeroCopyTensor> tensor_;
-};
-
 ///
 /// \class Predictor
 ///
@@ -258,31 +171,7 @@ PD_INFER_DECL int GetNumBytesOfDataType(DataType dtype);
 PD_INFER_DECL std::string GetVersion();
 PD_INFER_DECL std::string UpdateDllFlag(const char* name, const char* value);
 
-template <typename T>
-void Tensor::CopyFromCpu(const T* data) {
-  tensor_->copy_from_cpu(data);
-}
-
-template <typename T>
-void Tensor::CopyToCpu(T* data) {
-  return tensor_->copy_to_cpu(data);
-}
-
-template <typename T>
-T* Tensor::mutable_data(PlaceType place) {
-  return tensor_->mutable_data<T>(place);
-}
-
-template <typename T>
-T* Tensor::data(PlaceType* place, int* size) const {
-  return tensor_->data<T>(place, size);
-}
-
-}  // namespace paddle_infer
-
-namespace paddle_infer {
 namespace services {
-
 ///
 /// \class PredictorPool
 ///
@@ -308,4 +197,5 @@ class PD_INFER_DECL PredictorPool {
   std::vector<std::unique_ptr<Predictor>> preds_;
 };
 }  // namespace services
+
 }  // namespace paddle_infer
diff --git a/paddle/fluid/inference/api/paddle_tensor.h b/paddle/fluid/inference/api/paddle_tensor.h
new file mode 100644
index 0000000000000000000000000000000000000000..9c4e5858af3ad9f9910ebb8a7b052777afc8c305
--- /dev/null
+++ b/paddle/fluid/inference/api/paddle_tensor.h
@@ -0,0 +1,111 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle_infer_declare.h"  // NOLINT
+
+namespace paddle_infer {
+
+/// \brief Paddle data type.
+enum DataType {
+  FLOAT32,
+  INT64,
+  INT32,
+  UINT8,
+  INT8,
+  // TODO(Superjomn) support more data types if needed.
+};
+
+enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU };
+
+/// \brief Represents an n-dimensional array of values.
+/// The Tensor is used to store the input or output of the network.
+/// Zero copy means that the tensor supports direct copy of host or device data
+/// to device,
+/// eliminating additional CPU copy. Tensor is only used in the
+/// AnalysisPredictor.
+/// It is obtained through PaddlePredictor::GetInputTensor()
+/// and PaddlePredictor::GetOutputTensor() interface.
+class PD_INFER_DECL Tensor {
+ public:
+  /// \brief Reset the shape of the tensor.
+  /// Generally it's only used for the input tensor.
+  /// Reshape must be called before calling mutable_data() or CopyFromCpu()
+  /// \param shape The shape to set.
+  void Reshape(const std::vector<int>& shape);
+
+  /// \brief Get the memory pointer in CPU or GPU with specific data type.
+  /// Please Reshape the tensor first before call this.
+  /// It's usually used to get input data pointer.
+  /// \param place The place of the tensor.
+  template <typename T>
+  T* mutable_data(PlaceType place);
+
+  /// \brief Get the memory pointer directly.
+  /// It's usually used to get the output data pointer.
+  /// \param[out] place To get the device type of the tensor.
+  /// \param[out] size To get the data size of the tensor.
+  /// \return The tensor data buffer pointer.
+  template <typename T>
+  T* data(PlaceType* place, int* size) const;
+
+  /// \brief Copy the host memory to tensor data.
+  /// It's usually used to set the input tensor data.
+  /// \param data The pointer of the data, from which the tensor will copy.
+  template <typename T>
+  void CopyFromCpu(const T* data);
+
+  /// \brief Copy the tensor data to the host memory.
+  /// It's usually used to get the output tensor data.
+  /// \param[out] data The tensor will copy the data to the address.
+  template <typename T>
+  void CopyToCpu(T* data);
+
+  /// \brief Return the shape of the Tensor.
+  std::vector<int> shape() const;
+
+  /// \brief Set lod info of the tensor.
+  /// More about LOD can be seen here:
+  /// https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/basic_concept/lod_tensor.html#lodtensor
+  /// \param x the lod info.
+  void SetLoD(const std::vector<std::vector<size_t>>& x);
+  /// \brief Return the lod info of the tensor.
+  std::vector<std::vector<size_t>> lod() const;
+  /// \brief Return the name of the tensor.
+  const std::string& name() const;
+
+  /// \brief Return the data type of the tensor.
+  /// It's usually used to get the output tensor data type.
+  /// \return The data type of the tensor.
+  DataType type() const;
+
+ protected:
+  explicit Tensor(void* scope);
+  void* FindTensor() const;
+  void SetPlace(PlaceType place, int device = -1);
+  void SetName(const std::string& name);
+
+  std::string name_;
+  // The corresponding tensor pointer inside Paddle workspace is cached for
+  // performance.
+  mutable void* tensor_{nullptr};
+  DataType dtype_;
+  bool input_or_output_;
+  void* scope_{nullptr};
+  PlaceType place_;
+  int device_;
+};
+
+}  // namespace paddle_infer
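---

Reviewer note (illustrative, not part of the patch): the sketch below shows how the relocated paddle_infer::Tensor API is driven end to end after this change. Config, CreatePredictor, GetInputHandle and GetOutputHandle are the existing paddle_infer entry points touched above; the model file names and the 1x3x224x224 fp32 input shape are placeholder assumptions, not something this patch defines.

    // Minimal usage sketch (C++), assuming an fp32 image model run on CPU.
    #include <functional>
    #include <numeric>
    #include <vector>

    #include "paddle/fluid/inference/api/paddle_inference_api.h"

    int main() {
      paddle_infer::Config config;
      config.SetModel("model.pdmodel", "model.pdiparams");  // hypothetical paths

      auto predictor = paddle_infer::CreatePredictor(config);

      // After this patch, GetInputHandle() returns the tensor produced by
      // AnalysisPredictor::GetInputTensor() directly: ZeroCopyTensor is now a
      // subclass of paddle_infer::Tensor, so no wrapper object is allocated.
      auto input = predictor->GetInputHandle(predictor->GetInputNames()[0]);
      std::vector<float> in_data(1 * 3 * 224 * 224, 1.0f);
      input->Reshape({1, 3, 224, 224});    // must precede CopyFromCpu()
      input->CopyFromCpu(in_data.data());  // renamed from copy_from_cpu()

      predictor->Run();

      auto output = predictor->GetOutputHandle(predictor->GetOutputNames()[0]);
      std::vector<int> out_shape = output->shape();
      int numel = std::accumulate(out_shape.begin(), out_shape.end(), 1,
                                  std::multiplies<int>());
      std::vector<float> out_data(numel);
      output->CopyToCpu(out_data.data());  // renamed from copy_to_cpu()
      return 0;
    }

Existing code that uses the old paddle::ZeroCopyTensor spelling keeps compiling, since copy_from_cpu()/copy_to_cpu() remain as thin inline forwarders to CopyFromCpu()/CopyToCpu().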