From 1fb93746561838bd14299a99026edde8fc398d02 Mon Sep 17 00:00:00 2001
From: superjomn
Date: Tue, 23 Apr 2019 13:02:52 +0800
Subject: [PATCH] correct the running logic of the host model

- make the target wrapper for host work
- code cleanup

---
 paddle/fluid/lite/CMakeLists.txt                | 1 +
 paddle/fluid/lite/api/CMakeLists.txt            | 2 +-
 paddle/fluid/lite/api/cxx_api.h                 | 2 +-
 paddle/fluid/lite/api/cxx_api_test.cc           | 16 ++++-
 paddle/fluid/lite/core/kernel.h                 | 7 +++
 paddle/fluid/lite/core/memory.h                 | 32 +++++++-------
 paddle/fluid/lite/core/mir/ssa_graph.h          | 3 --
 paddle/fluid/lite/core/op_lite.cc               | 1 +
 paddle/fluid/lite/core/program.h                | 6 +++
 paddle/fluid/lite/core/target_wrapper.h         | 44 +++++++++++++++++--
 paddle/fluid/lite/core/tensor.h                 | 4 +-
 paddle/fluid/lite/core/type_system.h            | 36 +++++++++++++--
 paddle/fluid/lite/cuda/target_wrapper.cc        | 6 ++-
 paddle/fluid/lite/host/CMakeLists.txt           | 1 +
 paddle/fluid/lite/host/target_wrapper.cc        | 33 ++++++++++++++
 .../fluid/lite/kernels/host/feed_compute.cc     | 4 +-
 .../fluid/lite/kernels/host/fetch_compute.cc    | 2 +-
 paddle/fluid/lite/kernels/host/mul_compute.cc   | 29 ++++++------
 .../fluid/lite/kernels/host/scale_compute.cc    | 8 ++--
 .../fluid/lite/model_parser/model_parser.cc     | 2 +-
 paddle/fluid/lite/operators/feed_op.cc          | 1 -
 paddle/fluid/lite/utils/macros.h                | 7 +++
 22 files changed, 193 insertions(+), 54 deletions(-)
 create mode 100644 paddle/fluid/lite/host/CMakeLists.txt
 create mode 100644 paddle/fluid/lite/host/target_wrapper.cc

diff --git a/paddle/fluid/lite/CMakeLists.txt b/paddle/fluid/lite/CMakeLists.txt
index 804a0bcda35..98357922eb8 100644
--- a/paddle/fluid/lite/CMakeLists.txt
+++ b/paddle/fluid/lite/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_subdirectory(core)
 add_subdirectory(x86)
+add_subdirectory(host)
 add_subdirectory(cuda)
 add_subdirectory(operators)
 add_subdirectory(kernels)
diff --git a/paddle/fluid/lite/api/CMakeLists.txt b/paddle/fluid/lite/api/CMakeLists.txt
index c2c40c36bcc..73989dda91b 100644
--- a/paddle/fluid/lite/api/CMakeLists.txt
+++ b/paddle/fluid/lite/api/CMakeLists.txt
@@ -1,3 +1,3 @@
-cc_library(cxx_api_lite SRCS cxx_api.cc DEPS scope_lite op_executor_lite host_kernels ops_lite optimizer_lite)
+cc_library(cxx_api_lite SRCS cxx_api.cc DEPS scope_lite op_executor_lite host_kernels ops_lite optimizer_lite target_wrapper_host)
 cc_test(test_cxx_api_lite SRCS cxx_api_test.cc DEPS cxx_api_lite model_parser_lite)
diff --git a/paddle/fluid/lite/api/cxx_api.h b/paddle/fluid/lite/api/cxx_api.h
index 218f24c7a3c..d6b5f09dcbc 100644
--- a/paddle/fluid/lite/api/cxx_api.h
+++ b/paddle/fluid/lite/api/cxx_api.h
@@ -58,7 +58,7 @@ class Predictor {
   const Tensor* GetOutput(size_t offset) {
     auto* _fetch_list = program_->exec_scope()->FindVar("fetch");
     CHECK(_fetch_list) << "no fetch variable in exec_scope";
-    auto fetch_list = _fetch_list->Get<std::vector<lite::Tensor>>();
+    auto& fetch_list = *_fetch_list->GetMutable<std::vector<lite::Tensor>>();
     CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow";
     return &fetch_list.at(offset);
   }
diff --git a/paddle/fluid/lite/api/cxx_api_test.cc b/paddle/fluid/lite/api/cxx_api_test.cc
index 515ef9c682e..43bbc69a920 100644
--- a/paddle/fluid/lite/api/cxx_api_test.cc
+++ b/paddle/fluid/lite/api/cxx_api_test.cc
@@ -28,8 +28,22 @@ TEST(CXXApi, test) {
   auto* input_tensor = predictor.GetInput(0);
   input_tensor->Resize({100, 100});
-  input_tensor->mutable_data<float>();
+  auto* data = input_tensor->mutable_data<float>();
+  for (int i = 0; i < 100 * 100; i++) {
+    data[i] = i;
+  }
+
+  LOG(INFO) << "input " << input_tensor;
+  LOG(INFO) << "input " << *input_tensor;
+
   predictor.Run();
+
+  auto* out = predictor.GetOutput(0);
+  LOG(INFO) << out << " memory size " << out->memory_size();
+  LOG(INFO) << "out " << out->data<float>()[0];
+  LOG(INFO) << "out " << out->data<float>()[1];
+  LOG(INFO) << "dims " << out->dims();
+  LOG(INFO) << "out " << *out;
 }

 }  // namespace lite
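Worth spelling out on the GetOutput() change above: with a plain `auto`, the old code bound a *copy* of the fetch vector, so the returned pointer referred to storage destroyed when the function returned. Binding a reference to the scope-owned vector keeps the pointer valid. A minimal standalone sketch of the two behaviors (names here are illustrative, not Paddle APIs):

#include <cassert>
#include <cstddef>
#include <vector>

// `store` stands in for the scope-owned fetch list.
static std::vector<float> store{1.f, 2.f, 3.f};

const float* bad_get(std::size_t i) {
  auto copy = store;   // `auto` without `&` copies the vector...
  return &copy.at(i);  // ...so this pointer dangles once `copy` dies.
}

const float* good_get(std::size_t i) {
  auto& ref = store;   // bind a reference to the long-lived vector
  return &ref.at(i);   // valid for as long as `store` is alive
}

int main() {
  assert(*good_get(1) == 2.f);  // fine
  // *bad_get(1) would be undefined behavior: the pointee was destroyed.
  return 0;
}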
diff --git a/paddle/fluid/lite/core/kernel.h b/paddle/fluid/lite/core/kernel.h
index 9fb7b60b34e..9db4874d1bd 100644
--- a/paddle/fluid/lite/core/kernel.h
+++ b/paddle/fluid/lite/core/kernel.h
@@ -65,6 +65,13 @@ class KernelBase {

   virtual ~KernelBase() = default;

+  std::string DebugString() const {
+    std::stringstream ss;
+    ss << op_type() << ":" << TargetToStr(target()) << "/"
+       << PrecisionToStr(precision()) << "/" << DataLayoutToStr(layout());
+    return ss.str();
+  }
+
  protected:
   std::unique_ptr<KernelContext> context_;
   mutable operators::param_t param_;
diff --git a/paddle/fluid/lite/core/memory.h b/paddle/fluid/lite/core/memory.h
index ccfe412edad..6cb321c637b 100644
--- a/paddle/fluid/lite/core/memory.h
+++ b/paddle/fluid/lite/core/memory.h
@@ -21,18 +21,16 @@ namespace lite {
 static void* TargetMalloc(TargetType target, size_t size) {
   void* data{nullptr};
-  switch (static_cast<int>(target)) {
-    case static_cast<int>(TargetType::kX86):
-      data = TargetWrapper<TARGET(kX86)>::Malloc(size);
+  switch (target) {
+    case TargetType::kHost:
+    case TargetType::kX86:
+      data = TargetWrapper<TARGET(kHost)>::Malloc(size);
       break;
-    case static_cast<int>(TargetType::kCUDA):
+    case TargetType::kCUDA:
       data = TargetWrapper<TARGET(kCUDA)>::Malloc(size);
       break;
-    case static_cast<int>(TargetType::kHost):
-      data = TargetWrapper<TARGET(kHost)>::Malloc(size);
-      break;
     default:
-      LOG(FATAL) << "Unknown type";
+      LOG(FATAL) << "Unsupported target " << TargetToStr(target);
   }
   return data;
 }
@@ -52,17 +50,19 @@ static void TargetFree(TargetType target, void* data) {

 static void TargetCopy(TargetType target, void* dst, const void* src,
                        size_t size) {
-  switch (static_cast<int>(target)) {
-    case static_cast<int>(TargetType::kX86):
-    case static_cast<int>(TargetType::kHost):
+  switch (target) {
+    case TargetType::kX86:
+    case TargetType::kHost:
       TargetWrapper<TARGET(kHost)>::MemcpySync(dst, src, size,
                                                IoDirection::DtoD);
       break;
-    case static_cast<int>(TargetType::kCUDA):
+    case TargetType::kCUDA:
       TargetWrapper<TARGET(kCUDA)>::MemcpySync(dst, src, size,
                                                IoDirection::DtoD);
       break;
+    default:
+      LOG(FATAL) << "unsupported type";
   }
 }
@@ -79,12 +79,10 @@ class Buffer {

   void ResetLazy(TargetType target, size_t size) {
     if (target != target_ || space_ < size) {
       Free();
+      data_ = TargetMalloc(target, size);
+      target_ = target;
+      space_ = size;
     }
-
-    if (size < space_) return;
-    target_ = target;
-    data_ = TargetMalloc(target, size);
-    space_ = size;
   }

   void ResizeLazy(size_t size) { ResetLazy(target_, size); }
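The rewritten ResetLazy collapses the earlier two-step logic into one rule: reallocate only when the target changes or the current capacity is too small; otherwise reuse the existing block. A minimal sketch of that policy in plain C++ (hypothetical names; malloc/free stand in for TargetMalloc/Free):

#include <cassert>
#include <cstdlib>

// Grow-only lazy buffer keyed on (target, capacity).
struct LazyBuffer {
  int target{-1};           // stand-in for TargetType
  void* data{nullptr};
  std::size_t space{0};     // current capacity in bytes

  void ResetLazy(int new_target, std::size_t size) {
    if (new_target != target || space < size) {
      std::free(data);            // Free() in the real code
      data = std::malloc(size);   // TargetMalloc() in the real code
      target = new_target;
      space = size;
    }
    // else: the existing allocation is reused untouched
  }
};

int main() {
  LazyBuffer buf;
  buf.ResetLazy(0, 128);
  void* first = buf.data;
  buf.ResetLazy(0, 64);   // smaller request: no reallocation
  assert(buf.data == first && buf.space == 128);
  buf.ResetLazy(0, 256);  // larger request: fresh allocation
  assert(buf.space == 256);
  return 0;
}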
diff --git a/paddle/fluid/lite/core/mir/ssa_graph.h b/paddle/fluid/lite/core/mir/ssa_graph.h
index 81b1d8565ef..070747f4e57 100644
--- a/paddle/fluid/lite/core/mir/ssa_graph.h
+++ b/paddle/fluid/lite/core/mir/ssa_graph.h
@@ -60,9 +60,6 @@ class SSAGraph : GraphBase {
       op->SetValidPlaces(valid_places);
       auto &new_node = node_storage_.back();
       auto kernels = op->CreateKernels(valid_places);
-      for (auto &kernel : kernels) {
-        op->AttachKernel(kernel.get());
-      }
       node_storage_.back().AsInstruct(op->op_type_, std::move(kernels), op,
                                       op->op_info());
diff --git a/paddle/fluid/lite/core/op_lite.cc b/paddle/fluid/lite/core/op_lite.cc
index 1493b577c5d..86557ce0abc 100644
--- a/paddle/fluid/lite/core/op_lite.cc
+++ b/paddle/fluid/lite/core/op_lite.cc
@@ -29,6 +29,7 @@ std::vector<std::unique_ptr<KernelBase>> OpLite::CreateKernels(
         (kernel_type.empty() ? op_type_ : kernel_type), place.target,
         place.precision);
     for (auto &&it : ks) {
+      AttachKernel(it.get());
       kernels.emplace_back(std::move(it));
     }
   }
diff --git a/paddle/fluid/lite/core/program.h b/paddle/fluid/lite/core/program.h
index 8046a3473fc..a31a0eb0d5f 100644
--- a/paddle/fluid/lite/core/program.h
+++ b/paddle/fluid/lite/core/program.h
@@ -105,6 +105,11 @@ struct Instruction {
   void Run() {
     CHECK(op_);
     CHECK(kernel_);
+    LOG(INFO) << "running kernel> " << kernel_->DebugString();
+    if (UNLIKELY(first_epoch_)) {
+      first_epoch_ = false;
+      op_->CheckShape();
+    }
     op_->InferShape();
     kernel_->Run();
   }
@@ -112,6 +117,7 @@ struct Instruction {
  private:
   std::shared_ptr<OpLite> op_;
   std::unique_ptr<KernelBase> kernel_;
+  bool first_epoch_{true};
 };

 /*
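The Instruction::Run change above moves CheckShape() behind a run-once guard: shape legality is validated on the first invocation only, while InferShape() and the kernel still run every iteration. A sketch of the pattern, with stand-in classes rather than the real Instruction:

#include <iostream>

// Run-once guard on the hot path; the real code wraps the flag
// test in UNLIKELY() so the common branch is the fall-through.
struct FakeInstruction {   // hypothetical stand-in
  bool first_epoch{true};

  void Run() {
    if (first_epoch) {     // UNLIKELY(first_epoch_) in the patch
      first_epoch = false;
      std::cout << "CheckShape: validated once\n";  // op_->CheckShape()
    }
    std::cout << "InferShape + kernel Run\n";       // every iteration
  }
};

int main() {
  FakeInstruction insn;
  for (int i = 0; i < 3; ++i) insn.Run();  // "validated once" prints a single time
  return 0;
}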
diff --git a/paddle/fluid/lite/core/target_wrapper.h b/paddle/fluid/lite/core/target_wrapper.h
index c8b7aa18cb8..2852f582572 100644
--- a/paddle/fluid/lite/core/target_wrapper.h
+++ b/paddle/fluid/lite/core/target_wrapper.h
@@ -13,6 +13,7 @@
 // limitations under the License.

 #pragma once
+#include <glog/logging.h>
 #include <iostream>
 #include <sstream>

@@ -138,11 +139,48 @@ class TargetWrapper {

   static void StreamSync(const stream_t& stream) {}

-  static void* Malloc(size_t size) { return new char[size]; }
-  static void Free(void* ptr) { delete[] static_cast<char*>(ptr); }
+  static void* Malloc(size_t size) {
+    LOG(FATAL) << "Unimplemented malloc for " << TargetToStr(Target);
+    return nullptr;
+  }
+  static void Free(void* ptr) { LOG(FATAL) << "Unimplemented"; }
+
+  static void MemcpySync(void* dst, const void* src, size_t size,
+                         IoDirection dir) {
+    LOG(FATAL) << "Unimplemented";
+  }
+  static void MemcpyAsync(void* dst, const void* src, size_t size,
+                          IoDirection dir, const stream_t& stream) {
+    MemcpySync(dst, src, size, dir);
+  }
+};
+
+// This interface should be specialized by each kind of target.
+template <>
+class TargetWrapper<TARGET(kHost)> {
+ public:
+  using stream_t = int;
+  using event_t = int;
+
+  static size_t num_devices() { return 0; }
+  static size_t maximum_stream() { return 0; }
+
+  static void CreateStream(stream_t* stream) {}
+  static void DestroyStream(const stream_t& stream) {}
+
+  static void CreateEvent(event_t* event) {}
+  static void DestroyEvent(const event_t& event) {}
+
+  static void RecordEvent(const event_t& event) {}
+  static void SyncEvent(const event_t& event) {}
+
+  static void StreamSync(const stream_t& stream) {}
+
+  static void* Malloc(size_t size);
+  static void Free(void* ptr);

   static void MemcpySync(void* dst, const void* src, size_t size,
-                         IoDirection dir) {}
+                         IoDirection dir);
   static void MemcpyAsync(void* dst, const void* src, size_t size,
                           IoDirection dir, const stream_t& stream) {
     MemcpySync(dst, src, size, dir);
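The header now follows a classic dispatch pattern: the primary TargetWrapper template is a loud stub that aborts with "Unimplemented", and each backend supplies a full specialization; the right one is selected at compile time from the target enum. A self-contained sketch of that pattern (simplified stand-ins, without the TARGET macro or lite::TargetType):

#include <cstdio>
#include <cstdlib>

enum class Target { kHost, kCUDA };

// Primary template: every method is a fatal stub.
template <Target T>
struct Wrapper {
  static void* Malloc(std::size_t) {
    std::fprintf(stderr, "Unimplemented malloc for this target\n");
    std::abort();  // LOG(FATAL) in the real header
  }
};

// Host backend: a full specialization with a real implementation.
template <>
struct Wrapper<Target::kHost> {
  static void* Malloc(std::size_t size) { return new char[size]; }
  static void Free(void* ptr) { delete[] static_cast<char*>(ptr); }
};

int main() {
  void* p = Wrapper<Target::kHost>::Malloc(64);  // resolved at compile time
  Wrapper<Target::kHost>::Free(p);
  // Wrapper<Target::kCUDA>::Malloc(64) would abort: no specialization yet.
  return 0;
}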
diff --git a/paddle/fluid/lite/core/tensor.h b/paddle/fluid/lite/core/tensor.h
index 864e5db9ecd..f78b9080139 100644
--- a/paddle/fluid/lite/core/tensor.h
+++ b/paddle/fluid/lite/core/tensor.h
@@ -95,13 +95,15 @@ class Tensor {
     dims_ = other.dims_;
     target_ = other.target_;
     lod_ = other.lod_;
+    memory_size_ = other.memory_size_;
   }

   void CopyDataFrom(const Tensor& other) {
     dims_ = other.dims_;
     target_ = other.target_;
     lod_ = other.lod_;
-    *buffer_ = *other.buffer_;
+    memory_size_ = other.memory_size_;
+    buffer_->CopyDataFrom(*other.buffer_, memory_size_);
   }

   TargetType target() const { return target_; }
diff --git a/paddle/fluid/lite/core/type_system.h b/paddle/fluid/lite/core/type_system.h
index e9aabee51f2..c8c0c0f5c12 100644
--- a/paddle/fluid/lite/core/type_system.h
+++ b/paddle/fluid/lite/core/type_system.h
@@ -38,9 +38,39 @@ namespace lite {
 // The DNN system is simple, and the architecture cannot process as many data
 // types as a compiler, or it will turn into chaos.
 //
-// We should make sure that supported data types should be registered here, and
-// keep the quantity small. And avoid using some special data types as op's IO,
-// such as some runtime cache, that need to be avoided.
+// We should make sure that the supported data types are registered here, and
+// keep their number small; avoid using special data types as an op's
+// inputs or outputs, such as runtime caches, since those types cannot be
+// processed by the MIR.
+//
+// A tensor with a different place (target, precision, data layout or device)
+// should be treated as a different type. Different types might be compatible
+// with each other; for example, `VoidTy` means any type, so any other type
+// can be treated as a `VoidTy`.
+//
+// Different types can be transformed into others by inserting special
+// transforming operators; for example, a DataLayoutTransformOp can convert a
+// `TensorFp32NCHWTy` to a `TensorFp32NHWCTy`, and an IoCopyOp can convert a
+// `TensorFp32NCHWTy(kHost)` to a `TensorFp32NCHWTy(kCUDA)`. There are many
+// other conversions between different types, but some are unsupported; for
+// example, there is no way to convert an `UnsupportedTy` to a `TensorAnyTy`.
+//
+// We use types to declare the definition of a kernel: each input and output
+// argument has a specific type:
+//
+// REGISTER_LITE_KERNEL(mul, kHost, kFloat,
+//                      paddle::lite::kernels::host::MulCompute, def)
+//     .BindInput("X", {paddle::lite::Type::Get<TensorFp32NCHWTy>(
+//                         TARGET(kHost))})
+//     .BindInput("Y", {paddle::lite::Type::Get<TensorFp32NCHWTy>(
+//                         TARGET(kHost))})
+//     .BindOutput("Out",
+//                 {paddle::lite::Type::Get<TensorFp32NCHWTy>(TARGET(kHost))})
+//     .Finalize();
+//
+// The above definition will be used by the MIR for type inference and
+// incompatible-type checks.
 //
 // TODO(Superjomn) Add operator/kernel-wise static checking to avoid
 // unsupported type mixing in the system.
diff --git a/paddle/fluid/lite/cuda/target_wrapper.cc b/paddle/fluid/lite/cuda/target_wrapper.cc
index 21df004aa91..cca9a95d8e0 100644
--- a/paddle/fluid/lite/cuda/target_wrapper.cc
+++ b/paddle/fluid/lite/cuda/target_wrapper.cc
@@ -26,12 +26,14 @@ using TargetW = TargetWrapper<TARGET(kCUDA)>;

 template <>
 void* TargetW::Malloc(size_t size) {
-  return new char[size];
+  void* ptr{};
+  CHECK_EQ(cudaSuccess, cudaMalloc(&ptr, size));
+  return ptr;
 }

 template <>
 void TargetW::Free(void* ptr) {
-  delete[] static_cast<char*>(ptr);
+  CHECK_EQ(cudaSuccess, cudaFree(ptr));
 }

 template <>
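The CUDA wrapper now calls the real device allocator and asserts on the returned status instead of handing out pageable host memory via new[]. The CHECK_EQ(cudaSuccess, ...) idiom generalizes to any CUDA runtime call; here is a hedged sketch using the standard CUDA runtime API, with a minimal stand-in for glog's CHECK_EQ:

#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Every CUDA runtime call returns cudaError_t; anything but
// cudaSuccess is treated as fatal, as in the patched wrapper.
#define CUDA_CHECK(expr)                                              \
  do {                                                                \
    cudaError_t status = (expr);                                      \
    if (status != cudaSuccess) {                                      \
      std::fprintf(stderr, "CUDA error %s at %s:%d\n",                \
                   cudaGetErrorString(status), __FILE__, __LINE__);   \
      std::abort();                                                   \
    }                                                                 \
  } while (0)

int main() {
  void* ptr = nullptr;
  CUDA_CHECK(cudaMalloc(&ptr, 256));  // checked device allocation
  CUDA_CHECK(cudaFree(ptr));          // checked release
  return 0;
}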
diff --git a/paddle/fluid/lite/host/CMakeLists.txt b/paddle/fluid/lite/host/CMakeLists.txt
new file mode 100644
index 00000000000..90812f3f3cd
--- /dev/null
+++ b/paddle/fluid/lite/host/CMakeLists.txt
@@ -0,0 +1 @@
+cc_library(target_wrapper_host SRCS target_wrapper.cc)
diff --git a/paddle/fluid/lite/host/target_wrapper.cc b/paddle/fluid/lite/host/target_wrapper.cc
new file mode 100644
index 00000000000..2e8393ef466
--- /dev/null
+++ b/paddle/fluid/lite/host/target_wrapper.cc
@@ -0,0 +1,33 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/lite/core/target_wrapper.h"
+#include <cstring>
+
+namespace paddle {
+namespace lite {
+
+void* TargetWrapper<TARGET(kHost)>::Malloc(size_t size) {
+  return new char[size];
+}
+void TargetWrapper<TARGET(kHost)>::Free(void* ptr) {
+  delete[] static_cast<char*>(ptr);
+}
+void TargetWrapper<TARGET(kHost)>::MemcpySync(void* dst, const void* src,
+                                              size_t size, IoDirection dir) {
+  memcpy(dst, src, size);
+}
+
+}  // namespace lite
+}  // namespace paddle
diff --git a/paddle/fluid/lite/kernels/host/feed_compute.cc b/paddle/fluid/lite/kernels/host/feed_compute.cc
index 3f8015614bf..d727f0c22ac 100644
--- a/paddle/fluid/lite/kernels/host/feed_compute.cc
+++ b/paddle/fluid/lite/kernels/host/feed_compute.cc
@@ -28,6 +28,8 @@ class FeedCompute : public OpKernel<TARGET(kHost), PRECISION(kFloat)> {
     auto &param = Param<operators::FeedParam>();
     const Tensor &feed_item = param.feed_list->at(param.col);
     param.out->CopyDataFrom(feed_item);
+    LOG(INFO) << "FEED input " << feed_item << " col " << param.col;
+    LOG(INFO) << "FEED output " << *param.out;
   }
 };

@@ -40,6 +42,6 @@ REGISTER_LITE_KERNEL(feed, kHost, kFloat,
                      paddle::lite::kernels::host::FeedCompute, def)
     .BindInput("X", {paddle::lite::Type::Get<TensorAnyTy>(
                         TARGET(kHost))})
-    .BindOutput("Out", {paddle::lite::Type::Get<TensorAnyTy>(
+    .BindOutput("Out", {paddle::lite::Type::Get<TensorFp32NCHWTy>(
                            TARGET(kHost))})
     .Finalize();
diff --git a/paddle/fluid/lite/kernels/host/fetch_compute.cc b/paddle/fluid/lite/kernels/host/fetch_compute.cc
index 4bc71266ed2..b3193a01942 100644
--- a/paddle/fluid/lite/kernels/host/fetch_compute.cc
+++ b/paddle/fluid/lite/kernels/host/fetch_compute.cc
@@ -32,7 +32,7 @@ class FetchCompute : public OpKernel<TARGET(kHost), PRECISION(kFloat)> {
     }

     auto& dst = fetch_list->at(param.col);
-    dst.CopyDataFrom(*param.input);
+    dst.ShareDataWith(*param.input);
   }
 };
diff --git a/paddle/fluid/lite/kernels/host/mul_compute.cc b/paddle/fluid/lite/kernels/host/mul_compute.cc
index b2062fa9308..ee7b168503a 100644
--- a/paddle/fluid/lite/kernels/host/mul_compute.cc
+++ b/paddle/fluid/lite/kernels/host/mul_compute.cc
@@ -40,22 +40,23 @@ class MulCompute : public OpKernel<TARGET(kHost), PRECISION(kFloat)> {
   using param_t = operators::MulParam;

   void Run() override {
-    auto& theparam = Param<operators::MulParam>();
-    core::dim2 x_shape(
-        {product(theparam.x->dims().begin(),
-                 theparam.x->dims().begin() + theparam.x_num_col_dims),
-         product(theparam.x->dims().begin() + theparam.x_num_col_dims,
-                 theparam.x->dims().end())});
+    auto& param = Param<operators::MulParam>();
+    core::dim2 x_shape({product(param.x->dims().begin(),
+                                param.x->dims().begin() + param.x_num_col_dims),
+                        product(param.x->dims().begin() + param.x_num_col_dims,
+                                param.x->dims().end())});

-    core::dim2 y_shape(
-        {product(theparam.y->dims().begin(),
-                 theparam.y->dims().begin() + theparam.x_num_col_dims),
-         product(theparam.y->dims().begin() + theparam.x_num_col_dims,
-                 theparam.y->dims().end())});
+    core::dim2 y_shape({product(param.y->dims().begin(),
+                                param.y->dims().begin() + param.x_num_col_dims),
+                        product(param.y->dims().begin() + param.x_num_col_dims,
+                                param.y->dims().end())});

-    mul_compute_eigen(theparam.x->data<float>(), x_shape.x, x_shape.y,  //
-                      theparam.y->data<float>(), y_shape.x, y_shape.y,  //
-                      theparam.output->mutable_data<float>());
+    mul_compute_eigen(param.x->data<float>(), x_shape.x, x_shape.y,  //
+                      param.y->data<float>(), y_shape.x, y_shape.y,  //
+                      param.output->mutable_data<float>());
+    LOG(INFO) << "MUL x " << *param.x;
+    LOG(INFO) << "MUL W " << *param.y;
+    LOG(INFO) << "MUL out " << *param.output;
   }

   virtual ~MulCompute() = default;
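MulCompute flattens each input to a 2-D matrix around x_num_col_dims before the Eigen matmul: dims [d0, ..., dn-1] collapse to (d0 * ... * dk-1) rows and (dk * ... * dn-1) columns, with k = x_num_col_dims. A quick worked sketch of that shape computation; `product` here reimplements the iterator-range helper the patch relies on:

#include <cassert>
#include <cstdint>
#include <numeric>
#include <vector>

// Multiply out a half-open range of dims, as product() does in the patch.
template <typename It>
int64_t product(It begin, It end) {
  return std::accumulate(begin, end, int64_t{1},
                         [](int64_t a, int64_t b) { return a * b; });
}

int main() {
  std::vector<int64_t> x_dims{2, 3, 4, 5};
  int num_col_dims = 2;  // x_num_col_dims in MulParam

  // Flatten [2, 3, 4, 5] to a (2*3) x (4*5) matrix for the GEMM.
  int64_t rows = product(x_dims.begin(), x_dims.begin() + num_col_dims);
  int64_t cols = product(x_dims.begin() + num_col_dims, x_dims.end());
  assert(rows == 6 && cols == 20);
  return 0;
}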
diff --git a/paddle/fluid/lite/kernels/host/scale_compute.cc b/paddle/fluid/lite/kernels/host/scale_compute.cc
index ebf6f2ff4b3..7ad6ab28818 100644
--- a/paddle/fluid/lite/kernels/host/scale_compute.cc
+++ b/paddle/fluid/lite/kernels/host/scale_compute.cc
@@ -36,10 +36,10 @@ class ScaleCompute : public OpKernel<TARGET(kHost), PRECISION(kFloat)> {
   using param_t = operators::MulParam;

   void Run() override {
-    auto& theparam = Param<operators::ScaleParam>();
-    scale_compute(theparam.x->data<float>(), theparam.x->mutable_data<float>(),
-                  product(theparam.x->dims()), theparam.scale, theparam.bias,
-                  theparam.bias_after_scale);
+    auto& param = Param<operators::ScaleParam>();
+    scale_compute(param.x->data<float>(), param.output->mutable_data<float>(),
+                  product(param.x->dims()), param.scale, param.bias,
+                  param.bias_after_scale);
   }

   virtual ~ScaleCompute() = default;
diff --git a/paddle/fluid/lite/model_parser/model_parser.cc b/paddle/fluid/lite/model_parser/model_parser.cc
index 63316d34cd0..feaff82d313 100644
--- a/paddle/fluid/lite/model_parser/model_parser.cc
+++ b/paddle/fluid/lite/model_parser/model_parser.cc
@@ -77,7 +77,7 @@ void TensorFromStream(std::istream &is, lite::Tensor *tensor) {
       DO(INT64, int64_t);
 #undef DO
     default:
-      LOG(FATAL) << "unknown type";
+      LOG(FATAL) << "unknown type " << desc.data_type();
   }

   is.read(static_cast<char*>(buf), size);
diff --git a/paddle/fluid/lite/operators/feed_op.cc b/paddle/fluid/lite/operators/feed_op.cc
index 041fe07f634..47876b55e76 100644
--- a/paddle/fluid/lite/operators/feed_op.cc
+++ b/paddle/fluid/lite/operators/feed_op.cc
@@ -51,7 +51,6 @@ class FeedOp : public OpLite {
     // NOTE need boost here
     // TODO(Superjomn) drop the need of framework::op_desc
     param_.col = boost::get<int>(opdesc.GetAttr("col"));
-    kernel_->SetParam(param_);
     return true;
   }
diff --git a/paddle/fluid/lite/utils/macros.h b/paddle/fluid/lite/utils/macros.h
index 1115c71cd24..1861f20f839 100644
--- a/paddle/fluid/lite/utils/macros.h
+++ b/paddle/fluid/lite/utils/macros.h
@@ -21,3 +21,10 @@
 #endif

 #define LITE_UNIMPLEMENTED CHECK(false) << "Not Implemented";
+
+#ifndef LIKELY
+#define LIKELY(x) __builtin_expect(!!(x), 1)
+#endif
+#ifndef UNLIKELY
+#define UNLIKELY(x) __builtin_expect(!!(x), 0)
+#endif
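The new macros wrap GCC/Clang's __builtin_expect so the compiler can lay out the expected branch as the straight-line fall-through path; the `!!(x)` normalizes any truthy expression to 0 or 1. Typical usage looks like this (a sketch; a portable header would also stub the macros to plain `(x)` on compilers without the builtin):

#include <cstdio>

#ifndef LIKELY
#define LIKELY(x) __builtin_expect(!!(x), 1)
#endif
#ifndef UNLIKELY
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#endif

int process(int value) {
  if (UNLIKELY(value < 0)) {  // rare error path, kept off the hot path
    std::fprintf(stderr, "negative input\n");
    return -1;
  }
  return value * 2;           // expected fall-through path
}

int main() { return process(21) == 42 ? 0 : 1; }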