diff --git a/paddle/fluid/framework/data_type_transform.cc b/paddle/fluid/framework/data_type_transform.cc index e7abe21daeb5e12c8ce4b59545c0d467e72b4fa2..3ab653841545c5f405f4226466c2ffa28018584a 100644 --- a/paddle/fluid/framework/data_type_transform.cc +++ b/paddle/fluid/framework/data_type_transform.cc @@ -111,6 +111,16 @@ struct CastDataType { out_begin, CastDataTypeFunctor()); context->Wait(); +#endif +#if defined(PADDLE_WITH_IPU) + } else if (platform::is_ipu_place(in_.place())) { + platform::Transform trans; + auto* context = static_cast(ctx_); + trans(*context, + in_begin, + in_end, + out_begin, + CastDataTypeFunctor()); #endif } else { PADDLE_THROW(platform::errors::Unimplemented( diff --git a/paddle/fluid/framework/ir/node.cc b/paddle/fluid/framework/ir/node.cc index 51542fc80850f0d1d77052ac3cd4b29f40ce7012..fd33e0e53a1f2d827dc7db3c635b7e576a4b4ad8 100644 --- a/paddle/fluid/framework/ir/node.cc +++ b/paddle/fluid/framework/ir/node.cc @@ -18,7 +18,9 @@ namespace paddle { namespace framework { namespace ir { // msvc15 don't support constexpr in correct way. -#if !defined(_WIN32) +// static constexpr member implies inline since CXX17 and may cause multiple +// definition. +#if !defined(_WIN32) && (__cplusplus < 201703L) constexpr char Node::kControlDepVarName[]; #else const char Node::kControlDepVarName[] = "__control_var"; diff --git a/paddle/fluid/framework/ir/node.h b/paddle/fluid/framework/ir/node.h index 6fbbd47cc8e325018e980c6f302455f255c6bfa6..1381ea64d7b26c13e956b593c02c4992f9cc5c4d 100644 --- a/paddle/fluid/framework/ir/node.h +++ b/paddle/fluid/framework/ir/node.h @@ -64,7 +64,10 @@ class Node { enum class Type { kOperation, kVariable }; enum class Dep { kSame = 0, kBefore = 1, kAfter = 2, kNoDep = 3 }; -#if !defined(_WIN32) // msvc not support constexpr correctly. +// msvc not support constexpr correctly. +// static constexpr member implies inline since CXX17 and may cause multiple +// definition. +#if !defined(_WIN32) && (__cplusplus < 201703L) static constexpr char kControlDepVarName[] = "__control_var"; #else static const char kControlDepVarName[]; diff --git a/paddle/fluid/platform/device/ipu/CMakeLists.txt b/paddle/fluid/platform/device/ipu/CMakeLists.txt index 29f2a2955e0c22d03a6e1cc0914c34e4187dce3c..a4a6db37837063cc994597f15e9e23f07d7b71fe 100644 --- a/paddle/fluid/platform/device/ipu/CMakeLists.txt +++ b/paddle/fluid/platform/device/ipu/CMakeLists.txt @@ -23,7 +23,7 @@ if(WITH_IPU) endforeach() endforeach() - set(IPU_BACKEND_SRC "ipu_strategy.cc" "ipu_executor.cc" "ipu_compiler.cc" + set(IPU_BACKEND_SRC "ipu_strategy.cc" "ipu_compiler.cc" "ipu_executor.cc" "ipu_backend.cc" "ipu_utils.cc") set(IPU_INFO_SRC "ipu_info.cc" "ipu_device.cc") @@ -34,7 +34,20 @@ if(WITH_IPU) cc_library( ipu_backend SRCS ${IPU_BACKEND_SRC} - DEPS popart-only graph graph_helper popdist popart_canonicalization) + DEPS popart-only + graph + graph_helper + popdist + popef + model_runtime + popart_canonicalization) + + # magic here is model_runtime requires CXX17 and uses std::optional while popart uses CXX14 + # and nonstd::optional. A manually macro setting is required here to solve symbol conflict. + target_compile_features(ipu_backend PRIVATE cxx_std_17) + target_compile_definitions(ipu_backend + PRIVATE optional_CONFIG_SELECT_OPTIONAL=1) + cc_library( ipu_info SRCS ${IPU_INFO_SRC} diff --git a/paddle/fluid/platform/device/ipu/ipu_backend.cc b/paddle/fluid/platform/device/ipu/ipu_backend.cc index fb8686601a196529c669494875afadfdec27dd6d..130151f028ad578797068ae24723e3459de2ca23 100644 --- a/paddle/fluid/platform/device/ipu/ipu_backend.cc +++ b/paddle/fluid/platform/device/ipu/ipu_backend.cc @@ -76,7 +76,11 @@ void IpuBackend::Run(const std::vector& inputs, const std::vector& outputs, const framework::ExecutionContext& ctx) { timer_->Start(); - executor_->Run(inputs, outputs, ctx); + if (ipu_strategy_->enable_model_runtime_executor) { + executor_->RunPopef(inputs, outputs, ctx); + } else { + executor_->Run(inputs, outputs, ctx); + } timer_->Pause(); VLOG(10) << "[IPU Run]: " << timer_->ElapsedMS() << " (ms)"; } diff --git a/paddle/fluid/platform/device/ipu/ipu_executor.cc b/paddle/fluid/platform/device/ipu/ipu_executor.cc index f3ffd8f5d94d7849333ad02b3f1ba55275b01dcf..ee749f3a19a15dbb663700776db448becd9ff1ab 100644 --- a/paddle/fluid/platform/device/ipu/ipu_executor.cc +++ b/paddle/fluid/platform/device/ipu/ipu_executor.cc @@ -14,9 +14,11 @@ limitations under the License. */ #include "paddle/fluid/platform/device/ipu/ipu_executor.h" +#include #include #include +#include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/device/ipu/ipu_compiler.h" #include "paddle/fluid/platform/device/ipu/ipu_names.h" @@ -28,6 +30,13 @@ namespace ipu { namespace { +model_runtime::AnchorCallbackPredicate PredFilterMain( + const model_runtime::Session *session) { + // Create predicates for binding Anchors from Main programs only + return model_runtime::predicate_factory::predProgramFlowMain( + session->model()->metadata.programFlow()); +} + // Get paddle prefix and popart postfix of weight states // Format: {popart_postfix, paddle_prefix} std::vector> GetOptPrePostfix( @@ -156,6 +165,10 @@ void Executor::Prepare(const std::string &proto) { VLOG(10) << "Setting random seed to: " << ipu_strategy_->random_seed; session_->setRandomSeed(ipu_strategy_->random_seed); } + enable_model_runtime_executor_ = ipu_strategy_->enable_model_runtime_executor; + if (enable_model_runtime_executor_) { + PreparePopefSession(); + } } void Executor::Run(const std::vector &inputs, @@ -234,6 +247,208 @@ void Executor::Run(const std::vector &inputs, VLOG(10) << "Running...done"; } +void Executor::PreparePopefSession() { + VLOG(10) << "enter Executor::PreparePopefSession"; + if (popef_session_) { + VLOG(10) << "popef: previous popef model is not released, reset resources."; + ResetPopef(); + } + auto popef_model = PopartSessionToPopefModel(session_.get()); + + auto num_buffers = ipu_strategy_->num_buffers; + + // convert timeout_ms to timeout_ns + const std::chrono::nanoseconds timeout_ns( + int64_t(ipu_strategy_->timeout_ms * 1000000)); + + // prepare popef session + model_runtime::SessionConfig config; + config.policy = model_runtime::LaunchPolicy::Immediate; + + popef_session_ = + std::make_unique(popef_model, config); + + // prepare queue_manager + auto timeout_cb = [this](model_runtime::InputRingBuffer *buffer) { + VLOG(10) << "ModelRuntmie timeout callback is called."; + std::unique_lock lock(this->queue_mutex_); + if (buffer->readAvailable()) { + return; + } + this->queue_manager_->flushAll(); + }; + + queue_manager_ = + popef_session_->createQueueManager(num_buffers, + timeout_cb, + timeout_ns, + PredFilterMain(popef_session_.get()), + PredFilterMain(popef_session_.get())); + + // prepare program + popef_session_->runLoadPrograms(); + + main_program_ = std::thread([&]() { + while (!stop_.load()) { + VLOG(13) << "popef: Run main program"; + popef_session_->runMainPrograms(); + } + }); + + // Detach device from popart session + Detach(); +} + +void Executor::RunPopef(const std::vector &inputs, + const std::vector &outputs, + const framework::ExecutionContext &ctx) { + VLOG(10) << "enter Executor::RunPopef"; + + auto input_names = ctx.InputNames("FeedList"); + auto output_names = ctx.OutputNames("FetchList"); + + int batch_size = 0; + bool auto_batch = (ipu_strategy_->timeout_ms != 0); + + auto tensor_check = [&](const Tensor *tensor, + const popef::TensorInfo &info, + int *batch_size, + Tensor *cast_tensor) { + // check dtype + auto popef_phi_dtype = PopefDtype2PhiDtype(info.dataType()); + bool casted = false; + + if (popef_phi_dtype != tensor->dtype()) { + // popart may do some implicit conversion, int64->int32 for example, cast + // is needed in some case. + VLOG(10) << "Cast paddle input type " << tensor->dtype() << " to " + << popef_phi_dtype; + framework::TransDataType( + *tensor, PopefDType2VarType(info.dataType()), cast_tensor); + casted = true; + } + + // check size + auto popef_input_shape = info.shape(); + if (popef_input_shape.size() != tensor->dims().size()) { + PADDLE_THROW( + errors::Fatal("Incompatible size between paddle and popef.")); + } + + for (int i = 1; i < popef_input_shape.size(); ++i) { + PADDLE_ENFORCE_EQ( + popef_input_shape[i], + tensor->dims().at(i), + errors::InvalidArgument("Invalid tensor size at dim %s. " + "popef expecting %s but received %s ", + i, + popef_input_shape[i], + tensor->dims().at(i))); + } + + // check batch_size + if (!auto_batch) { + // disable auto batching + PADDLE_ENFORCE_EQ( + popef_input_shape[0], + tensor->dims().at(0), + errors::InvalidArgument( + "Batch size doesn't equal between paddle and popef.")); + } else { + // enable auto batching + bool is_single_batch = ipu_strategy_->micro_batch_size == 1; + if (*batch_size == 0) { + // retrieve batch_size + *batch_size = is_single_batch ? 1 : tensor->dims().at(0); + } else if (!is_single_batch) { + // input/output should have batch info when enable auto batch. + PADDLE_ENFORCE_EQ(*batch_size, + tensor->dims().at(0), + errors::InvalidArgument( + "batch size should be equal for each tensor")); + } + } + return casted; + }; + + const auto &session_inputs = popef_session_->getUserInputAnchors(); + std::vector cast_tensor(inputs.size()); + const auto &session_outputs = popef_session_->getUserOutputAnchors(); + + // ModelRuntime::Queue is not thread safety. + std::unique_lock lock(queue_mutex_); + for (size_t i = 0; i < inputs.size(); ++i) { + const auto &popef_input_name = + compiler_resources_->tensors.at(input_names[i]); + auto &elem_queue = queue_manager_->inputQueue(popef_input_name); + const auto &info = elem_queue.tensorInfo(); + VLOG(10) << "popef: handle popef input: " << popef_input_name + << " mapped with paddle " << input_names[i]; + + bool casted = tensor_check(inputs[i], info, &batch_size, &(cast_tensor[i])); + + const void *data = casted ? cast_tensor[i].data() : inputs[i]->data(); + const auto size = + casted ? cast_tensor[i].memory_size() : inputs[i]->memory_size(); + + elem_queue.enqueue(data, size, [popef_input_name]() { + VLOG(10) << "popef: enqueued data for input: " << popef_input_name; + }); + } + + std::vector> finish_indicators; + finish_indicators.reserve(session_outputs.size()); + + for (size_t i = 0; i < session_outputs.size(); ++i) { + const auto &popef_output_name = + compiler_resources_->tensors.at(output_names[i]); + auto &out_queue = queue_manager_->outputQueue(popef_output_name); + const auto &info = out_queue.tensorInfo(); + VLOG(10) << "popef: handle popef output: " << popef_output_name + << " mapped with paddle " << output_names[i]; + + auto popef_dtype = info.dataType(); + auto paddle_dtype = PopefDType2VarType(popef_dtype); + auto output_shape = info.shape(); + if (auto_batch) { + if (output_shape[0] == ipu_strategy_->micro_batch_size) { + output_shape[0] = batch_size; + } else { + // shape of output must have batch info when when auto batch enabled + PADDLE_THROW(platform::errors::Unimplemented( + "Auto batch doesn't support the tensor with no batch info. " + "Expected batch size in output tensor: %d should equal to " + "micro batch size: %d. Please make sure batch size is set " + "correctly in both IPU program compiling and IpuStrategy.", + output_shape[0], + ipu_strategy_->micro_batch_size)); + } + } + + auto *tensor = outputs[i]; + // resize output size to make data_ptr valid. + tensor->Resize(phi::make_ddim(output_shape)); + tensor->mutable_data(ctx.GetPlace(), + framework::TransToPhiDataType(paddle_dtype)); + + const auto size = tensor->memory_size(); + + auto promise = std::make_shared>(); + finish_indicators.emplace_back(promise->get_future()); + out_queue.enqueue(tensor->data(), size, [popef_output_name, promise]() { + VLOG(10) << "popef: received output: " << popef_output_name; + promise->set_value(); + }); + } + lock.unlock(); + + // Synchronous waiting outputs. Asynchronous execution is not supported since + // python api calling is synchronous and output data is copied outside. + for (const auto &indicator : finish_indicators) { + indicator.wait(); + } +} + void Executor::WeightsToHost() { if (ipu_strategy_->is_training && session_) { WeightsToPaddle(); @@ -316,6 +531,32 @@ void Executor::Reset() { Detach(); session_.reset(); executor_resources_.reset(); + if (enable_model_runtime_executor_) { + ResetPopef(); + } +} + +void Executor::ResetPopef() { + VLOG(10) << "Reset popef resources."; + stop_.store(true); + if (queue_manager_) { + queue_manager_->disconnectAll(); + } + if (main_program_.joinable()) { + const auto future = std::async(std::launch::async, + [this]() { this->main_program_.join(); }); + if (future.wait_for(std::chrono::seconds(10)) == + std::future_status::timeout) { + popef_session_->stop(); + VLOG(10) << "popef: failed to wait for main program. Force stop popef " + "session."; + } + } + popef_session_.reset(); + + // reset stop back to false in case executor is reused. + stop_.store(false); + queue_manager_ = nullptr; } void Executor::SetWeightsIO() { diff --git a/paddle/fluid/platform/device/ipu/ipu_executor.h b/paddle/fluid/platform/device/ipu/ipu_executor.h index eab1bd9d36eb6058d44cd549c55c425a4277a32e..ec0587fb0ae0d4a89c5fb40f0cfd9824f3eb479e 100644 --- a/paddle/fluid/platform/device/ipu/ipu_executor.h +++ b/paddle/fluid/platform/device/ipu/ipu_executor.h @@ -14,6 +14,10 @@ limitations under the License. */ #pragma once +#include + +#include +#include #include #include #include @@ -57,6 +61,11 @@ class Executor { const std::vector &outputs, const framework::ExecutionContext &ctx); + // Run popef session + void RunPopef(const std::vector &inputs, + const std::vector &outputs, + const framework::ExecutionContext &ctx); + // Sync weights from popart to paddle void WeightsToHost(); @@ -88,13 +97,15 @@ class Executor { void ConvertWeights(bool); void WeightsFromPaddle(); void WeightsToPaddle(); + void PreparePopefSession(); + void ResetPopef(); private: // Not own const Scope *scope_ = nullptr; const IpuStrategy *ipu_strategy_ = nullptr; CompilerResources *compiler_resources_ = nullptr; - bool compile_only_ = false; + model_runtime::QueueManager *queue_manager_ = nullptr; // Deviceinfo for popart session std::shared_ptr device_; @@ -102,6 +113,20 @@ class Executor { std::unique_ptr session_; // A ExecutorResources corresponds to a graph std::unique_ptr executor_resources_; + // mutex to lock session run. + mutable std::mutex mutex_; + // popef session + std::unique_ptr popef_session_; + // popef execution threads + std::thread main_program_; + // mutex to lock popef queue + std::mutex queue_mutex_; + + bool compile_only_ = false; + // indicate if popart/popef is used. Do not use the ipu_strategy which may + // deconstruct before executor and cause undefined behavior. + bool enable_model_runtime_executor_ = false; + std::atomic_bool stop_ = {false}; }; } // namespace ipu diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.cc b/paddle/fluid/platform/device/ipu/ipu_strategy.cc index 56794d851edefbbaf2f3ae29b9a515f46c367695..2f3c31473028eda483e8ddb2bb0f71307393a244 100644 --- a/paddle/fluid/platform/device/ipu/ipu_strategy.cc +++ b/paddle/fluid/platform/device/ipu/ipu_strategy.cc @@ -87,24 +87,26 @@ IpuStrategy::IpuStrategy() { ADD_BOOL_OPTION(use_no_bias_optimizer); ADD_BOOL_OPTION(enable_distribution); ADD_BOOL_OPTION(scaled_optimizer_state); + ADD_BOOL_OPTION(is_dynamic); + ADD_BOOL_OPTION(enable_model_runtime_executor); ADD_UINT64_OPTION(num_ipus); ADD_UINT64_OPTION(batches_per_step); ADD_UINT64_OPTION(micro_batch_size); ADD_UINT64_OPTION(random_seed); ADD_UINT64_OPTION(tiles_per_ipu); + ADD_UINT64_OPTION(num_buffers); ADD_DOUBLE_OPTION(available_memory_proportion); ADD_DOUBLE_OPTION(loss_scaling); ADD_DOUBLE_OPTION(max_weight_norm); + ADD_DOUBLE_OPTION(timeout_ms); + // dy2static support + ADD_DOUBLE_OPTION(lr); ADD_STRING_OPTION(accl1_type); ADD_STRING_OPTION(accl2_type); ADD_STRING_OPTION(accl3_type); ADD_STRING_OPTION(onnx_dump_path); ADD_STRING_OPTION(weight_decay_mode); - // dy2static support - ADD_DOUBLE_OPTION(lr); - ADD_BOOL_OPTION(is_dynamic); - #undef ADD_STRING_OPTION #undef ADD_DOUBLE_OPTION #undef ADD_UINT64_OPTION diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.h b/paddle/fluid/platform/device/ipu/ipu_strategy.h index 34beb44686b6dc25ef3af9b3bb3e343a3599b001..ea6930cbd7928dfde9575f9483641263728e9ff7 100644 --- a/paddle/fluid/platform/device/ipu/ipu_strategy.h +++ b/paddle/fluid/platform/device/ipu/ipu_strategy.h @@ -118,6 +118,16 @@ class IpuStrategy { // whether in dynamic mode bool is_dynamic = false; + // use popart executor as default. Enable model_runtime executor if set to + // true + bool enable_model_runtime_executor = false; + + // buffer size for model_runtime_executor + int num_buffers = 10; + + // timeout setting for model_runtime + double timeout_ms = 0.0; + public: void AddBoolOption(const std::string &option, bool value); void AddUint64Option(const std::string &option, std::uint64_t value); diff --git a/paddle/fluid/platform/device/ipu/ipu_utils.cc b/paddle/fluid/platform/device/ipu/ipu_utils.cc index 43e4a6820c81398217203dfb64f72cd0004ec1fa..60fc0278d366510ee30c7bbd3819741cb8cbccef 100644 --- a/paddle/fluid/platform/device/ipu/ipu_utils.cc +++ b/paddle/fluid/platform/device/ipu/ipu_utils.cc @@ -84,6 +84,32 @@ const popart::DataType PhiDType2PopartDType(const phi::DataType type) { } } +const phi::DataType PopefDtype2PhiDtype(const popef::DataType type) { + switch (type) { + case popef::DataType::U8: + return phi::DataType::UINT8; + case popef::DataType::S8: + return phi::DataType::INT8; + case popef::DataType::S16: + return phi::DataType::INT16; + case popef::DataType::S32: + return phi::DataType::INT32; + case popef::DataType::S64: + return phi::DataType::INT64; + case popef::DataType::BOOL: + return phi::DataType::BOOL; + case popef::DataType::F64: + return phi::DataType::FLOAT64; + case popef::DataType::F32: + return phi::DataType::FLOAT32; + case popef::DataType::F16: + return phi::DataType::FLOAT16; + default: + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported phi::DataType when converting to popef data type.")); + } +} + const VarType::Type PopartDType2VarType(const popart::DataType type) { switch (type) { case popart::DataType::UINT8: @@ -116,6 +142,32 @@ const VarType::Type PopartDType2VarType(const popart::DataType type) { } } +const VarType::Type PopefDType2VarType(const popef::DataType type) { + switch (type) { + case popef::DataType::U8: + return VarType::UINT8; + case popef::DataType::S8: + return VarType::INT8; + case popef::DataType::S16: + return VarType::INT16; + case popef::DataType::S32: + return VarType::INT32; + case popef::DataType::S64: + return VarType::INT64; + case popef::DataType::BOOL: + return VarType::BOOL; + case popef::DataType::F64: + return VarType::FP64; + case popef::DataType::F32: + return VarType::FP32; + case popef::DataType::F16: + return VarType::FP16; + default: + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported popart::DataType when converting to var type.")); + } +} + const popart::DataType OnnxDType2PopartType(const ONNXDataType type) { switch (type) { case ONNXDataType::BOOL: @@ -223,6 +275,16 @@ const int RequestIpus(const int num_ipus) { return std::pow(2, ceil(log2(num_ipus))); } +std::shared_ptr PopartSessionToPopefModel( + popart::Session* session) { + VLOG(10) << "Converting popart session to popef model"; + auto temp_stream = std::make_shared(); + session->compileAndExport(*temp_stream); + auto reader = std::make_shared(); + reader->parseStream(temp_stream); + return popef::ModelBuilder(reader).createModel(); +} + } // namespace ipu } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/device/ipu/ipu_utils.h b/paddle/fluid/platform/device/ipu/ipu_utils.h index 8a4fa465ca5c22ed0b00e028f62fbdaffdeee32a..5e93ce4bf9385c4e23c0eba0732ef2a268afae78 100644 --- a/paddle/fluid/platform/device/ipu/ipu_utils.h +++ b/paddle/fluid/platform/device/ipu/ipu_utils.h @@ -15,9 +15,14 @@ limitations under the License. */ #pragma once #include +#include #include #include #include +#include +#include +#include +#include #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/lod_tensor.h" @@ -85,6 +90,10 @@ enum ONNXDataType : int { BFLOAT16 = 16 }; +// TODO(czr): remove const qualifier on return value. +const VarType::Type PopefDType2VarType(const popef::DataType type); +const phi::DataType PopefDtype2PhiDtype(const popef::DataType type); + // VarType::Type to popart::DataType const popart::DataType VarType2PopartDType(const VarType::Type type); // phi::DataType to popart::DataType @@ -102,6 +111,10 @@ const bool GetBoolEnv(const std::string& str); // Request number of ipus must be pow(2, n) const int RequestIpus(const int num_ipus); +// Convert popart session to popef +std::shared_ptr PopartSessionToPopefModel( + popart::Session* session); + } // namespace ipu } // namespace platform } // namespace paddle diff --git a/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py new file mode 100644 index 0000000000000000000000000000000000000000..d7283294bb825acd0d707e405ed67c6ea0c4c9cf --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py @@ -0,0 +1,170 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import unittest + +import numpy as np +import paddle +from op_test_ipu import IPUOpTest + + +class SimpleLayer(paddle.nn.Layer): + def __init__(self): + super(SimpleLayer, self).__init__() + self.conv = paddle.nn.Conv2D( + in_channels=3, out_channels=1, kernel_size=2, stride=1 + ) + + def forward(self, x, target=None): + x = self.conv(x) + x = paddle.fluid.layers.flatten(x, axis=1) + if target is not None: + x = paddle.fluid.layers.softmax(x) + loss = paddle.fluid.layers.cross_entropy(x, target) + return x, loss + return x + + +class TestBase(IPUOpTest): + def setUp(self): + self.ipu_model = None + self.set_attrs() + if 'POPLAR_IPUMODEL' in os.environ: + self.ipu_model = os.environ['POPLAR_IPUMODEL'] + del os.environ['POPLAR_IPUMODEL'] + + def set_attrs(self): + self.timeout = 0.0 + self.batch_size = 8 + + def tearDown(self): + if getattr(self, 'ipu_model', None): + os.environ['POPLAR_IPUMODEL'] = self.ipu_model + paddle.framework.core.IpuBackend.get_instance().reset() + + def generate_feed(self): + return { + "X": np.random.rand(8, 3, 10, 10).astype(np.float32), + "Y": np.random.randint(0, 10, [8], dtype="int64"), + } + + @IPUOpTest.static_graph + def build_model(self): + x = paddle.static.data( + name='X', shape=[self.batch_size, 3, 10, 10], dtype='float32' + ) + label = paddle.static.data( + name='Y', shape=[self.batch_size], dtype='int64' + ) + model = SimpleLayer() + pred, loss = model(x, label) + self.feed_list = [x.name, label.name] + self.fetch_list = [pred.name, loss.name] + + def reset_seeds(self): + + np.random.seed(self.SEED) + paddle.seed(self.SEED) + self.main_prog.random_seed = self.SEED + self.startup_prog.random_seed = self.SEED + + def _test(self, use_ipu=False): + + self.reset_seeds() + place = paddle.IPUPlace() if use_ipu else paddle.CPUPlace() + + executor = paddle.static.Executor(place) + executor.run(self.startup_prog) + + if use_ipu: + paddle.set_device('ipu') + ipu_strategy = paddle.static.IpuStrategy() + ipu_strategy.set_graph_config( + num_ipus=1, + is_training=False, + micro_batch_size=self.batch_size, + enable_manual_shard=False, + ) + ipu_strategy.set_options( + { + 'enable_model_runtime_executor': True, + 'timeout_ms': self.timeout, + } + ) + program = paddle.static.IpuCompiledProgram( + self.main_prog, ipu_strategy=ipu_strategy + ).compile(self.feed_list, self.fetch_list) + else: + program = self.main_prog + + epochs = 10 + preds = [] + losses = [] + for epoch in range(epochs): + feed = self.generate_feed() + dy_batch = feed["X"].shape[0] + if not use_ipu: + # padding inputs + pad_batch = self.batch_size - dy_batch + for k, v in feed.items(): + pad_size = tuple( + ( + (0, 0 if i != 0 else pad_batch) + for i in range(len(v.shape)) + ) + ) + feed[k] = np.pad(v, pad_size, 'constant', constant_values=0) + + pred, loss = executor.run( + program, feed=feed, fetch_list=self.fetch_list + ) + if not use_ipu: + pred = pred[0:dy_batch] + loss = loss[0:dy_batch] + + preds.append(pred) + losses.append(loss) + + return np.concatenate(preds, axis=0), np.concatenate(losses, axis=0) + + def test_infer(self): + self.build_model() + ipu_pred, ipu_loss = self._test(True) + cpu_pred, cpu_loss = self._test(False) + np.testing.assert_allclose( + ipu_pred.flatten(), cpu_pred.flatten(), rtol=1e-05, atol=1e-4 + ) + np.testing.assert_allclose( + ipu_loss.flatten(), cpu_loss.flatten(), rtol=1e-05, atol=1e-4 + ) + + +class TestAutoBatch(TestBase): + def set_attrs(self): + self.timeout = 0.01 + # fixed batch + self.batch_size = 8 + + def generate_feed(self): + # generate dynamic batch + batch = np.random.randint(1, self.batch_size) + return { + "X": np.random.rand(batch, 3, 10, 10).astype(np.float32), + "Y": np.random.randint(0, 10, [batch], dtype="int64"), + } + + +if __name__ == "__main__": + unittest.main()