diff --git a/cmake/external/poplar.cmake b/cmake/external/poplar.cmake
index 7947a54f8b5f117836ebd9ae8b52590a14b2022e..7a8fa3ef5d710af7eb7e1697dac375449d585a9a 100644
--- a/cmake/external/poplar.cmake
+++ b/cmake/external/poplar.cmake
@@ -11,31 +11,35 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 if(WITH_IPU)
   set(POPLAR_DIR CACHE PATH "Path to a Poplar install")
   set(POPART_DIR CACHE PATH "Path to a Popart install")
   set(POPLAR_SDK_DIR CACHE PATH "Path to an extracted SDK archive or to a Poplar & Popart install directory (Will populate POPLAR_DIR and POPART_DIR)")
+
+  # support setting SDK both from environment variable or command line arguments
+  if(DEFINED ENV{POPLAR_SDK_DIR})
+    set(POPLAR_SDK_DIR $ENV{POPLAR_SDK_DIR})
+  endif()
+
   if(EXISTS ${POPLAR_SDK_DIR})
     execute_process(COMMAND find ${POPLAR_SDK_DIR}/ -maxdepth 1 -type d -name "popart*"
                     OUTPUT_VARIABLE POPART_DIR OUTPUT_STRIP_TRAILING_WHITESPACE)
     execute_process(COMMAND find ${POPLAR_SDK_DIR}/ -maxdepth 1 -type d -name "poplar-*" -o -name "poplar"
                     OUTPUT_VARIABLE POPLAR_DIR OUTPUT_STRIP_TRAILING_WHITESPACE)
-    if(NOT IS_DIRECTORY "${POPLAR_DIR}")
-      message(FATAL_ERROR "Couldn't find a \"poplar\" or \"poplar-*\" folder in '${POPLAR_SDK_DIR}'")
-    endif()
-    if(NOT IS_DIRECTORY "${POPART_DIR}")
-      message(FATAL_ERROR "Couldn't find a \"popart*\" folder in '${POPLAR_SDK_DIR}'")
-    endif()
-  else()
-    message(FATAL_ERROR "You must provide a path to a Poplar install using export POPLAR_SDK_DIR=/path/to/poplar_sdk")
   endif()
-
-  message("POPLAR_DIR is ${POPLAR_DIR}")
-  message("POPART_DIR is ${POPART_DIR}")
+  if(DEFINED ENV{POPLAR_DIR})
+    set(POPLAR_DIR $ENV{POPLAR_DIR})
+  endif()
+  if(DEFINED ENV{POPART_DIR})
+    set(POPART_DIR $ENV{POPART_DIR})
+  endif()

   if(EXISTS ${POPLAR_DIR})
+    message("POPLAR_DIR is ${POPLAR_DIR}")
+    if(NOT IS_DIRECTORY "${POPLAR_DIR}")
+      message(FATAL_ERROR "Couldn't find a \"poplar\" or \"poplar-*\" folder in '${POPLAR_SDK_DIR}'")
+    endif()
     list(APPEND CMAKE_PREFIX_PATH ${POPLAR_DIR})
     set(ENABLE_POPLAR_CMD "source ${POPLAR_DIR}/enable.sh")
     find_package(poplar REQUIRED)
@@ -45,8 +49,11 @@ if(WITH_IPU)
   if(NOT poplar_FOUND)
     message(FATAL_ERROR "You must provide a path to a Poplar install using -DPOPLAR_DIR=/path/to/popart/build/install")
   endif()
-
   if(EXISTS ${POPART_DIR})
+    message("POPART_DIR is ${POPART_DIR}")
+    if(NOT IS_DIRECTORY "${POPART_DIR}")
+      message(FATAL_ERROR "Couldn't find a \"popart*\" folder in '${POPLAR_SDK_DIR}'")
+    endif()
     list(APPEND CMAKE_PREFIX_PATH ${POPART_DIR})
     set(ENABLE_POPART_CMD "source ${POPART_DIR}/enable.sh")
     find_package(popart REQUIRED COMPONENTS popart-only)
@@ -56,6 +63,7 @@ if(WITH_IPU)
   if(NOT popart_FOUND)
     message(FATAL_ERROR "You must provide a path to a Popart build using -DPOPART_DIR=/path/to/popart/build")
   endif()

+  add_definitions(-DONNX_NAMESPACE=onnx)
   add_custom_target(extern_poplar DEPENDS poplar popart-only)
 endif()
diff --git a/paddle/fluid/framework/ir/ipu/forward_graph_extract_pass.cc b/paddle/fluid/framework/ir/ipu/forward_graph_extract_pass.cc
index 5dcfddf6187f2b6e79d9f478489110a484cb0575..0d25fc0525c1de996e207b4543df95207899d227 100644
--- a/paddle/fluid/framework/ir/ipu/forward_graph_extract_pass.cc
+++ b/paddle/fluid/framework/ir/ipu/forward_graph_extract_pass.cc
@@ -64,6 +64,19 @@ void ForwardGraphExtractPass::ApplyImpl(ir::Graph* graph) const {
       }
     }
   }
+  // learning_rate var
+  for (auto* node : all_ops[OpRole::kOptimize]) {
+    if (node->Op()->Inputs().count("LearningRate") &&
+        !node->Op()->Inputs().at("LearningRate").empty()) {
+      auto lr_var_name = node->Op()->Inputs().at("LearningRate").front();
+      for (auto* in_var : node->inputs) {
+        if (in_var->Name() == lr_var_name) {
+          VLOG(10) << "found LearningRate var: " << in_var->Name();
+          forward_vars.insert(in_var);
+        }
+      }
+    }
+  }
   // control_vars & backward_vars
   for (auto* node : graph->Nodes()) {
     if (!node->IsVar()) {
diff --git a/paddle/fluid/framework/ir/ipu/inference_process_pass.cc b/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
index 05490773301649f1cc172a8e56ca18e4205a7a10..02f000acc2a39f489663f6aed68200427e1f4182 100644
--- a/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
+++ b/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
@@ -51,11 +51,11 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
   if (num_ipus > 1) {
     ipu_strategy_instance_->need_avg_shard = true;
     ipu_strategy_instance_->popart_options.virtualGraphMode =
-        platform::ipu::VirtualGraphMode::Manual;
+        popart::VirtualGraphMode::Manual;
   } else {
     ipu_strategy_instance_->need_avg_shard = false;
     ipu_strategy_instance_->popart_options.virtualGraphMode =
-        platform::ipu::VirtualGraphMode::Off;
+        popart::VirtualGraphMode::Off;
   }
   // total num IPUs = num_ipus * replica_num
   ipu_strategy_instance_->num_ipus = num_ipus * replica_num;
diff --git a/paddle/fluid/operators/ipu/ipu_runtime_op.cc b/paddle/fluid/operators/ipu/ipu_runtime_op.cc
index 8cbf1a018a57026ac678f579d6c9c313dec000b4..802cc13ae4e073c5ee66f8482d514a72fa94c52f 100644
--- a/paddle/fluid/operators/ipu/ipu_runtime_op.cc
+++ b/paddle/fluid/operators/ipu/ipu_runtime_op.cc
@@ -46,8 +46,7 @@ class IpuRuntimeOp : public framework::OperatorBase {
       for (size_t i = 0; i < outputs.size(); ++i) {
         auto* out = outputs[i];
         if (out->dims().size() == 0) {
-          auto tensor_dtype = framework::TransToProtoVarType(out->dtype());
-          auto sizeof_dtype = framework::SizeOfType(tensor_dtype);
+          auto sizeof_dtype = framework::DataTypeSize(out->dtype());
           int64_t dim = out->memory_size() / sizeof_dtype;
           out->Resize({dim});
           VLOG(10) << "set ipu_runtime_op output: " << output_names[i]
diff --git a/paddle/fluid/platform/device/ipu/ipu_backend.cc b/paddle/fluid/platform/device/ipu/ipu_backend.cc
index 2471e15e09e0a938c74111899a6979edb2d3c46d..8f2a7ef78c9824d7706be48f117a86b19c334b8a 100644
--- a/paddle/fluid/platform/device/ipu/ipu_backend.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_backend.cc
@@ -102,7 +102,7 @@ void IpuBackend::SetCustomOps(
   compiler_->SetCustomOps(custom_ops);
 }

-void IpuBackend::SaveMoldeProto(const std::string& path) {
+void IpuBackend::SaveModelProto(const std::string& path) {
   if (ipu_strategy_->is_training && is_prepared_) {
     executor_->SaveModelToHost(path);
   } else if (is_compiled_) {
diff --git a/paddle/fluid/platform/device/ipu/ipu_backend.h b/paddle/fluid/platform/device/ipu/ipu_backend.h
index 122a3e0837010a68e3de071655a78808b1e0f634..b12e2539258dfefe93e0828fa1a7341e21d62e70 100644
--- a/paddle/fluid/platform/device/ipu/ipu_backend.h
+++ b/paddle/fluid/platform/device/ipu/ipu_backend.h
@@ -74,7 +74,7 @@ class IpuBackend {
   void SetCustomOps(const std::vector<IpuCustomOpIdentifier> &custom_ops);

   // save compiled model to onnx
-  void SaveMoldeProto(const std::string &path);
+  void SaveModelProto(const std::string &path);

  private:
   void Prepare();
diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.cc b/paddle/fluid/platform/device/ipu/ipu_compiler.cc
index 93c5cc90762ca53e0f67fa38f16a7192be750872..15cba89e9e0be52ecdbd14ed136b9681711d5d52 100644
--- a/paddle/fluid/platform/device/ipu/ipu_compiler.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_compiler.cc
@@ -297,9 +297,8 @@ void Compiler::LowerConstants(const Graph* graph, const Scope* scope) {
       tensor->Resize(ddim);

       auto const_data = std::unique_ptr<popart::ConstVoidData>();
-      popart::TensorInfo tensor_info(
-          VarType2PopartType(framework::TransToProtoVarType(tensor->dtype())),
-          shape);
+      popart::TensorInfo tensor_info(PdDataType2PopartType(tensor->dtype()),
+                                     shape);
       const_data.reset(new popart::ConstVoidData(tensor->data(), tensor_info));
       popart::TensorId result = builder_->aiOnnxOpset11().constant(*const_data);
       SetIpuIndexStage(result, op_desc);
@@ -327,8 +326,7 @@ void Compiler::LowerWeights(const Graph* graph, const Scope* scope) {
         auto var = scope->FindVar(var_name);
         if (var) {
           auto tensor = var->Get<framework::LoDTensor>();
-          auto dtype = VarType2PopartType(
-              framework::TransToProtoVarType(tensor.dtype()));
+          auto dtype = PdDataType2PopartType(tensor.dtype());
           auto shape = std::vector<int64_t>();
           for (size_t i = 0; i < tensor.dims().size(); ++i) {
             shape.push_back(tensor.dims().at(i));
diff --git a/paddle/fluid/platform/device/ipu/ipu_executor.cc b/paddle/fluid/platform/device/ipu/ipu_executor.cc
index 5da785c1b6f4af255bb948e61c35b9a6b1cb1204..cb8dca50058204176a8ab2b2974cfd2d534e68ea 100644
--- a/paddle/fluid/platform/device/ipu/ipu_executor.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_executor.cc
@@ -154,7 +154,12 @@ void Executor::AcquireDevice() {
   bool use_ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
   if (use_ipu_model) {
-    std::map<std::string, std::string> deviceOpts{{"numIPUs", "1 "}};
+    std::map<std::string, std::string> deviceOpts{
+        {
+            "numIPUs", std::to_string(ipu_strategy_->num_ipus),
+        },
+        {"ipuVersion", "ipu2"},
+    };
     device_ = popart::DeviceManager::createDeviceManager().createIpuModelDevice(
         deviceOpts);
   } else {
@@ -210,8 +215,8 @@ void Executor::SetWeightsIO() {
 void Executor::ConvertWeights(bool align_to_popart) {
   for (auto weight_pair : executor_resources_->weights_and_opt_state) {
     auto paddle_var = scope_->GetVar(weight_pair.second);
-    auto paddle_var_dtype = VarType2PopartType(
-        paddle_var->GetMutable<framework::LoDTensor>()->type());
+    auto paddle_var_dtype = PdDataType2PopartType(
+        paddle_var->GetMutable<framework::LoDTensor>()->dtype());

     PADDLE_ENFORCE_EQ((paddle_var_dtype == popart::DataType::FLOAT ||
                        paddle_var_dtype == popart::DataType::FLOAT16),
diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.cc b/paddle/fluid/platform/device/ipu/ipu_strategy.cc
index 2ddead420d3bf60245f40ba90df54533a60ce802..4a9b9c00cb75cd042bab527532de3314075e6dcd 100644
--- a/paddle/fluid/platform/device/ipu/ipu_strategy.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_strategy.cc
@@ -13,23 +13,451 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
-#include
+
+namespace {
+
+template <typename Value, typename Lambda>
+void RegisterSetter(
+    std::map<std::string, std::function<void(Value)>>& options,  // NOLINT
+    const std::string& name, Lambda setter) {
+  options[name] = setter;
+}
+
+template <typename Value, typename Lambda>
+void RegisterGetter(
+    std::map<std::string, std::function<Value()>>& options,  // NOLINT
+    std::map<std::string, std::string>& options_type,        // NOLINT
+    const std::string& name, const std::string& type_str, Lambda getter) {
+  options[name] = getter;
+  options_type[name] = type_str;
+}
+
+}  // namespace

 namespace paddle {
 namespace platform {
 namespace ipu {

-void IpuStrategy::enablePattern(const std::string& t) {
+IpuStrategy::IpuStrategy() {
+#define ADD_BOOL_OPTION(name)                                             \
+  RegisterSetter(bool_options, #name, [&](bool value) { name = value; }); \
+  RegisterGetter(options_getter, options_type, #name, "bool",             \
+                 [&]() { return std::to_string(name); })
+
+#define ADD_UINT64_OPTION(name)                                 \
+  RegisterSetter(uint64_options, #name,                         \
+                 [&](std::uint64_t value) { name = value; });   \
+  RegisterGetter(options_getter, options_type, #name, "uint64", \
+                 [&]() { return std::to_string(name); })
+
+#define ADD_DOUBLE_OPTION(name)                                               \
+  RegisterSetter(double_options, #name, [&](double value) { name = value; }); \
+  RegisterGetter(options_getter, options_type, #name, "double",               \
+                 [&]() { return std::to_string(name); })
+
+#define ADD_STRING_OPTION(name)                                     \
+  RegisterSetter(string_options, #name,                             \
+                 [&](const std::string& value) { name = value; });  \
+  RegisterGetter(options_getter, options_type, #name, "string",     \
+                 [&]() { return name; })
+
+  ADD_BOOL_OPTION(is_training);
+  ADD_BOOL_OPTION(save_init_onnx);
+  ADD_BOOL_OPTION(save_onnx_checkpoint);
+  ADD_BOOL_OPTION(need_avg_shard);
+  ADD_BOOL_OPTION(enable_fp16);
+  ADD_UINT64_OPTION(num_ipus);
+  ADD_UINT64_OPTION(batches_per_step);
+  ADD_UINT64_OPTION(micro_batch_size);
+  ADD_UINT64_OPTION(save_per_n_step);
+  ADD_DOUBLE_OPTION(available_memory_proportion);
+  ADD_DOUBLE_OPTION(loss_scaling);
+  ADD_DOUBLE_OPTION(max_weight_norm);
+
+#undef ADD_STRING_OPTION
+#undef ADD_DOUBLE_OPTION
+#undef ADD_UINT64_OPTION
+#undef ADD_BOOL_OPTION
+
+#define ADD_POPART_ENUM_OPTION_ALIAS(name, aliased_name, EnumType)        \
+  RegisterSetter(uint64_options, #name, [&](std::uint64_t value) {        \
+    PADDLE_ENFORCE_LT(                                                    \
+        value, static_cast<std::uint64_t>(popart::EnumType::N),           \
+        errors::InvalidArgument("Value for %s out of range", #EnumType)); \
+    popart_options.aliased_name = static_cast<popart::EnumType>(value);   \
+  });                                                                     \
+  RegisterGetter(options_getter, options_type, #name, "uint64", [&]() {   \
+    return std::to_string(                                                \
+        static_cast<std::uint64_t>(popart_options.aliased_name));         \
+  })
+
+#define ADD_POPART_BOOL_OPTION_ALIAS(name, aliased_name)                    \
+  RegisterSetter(bool_options, #name,                                       \
+                 [&](bool value) { popart_options.aliased_name = value; }); \
+  RegisterGetter(options_getter, options_type, #name, "bool", [&]() {       \
+    return std::to_string(popart_options.aliased_name);                     \
+  })
+
+#define ADD_POPART_UINT64_OPTION_ALIAS(name, aliased_name)              \
+  RegisterSetter(uint64_options, #name, [&](std::uint64_t value) {      \
+    popart_options.aliased_name = value;                                \
+  });                                                                   \
+  RegisterGetter(options_getter, options_type, #name, "uint64", [&]() { \
+    return std::to_string(popart_options.aliased_name);                 \
+  })
+
+#define ADD_POPART_DOUBLE_OPTION_ALIAS(name, aliased_name)                    \
+  RegisterSetter(double_options, #name,                                       \
+                 [&](double value) { popart_options.aliased_name = value; }); \
+  RegisterGetter(options_getter, options_type, #name, "double", [&]() {       \
+    return std::to_string(popart_options.aliased_name);                       \
+  })
+
+#define ADD_POPART_STRING_OPTION_ALIAS(name, aliased_name)              \
+  RegisterSetter(string_options, #name, [&](const std::string& value) { \
+    popart_options.aliased_name = value;                                \
+  });                                                                   \
+  RegisterGetter(options_getter, options_type, #name, "string",         \
+                 [&]() { return popart_options.aliased_name; })
+
+#define ADD_POPART_ENUM_OPTION(name, EnumType) \
+  ADD_POPART_ENUM_OPTION_ALIAS(name, name, EnumType)
+
+#define ADD_POPART_BOOL_OPTION(name) ADD_POPART_BOOL_OPTION_ALIAS(name, name)
+
+#define ADD_POPART_UINT64_OPTION(name) \
+  ADD_POPART_UINT64_OPTION_ALIAS(name, name)
+
+#define ADD_POPART_DOUBLE_OPTION(name) \
+  ADD_POPART_DOUBLE_OPTION_ALIAS(name, name)
+
+#define ADD_POPART_STRING_OPTION(name) \
+  ADD_POPART_STRING_OPTION_ALIAS(name, name)
+
+  ADD_POPART_ENUM_OPTION(autodiffSettings.stitchStrategy,
+                         AutodiffStitchStrategy);
+  ADD_POPART_ENUM_OPTION(batchSerializationSettings.transformContext,
+                         BatchSerializationTransformContext);
+  ADD_POPART_ENUM_OPTION(batchSerializationSettings.method,
+                         BatchSerializationMethod);
+  ADD_POPART_ENUM_OPTION(batchSerializationSettings.batchSchedule,
+                         BatchSerializationBatchSchedule);
+  ADD_POPART_ENUM_OPTION(autoRecomputation, RecomputationType);
+  ADD_POPART_ENUM_OPTION(mergeVarUpdate, MergeVarUpdateType);
+  ADD_POPART_ENUM_OPTION(virtualGraphMode, VirtualGraphMode);
+  ADD_POPART_ENUM_OPTION(syntheticDataMode, SyntheticDataMode);
+  ADD_POPART_ENUM_OPTION(subgraphCopyingStrategy, SubgraphCopyingStrategy);
+  ADD_POPART_ENUM_OPTION(accumulationAndReplicationReductionType,
+                         ReductionType);
+  ADD_POPART_ENUM_OPTION(meanAccumulationAndReplicationReductionStrategy,
+                         MeanReductionStrategy);
+
+  ADD_POPART_STRING_OPTION(logDir);
+  ADD_POPART_STRING_OPTION(cachePath);
+  ADD_POPART_STRING_OPTION(partialsTypeMatMuls);
+  ADD_POPART_STRING_OPTION(customCodeletCompileFlags);
+  ADD_POPART_STRING_OPTION(serializedPoprithmsShiftGraphsDir);
+  ADD_POPART_STRING_OPTION(kahnTieBreaker);
+
+  ADD_POPART_UINT64_OPTION(executionPhaseSettings.phases);
+  ADD_POPART_UINT64_OPTION(executionPhaseSettings.stages);
+  ADD_POPART_UINT64_OPTION(batchSerializationSettings.factor);
+  ADD_POPART_UINT64_OPTION(firstDotOp);
+  ADD_POPART_UINT64_OPTION(finalDotOp);
+  ADD_POPART_UINT64_OPTION(numIOTiles);
+  ADD_POPART_UINT64_OPTION(mergeVarUpdateMemThreshold);
+  ADD_POPART_UINT64_OPTION(looseThresholdAtPeak);
+  ADD_POPART_UINT64_OPTION(accumulationFactor);
+  ADD_POPART_UINT64_OPTION(swapLimitScheduler);
+  ADD_POPART_UINT64_OPTION(globalReplicationFactor);
+  ADD_POPART_UINT64_OPTION(globalReplicaOffset);
+  ADD_POPART_UINT64_OPTION(defaultPrefetchBufferingDepth);
+  ADD_POPART_UINT64_OPTION(compilationProgressTotal);
+  ADD_POPART_UINT64_OPTION(transitiveClosureOptimizationThreshold);
+
+  ADD_POPART_BOOL_OPTION(batchSerializationSettings.concatOnVirtualGraphChange);
+  ADD_POPART_BOOL_OPTION(
+      batchSerializationSettings.concatOnExecutionPhaseChange);
+  ADD_POPART_BOOL_OPTION(
+      batchSerializationSettings.concatOnPipelineStageChange);
+  ADD_POPART_BOOL_OPTION(strictOpVersions);
+  ADD_POPART_BOOL_OPTION(opxAliasChecking);
+  ADD_POPART_BOOL_OPTION(opxModifyChecking);
+  ADD_POPART_BOOL_OPTION(dotOpNames);
+  ADD_POPART_BOOL_OPTION(exportPoplarComputationGraph);
+  ADD_POPART_BOOL_OPTION(exportPoplarVertexGraph);
+  ADD_POPART_BOOL_OPTION(separateCallOpPdfs);
+  ADD_POPART_BOOL_OPTION(enableOutlining);
+  ADD_POPART_BOOL_OPTION(enableOutliningCopyCostPruning);
+  ADD_POPART_BOOL_OPTION(rearrangeAnchorsOnHost);
+  ADD_POPART_BOOL_OPTION(enablePrefetchDatastreams);
+  ADD_POPART_BOOL_OPTION(enableNonStableSoftmax);
+  ADD_POPART_BOOL_OPTION(enableReplicatedGraphs);
+  ADD_POPART_BOOL_OPTION(enableGradientAccumulation);
+  ADD_POPART_BOOL_OPTION(instrumentWithHardwareCycleCounter);
+  ADD_POPART_BOOL_OPTION(enablePipelining);
+  ADD_POPART_BOOL_OPTION_ALIAS(enable_pipelining, enablePipelining);
+  ADD_POPART_BOOL_OPTION(disableGradAccumulationTensorStreams);
+  ADD_POPART_BOOL_OPTION(compileEngine);
+  ADD_POPART_BOOL_OPTION(constantWeights);
+  ADD_POPART_BOOL_OPTION(enableEngineCaching);
+  ADD_POPART_BOOL_OPTION(enableMergeExchange);
+  ADD_POPART_BOOL_OPTION(enableFloatingPointChecks);
+  ADD_POPART_BOOL_OPTION(enableStochasticRounding);
+  ADD_POPART_BOOL_OPTION_ALIAS(enable_stochastic_rounding,
+                               enableStochasticRounding);
+  ADD_POPART_BOOL_OPTION(explicitRecomputation);
+  ADD_POPART_BOOL_OPTION(enableExplicitMainLoops);
+  ADD_POPART_BOOL_OPTION(useHostCopyOps);
+  ADD_POPART_BOOL_OPTION(aliasZeroCopy);
+  ADD_POPART_BOOL_OPTION(delayVarUpdates);
+  ADD_POPART_BOOL_OPTION(enableFullyConnectedPass);
+  ADD_POPART_BOOL_OPTION(enableSerializedMatmuls);
+  ADD_POPART_BOOL_OPTION(enableStableNorm);
+  ADD_POPART_BOOL_OPTION(decomposeGradSum);
+  ADD_POPART_BOOL_OPTION(enableDistributedReplicatedGraphs);
+  ADD_POPART_BOOL_OPTION(groupHostSync);
+  ADD_POPART_BOOL_OPTION(automaticLossScalingSettings.enabled);
+  ADD_POPART_BOOL_OPTION(instrumentWithHardwareCycleCounter);
+  ADD_POPART_BOOL_OPTION(enableSupportedDataTypeCasting);
+  ADD_POPART_BOOL_OPTION(groupNormStridedChannelGrouping);
+  ADD_POPART_BOOL_OPTION(scheduleNonWeightUpdateGradientConsumersEarly);
+
+  ADD_POPART_DOUBLE_OPTION(outlineSequenceBreakCost);
+  ADD_POPART_DOUBLE_OPTION(outlineThreshold);
+  ADD_POPART_DOUBLE_OPTION(timeLimitScheduler);
+  ADD_POPART_DOUBLE_OPTION(automaticLossScalingSettings.binEdgeLocation);
+  ADD_POPART_DOUBLE_OPTION(
+      automaticLossScalingSettings.thresholdUpperCountProportion);
+
+#undef ADD_POPART_STRING_OPTION
+#undef ADD_POPART_DOUBLE_OPTION
+#undef ADD_POPART_UINT64_OPTION
+#undef ADD_POPART_BOOL_OPTION
+#undef ADD_POPART_ENUM_OPTION
+#undef ADD_POPART_STRING_OPTION_ALIAS
+#undef ADD_POPART_DOUBLE_OPTION_ALIAS
+#undef ADD_POPART_UINT64_OPTION_ALIAS
+#undef ADD_POPART_BOOL_OPTION_ALIAS
+#undef ADD_POPART_ENUM_OPTION_ALIAS
+
+  RegisterSetter(bool_options, "enable_manual_shard", [&](bool value) {
+    if (value) {
+      popart_options.virtualGraphMode = popart::VirtualGraphMode::Manual;
+    } else {
+      popart_options.virtualGraphMode = popart::VirtualGraphMode::Off;
+    }
+  });
+
+  RegisterGetter(options_getter, options_type, "enable_manual_shard", "bool",
+                 [&]() {
+                   return std::to_string(popart_options.virtualGraphMode ==
+                                         popart::VirtualGraphMode::Manual);
+                 });
+
+  RegisterSetter(bool_options, "enable_half_partial", [&](bool value) {
+    if (value) {
+      popart_options.partialsTypeMatMuls = "half";
+    } else {
+      popart_options.partialsTypeMatMuls = "float";
+    }
+  });
+
+  RegisterGetter(
+      options_getter, options_type, "enable_half_partial", "bool", [&]() {
+        return std::to_string(popart_options.partialsTypeMatMuls == "half");
+      });
+
+  RegisterSetter(
+      container_options, "dotChecks",
+      [&](const std::pair<std::string, std::string>& p) {
+        std::uint64_t value = std::stoul(p.first);
+        popart_options.dotChecks.insert(static_cast<popart::DotCheck>(value));
+      });
+
+  RegisterGetter(
+      vector_options_getter, options_type, "dotChecks", "vector", [&]() {
+        std::vector<std::string> res;
+        for (auto x : popart_options.dotChecks) {
+          res.push_back(std::to_string(static_cast<std::uint64_t>(x)));
+        }
+        return res;
+      });
+
+  RegisterSetter(container_options, "hardwareInstrumentations",
+                 [&](const std::pair<std::string, std::string>& p) {
+                   std::uint64_t value = std::stoul(p.first);
+                   popart_options.hardwareInstrumentations.insert(
+                       static_cast<popart::Instrumentation>(value));
+                 });
+
+  RegisterGetter(
+      vector_options_getter, options_type, "hardwareInstrumentations", "vector",
+      [&]() {
+        std::vector<std::string> res;
+        for (auto x : popart_options.hardwareInstrumentations) {
+          res.push_back(std::to_string(static_cast<std::uint64_t>(x)));
+        }
+        return res;
+      });
+
+  RegisterSetter(container_options, "customCodelets",
+                 [&](const std::pair<std::string, std::string>& p) {
+                   popart_options.customCodelets.push_back(p.first);
+                 });
+
+  RegisterGetter(vector_options_getter, options_type, "customCodelets",
+                 "vector", [&]() {
+                   std::vector<std::string> res;
+                   for (auto x : popart_options.customCodelets) {
+                     res.push_back(x);
+                   }
+                   return res;
+                 });
+
+  RegisterSetter(container_options, "engineOptions",
+                 [&](const std::pair<std::string, std::string>& p) {
+                   popart_options.engineOptions.emplace(p);
+                 });
+
+  RegisterGetter(map_options_getter, options_type, "engineOptions", "map",
+                 [&]() { return popart_options.engineOptions; });
+
+  RegisterSetter(container_options, "reportOptions",
+                 [&](const std::pair<std::string, std::string>& p) {
+                   popart_options.reportOptions.emplace(p);
+                 });
+
+  RegisterGetter(map_options_getter, options_type, "reportOptions", "map",
+                 [&]() { return popart_options.reportOptions; });
+
+  RegisterSetter(container_options, "convolutionOptions",
+                 [&](const std::pair<std::string, std::string>& p) {
+                   popart_options.convolutionOptions.emplace(p);
+                 });
+
+  RegisterGetter(map_options_getter, options_type, "convolutionOptions", "map",
+                 [&]() { return popart_options.convolutionOptions; });
+
+  RegisterSetter(container_options, "lstmOptions",
+                 [&](const std::pair<std::string, std::string>& p) {
+                   popart_options.lstmOptions.emplace(p);
+                 });
+
+  RegisterGetter(map_options_getter, options_type, "lstmOptions", "map",
+                 [&]() { return popart_options.lstmOptions; });
+
+  RegisterSetter(container_options, "gclOptions",
+                 [&](const std::pair<std::string, std::string>& p) {
+                   popart_options.gclOptions.emplace(p);
+                 });
+
+  RegisterGetter(map_options_getter, options_type, "gclOptions", "map",
+                 [&]() { return popart_options.gclOptions; });
+}
+
+void IpuStrategy::AddBoolOption(const std::string& option, bool value) {
+  set(option, value, bool_options, "bool");
+}
+
+void IpuStrategy::AddUint64Option(const std::string& option,
+                                  std::uint64_t value) {
+  set(option, value, uint64_options, "uint64");
+}
+
+void IpuStrategy::AddDoubleOption(const std::string& option, double value) {
+  set(option, value, double_options, "double");
+}
+
+void IpuStrategy::AddStringOption(const std::string& option,
+                                  const std::string& value) {
+  set(option, value, string_options, "string");
+}
+
+void IpuStrategy::InsertStringOption(const std::string& option,
+                                     const std::string& value) {
+  set(option, std::pair<std::string, std::string>(value, ""),
+      container_options, "vector");
+}
+
+void IpuStrategy::InsertStringPairOption(const std::string& option,
+                                         const std::string& key,
+                                         const std::string& value) {
+  set(option, std::pair<std::string, std::string>(key, value),
+      container_options, "map");
+}
+
+void IpuStrategy::SetTensorLocation(const std::string& tensor,
+                                    const std::string& opt,
+                                    std::uint64_t value) {
+  VLOG(10) << "Setting " << opt << " to " << value << " for location "
+           << tensor;
+  popart::TensorLocationSettings* settings;
+  if (tensor == "location_activation") {
+    settings = &popart_options.activationTensorLocationSettings;
+  } else if (tensor == "location_weight") {
+    settings = &popart_options.weightTensorLocationSettings;
+  } else if (tensor == "location_optimizer") {
+    settings = &popart_options.optimizerStateTensorLocationSettings;
+  } else if (tensor == "location_accumulator") {
+    settings = &popart_options.accumulatorTensorLocationSettings;
+  } else {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "Unknown tensor location: %s", tensor));
+  }
+
+  if (opt == "minElementsForOffChip") {
+    settings->minElementsForOffChip = value;
+  } else if (opt == "minElementsForReplicatedTensorSharding") {
+    settings->minElementsForReplicatedTensorSharding = value;
+  } else if (opt == "onChip") {
+    settings->location.storage = value > 0 ? popart::TensorStorage::OnChip
+                                           : popart::TensorStorage::OffChip;
+  } else if (opt == "useReplicatedTensorSharding") {
+    settings->location.replicatedTensorSharding =
+        value > 0 ? popart::ReplicatedTensorSharding::On
+                  : popart::ReplicatedTensorSharding::Off;
+  } else if (opt == "useIOTilesToLoad") {
+    settings->location.loadTileSet =
+        value > 0 ? popart::TileSet::IO : popart::TileSet::Compute;
+  } else if (opt == "useIOTilesToStore") {
+    settings->location.storageTileSet =
+        value > 0 ? popart::TileSet::IO : popart::TileSet::Compute;
+  } else {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "Unknown option ' %s' for tensor location: %s", opt, tensor));
+  }
+}
+
+std::string IpuStrategy::GetOption(const std::string& option) {
+  return get(option, options_getter);
+}
+
+std::vector<std::string> IpuStrategy::GetVectorOption(
+    const std::string& option) {
+  return get(option, vector_options_getter);
+}
+
+std::map<std::string, std::string> IpuStrategy::GetMapOption(
+    const std::string& option) {
+  return get(option, map_options_getter);
+}
+
+std::string IpuStrategy::GetOptionType(const std::string& option) {
+  return options_type[option];
+}
+
+void IpuStrategy::EnablePattern(const std::string& t) {
   VLOG(10) << "enable popart pattern: " << t;
   popart_patterns.enablePattern(t, true);
 }

-void IpuStrategy::disablePattern(const std::string& t) {
+void IpuStrategy::DisablePattern(const std::string& t) {
   VLOG(10) << "disable popart pattern: " << t;
   popart_patterns.enablePattern(t, false);
 }

-const bool IpuStrategy::isPatternEnabled(const std::string& t) {
+const bool IpuStrategy::IsPatternEnabled(const std::string& t) {
   return popart_patterns.isPatternEnabled(t);
 }
diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.h b/paddle/fluid/platform/device/ipu/ipu_strategy.h
index 08f09b96cc0cdb60bff61c932f866fa6a7d87b9d..0e2af26454c401960773de20744f285aecec6bed 100644
--- a/paddle/fluid/platform/device/ipu/ipu_strategy.h
+++ b/paddle/fluid/platform/device/ipu/ipu_strategy.h
@@ -14,50 +14,19 @@ limitations under the License. */

 #pragma once

-#include
+#include
 #include
 #include

-#include "popart/patterns/patterns.hpp"
+#include "paddle/fluid/platform/enforce.h"

 namespace paddle {
 namespace platform {
 namespace ipu {

-using VirtualGraphMode = popart::VirtualGraphMode;
-using RecomputationType = popart::RecomputationType;
-
 struct IpuStrategy {
-  IpuStrategy() {
-    // we always save optimizer state to OffChip and enable rts for saving
-    // memory
-    auto storage = popart::TensorLocation(popart::TensorStorage::OffChip,
-                                          popart::ReplicatedTensorSharding::On);
-    popart_options.optimizerStateTensorLocationSettings =
-        popart::TensorLocationSettings(storage);
-
-    // We divide the accumulationFactor and replicatedGraphCount after all
-    // reduce
-    popart_options.accumulationAndReplicationReductionType =
-        popart::ReductionType::Mean;
-    popart_options.meanAccumulationAndReplicationReductionStrategy =
-        popart::MeanReductionStrategy::Post;
-
-    popart_options.enableFloatingPointChecks = false;
-
-    // A directory for log traces to be written into.
-    popart_options.logDir = "popart_log";
-  }
-  ~IpuStrategy() {}
-
-  // Number ipus total needed, replica * ipu_per_replica
-  int num_ipus = 1;
-
-  // batches per step
-  int batches_per_step = 1;
-
-  // micro batch-size
-  int micro_batch_size = 1;
+  IpuStrategy();
+
+  // TODO(alleng) create PaddleOptions

   // training flag, true for training
   bool is_training = true;
@@ -67,15 +36,25 @@ struct IpuStrategy {
   // save the trained model
   bool save_onnx_checkpoint = false;

-  // save paddle model per n steps
-  int save_per_n_step = 1;
-
   // average sharding, debugging used
   bool need_avg_shard = false;

   // flag for fp16, true for pure fp16
   bool enable_fp16 = false;

+  // Number ipus total needed, replica * ipu_per_replica
+  int num_ipus = 1;
+
+  // batches per step
+  int batches_per_step = 1;
+
+  // micro batch-size
+  int micro_batch_size = 1;
+
+  // save paddle model per n steps
+  int save_per_n_step = 1;
+
+  // TODO(alleng) remove this param
   // available memory proportion, 0.0f for disable
   float available_memory_proportion = 0.0f;
@@ -88,12 +67,71 @@ struct IpuStrategy {
   // popart session option
   popart::SessionOptions popart_options;
+
+  // popart pattern manager
   popart::Patterns popart_patterns;

+ private:
+  std::map<std::string, std::function<void(bool)>> bool_options;
+  std::map<std::string, std::function<void(std::uint64_t)>> uint64_options;
+  std::map<std::string, std::function<void(double)>> double_options;
+  std::map<std::string, std::function<void(std::string)>> string_options;
+  std::map<std::string,
+           std::function<void(std::pair<std::string, std::string>)>>
+      container_options;
+
+  std::map<std::string, std::function<std::string()>> options_getter;
+  std::map<std::string, std::function<std::vector<std::string>()>>
+      vector_options_getter;
+  std::map<std::string, std::function<std::map<std::string, std::string>()>>
+      map_options_getter;
+  std::map<std::string, std::string> options_type;
+
+  template <typename ValueType>
+  void set(
+      const std::string &key, ValueType value,
+      std::map<std::string, std::function<void(ValueType)>> &options,  // NOLINT
+      const std::string &type_str) {
+    auto it = options.find(key);
+    PADDLE_ENFORCE_NE(it, options.end(), platform::errors::InvalidArgument(
+                                             "Cannot find option: %s, type: %s "
+                                             "when setting IpuStrategy options",
+                                             key, type_str));
+    it->second(value);
+  }
+
+  template <typename ValueType>
+  ValueType get(
+      const std::string &key,
+      std::map<std::string, std::function<ValueType()>> &options) {  // NOLINT
+    auto it = options.find(key);
+    PADDLE_ENFORCE_NE(
+        it, options.end(),
+        platform::errors::InvalidArgument(
+            "Cannot find option name: %s when trying to get IpuStrategy option",
+            key));
+    return it->second();
+  }
+
  public:
-  void enablePattern(const std::string& t);
-  void disablePattern(const std::string& t);
-  const bool isPatternEnabled(const std::string& t);
+  void AddBoolOption(const std::string &option, bool value);
+  void AddUint64Option(const std::string &option, std::uint64_t value);
+  void AddDoubleOption(const std::string &option, double value);
+  void AddStringOption(const std::string &option, const std::string &value);
+  void InsertStringOption(const std::string &option, const std::string &value);
+  void InsertStringPairOption(const std::string &option, const std::string &key,
+                              const std::string &value);
+  void SetTensorLocation(const std::string &tensor, const std::string &option,
+                         std::uint64_t value);
+
+  std::string GetOption(const std::string &);
+  std::vector<std::string> GetVectorOption(const std::string &);
+  std::map<std::string, std::string> GetMapOption(const std::string &);
+  std::string GetOptionType(const std::string &);
+
+  void EnablePattern(const std::string &t);
+  void DisablePattern(const std::string &t);
+  const bool IsPatternEnabled(const std::string &t);
 };

 }  // namespace ipu
diff --git a/paddle/fluid/platform/device/ipu/ipu_utils.cc b/paddle/fluid/platform/device/ipu/ipu_utils.cc
index 6e221fae84ef53e1d139d4a71ebba0f5360d90d8..720de822608b6a78a3518f4717faeae87e9b2865 100644
--- a/paddle/fluid/platform/device/ipu/ipu_utils.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_utils.cc
@@ -22,7 +22,7 @@ namespace ipu {
 void* PaddleIArray::data() { return tensor_.data(); }

 popart::DataType PaddleIArray::dataType() const {
-  return VarType2PopartType(tensor_.type());
+  return PdDataType2PopartType(tensor_.dtype());
 }

 std::size_t PaddleIArray::rank() const { return tensor_.dims().size(); }
@@ -66,11 +66,44 @@ popart::DataType VarType2PopartType(
     case framework::proto::VarType::COMPLEX128:
       return popart::DataType::COMPLEX128;
     default:
-      PADDLE_THROW(paddle::platform::errors::Unavailable(
+      PADDLE_THROW(paddle::platform::errors::Unimplemented(
           "Unsupported Paddle var type."));
   }
 }

+popart::DataType PdDataType2PopartType(
+    const paddle::experimental::DataType type) {
+  switch (type) {
+    case paddle::experimental::DataType::UINT8:
+      return popart::DataType::UINT8;
+    case paddle::experimental::DataType::INT8:
+      return popart::DataType::INT8;
+    case paddle::experimental::DataType::INT16:
+      return popart::DataType::INT16;
+    case paddle::experimental::DataType::INT32:
+      return popart::DataType::INT32;
+    case paddle::experimental::DataType::INT64:
+      return popart::DataType::INT64;
+    case paddle::experimental::DataType::BOOL:
+      return popart::DataType::BOOL;
+    case paddle::experimental::DataType::FLOAT64:
+      return popart::DataType::DOUBLE;
+    case paddle::experimental::DataType::FLOAT32:
+      return popart::DataType::FLOAT;
+    case paddle::experimental::DataType::FLOAT16:
+      return popart::DataType::FLOAT16;
+    case paddle::experimental::DataType::BFLOAT16:
+      return popart::DataType::BFLOAT16;
+    case paddle::experimental::DataType::COMPLEX64:
+      return popart::DataType::COMPLEX64;
+    case paddle::experimental::DataType::COMPLEX128:
+      return popart::DataType::COMPLEX128;
+    default:
+      PADDLE_THROW(paddle::platform::errors::Unimplemented(
+          "Unsupported Paddle data type."));
+  }
+}
+
 framework::proto::VarType::Type PopartType2VarType(
     const popart::DataType type) {
   switch (type) {
diff --git a/paddle/fluid/platform/device/ipu/ipu_utils.h b/paddle/fluid/platform/device/ipu/ipu_utils.h
index 1b8d25acff47386c67e634f870ba9898b0b09b7a..7644513cc0207885c3b01709be5b894c532f4647 100644
--- a/paddle/fluid/platform/device/ipu/ipu_utils.h
+++ b/paddle/fluid/platform/device/ipu/ipu_utils.h
@@ -19,6 +19,7 @@ limitations under the License. */
 #include
 #include

+#include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
@@ -83,14 +84,15 @@ class PaddleIArray final : public popart::IArray {
 };

 popart::DataType VarType2PopartType(const framework::proto::VarType::Type type);
+popart::DataType PdDataType2PopartType(
+    const paddle::experimental::DataType type);
 framework::proto::VarType::Type PopartType2VarType(const popart::DataType type);
 popart::DataType OnnxDtype2PopartType(const int type);
 bool GetBoolEnv(std::string str);

 template <typename T>
 std::unique_ptr<popart::NDArrayWrapper<T>> Tensor2IArray(const Tensor& tensor) {
-  auto dtype =
-      VarType2PopartType(framework::TransToProtoVarType(tensor.dtype()));
+  auto dtype = PdDataType2PopartType(tensor.dtype());
   auto shape = std::vector<int64_t>();
   for (size_t i = 0; i < tensor.dims().size(); ++i) {
     shape.push_back(tensor.dims().at(i));
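
Reviewer note, not part of the patch: below is a minimal sketch of how the option interface added to IpuStrategy above is meant to be driven end to end. Every method name, option key, and tensor-location name is taken from this diff; the wrapping function and the "debug.allowOutOfMemory" engine-option key are illustrative assumptions only.

    #include "paddle/fluid/platform/device/ipu/ipu_strategy.h"

    using paddle::platform::ipu::IpuStrategy;

    // Hypothetical configuration helper; the IpuStrategy() constructor has
    // already registered every option name used below.
    void ConfigureIpuStrategy() {
      IpuStrategy strategy;

      // Scalar options dispatch through the registered bool/uint64/double
      // setters (bool_options, uint64_options, double_options).
      strategy.AddBoolOption("is_training", true);
      strategy.AddUint64Option("num_ipus", 2);
      strategy.AddDoubleOption("available_memory_proportion", 0.3);

      // "enable_manual_shard" is a registered alias that flips
      // popart_options.virtualGraphMode between Manual and Off.
      strategy.AddBoolOption("enable_manual_shard", true);

      // Container options insert key/value pairs into the corresponding
      // popart::SessionOptions map; this particular key is an assumption.
      strategy.InsertStringPairOption("engineOptions",
                                      "debug.allowOutOfMemory", "true");

      // Tensor-location tuning: "location_weight" selects
      // weightTensorLocationSettings, "onChip" its storage field.
      strategy.SetTensorLocation("location_weight", "onChip", 1);

      // Any registered option can be read back as a string, and its
      // registered type queried.
      VLOG(10) << "num_ipus = " << strategy.GetOption("num_ipus")
               << " (type: " << strategy.GetOptionType("num_ipus") << ")";
    }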