Unverified commit 46161679, authored by Allen Guo, committed by GitHub

[IPU] Update IpuStrategy (#39644)

* Update IpuStrategy

* fix ci

* rerun ci
Parent bc3ca678
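At a high level, this commit replaces IpuStrategy's hard-coded option handling with a registry of typed setter/getter callbacks. A minimal usage sketch (illustrative only: the wrapper function and call site are hypothetical, while the methods and option names come from the code added below):

#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"

// Hypothetical call site for the registry-based option API added here.
void Configure(paddle::platform::ipu::IpuStrategy* strategy) {
  strategy->AddBoolOption("is_training", true);       // plain struct field
  strategy->AddUint64Option("num_ipus", 2);           // plain struct field
  strategy->AddStringOption("logDir", "popart_log");  // forwarded to popart::SessionOptions
  strategy->SetTensorLocation("location_weight", "onChip", 1);
  // Options are read back as strings, with a recorded type name.
  VLOG(10) << strategy->GetOption("num_ipus");      // "2"
  VLOG(10) << strategy->GetOptionType("num_ipus");  // "uint64"
}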
@@ -11,31 +11,35 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 if(WITH_IPU)
   set(POPLAR_DIR CACHE PATH "Path to a Poplar install")
   set(POPART_DIR CACHE PATH "Path to a Popart install")
   set(POPLAR_SDK_DIR CACHE PATH "Path to an extracted SDK archive or to a Poplar & Popart install directory (Will populate POPLAR_DIR and POPART_DIR)")
+  # support setting SDK both from environment variable or command line arguments
   if(DEFINED ENV{POPLAR_SDK_DIR})
     set(POPLAR_SDK_DIR $ENV{POPLAR_SDK_DIR})
+  endif()
+  if(EXISTS ${POPLAR_SDK_DIR})
     execute_process(COMMAND find ${POPLAR_SDK_DIR}/ -maxdepth 1 -type d -name "popart*"
                     OUTPUT_VARIABLE POPART_DIR OUTPUT_STRIP_TRAILING_WHITESPACE)
     execute_process(COMMAND find ${POPLAR_SDK_DIR}/ -maxdepth 1 -type d -name "poplar-*" -o -name "poplar"
                     OUTPUT_VARIABLE POPLAR_DIR OUTPUT_STRIP_TRAILING_WHITESPACE)
+    if(NOT IS_DIRECTORY "${POPLAR_DIR}")
+      message(FATAL_ERROR "Couldn't find a \"poplar\" or \"poplar-*\" folder in '${POPLAR_SDK_DIR}'")
+    endif()
+    if(NOT IS_DIRECTORY "${POPART_DIR}")
+      message(FATAL_ERROR "Couldn't find a \"popart*\" folder in '${POPLAR_SDK_DIR}'")
+    endif()
+  else()
+    message(FATAL_ERROR "You must provide a path to a Poplar install using export POPLAR_SDK_DIR=/path/to/poplar_sdk")
   endif()
-  message("POPLAR_DIR is ${POPLAR_DIR}")
-  message("POPART_DIR is ${POPART_DIR}")
+  if(DEFINED ENV{POPLAR_DIR})
+    set(POPLAR_DIR $ENV{POPLAR_DIR})
+  endif()
+  if(DEFINED ENV{POPART_DIR})
+    set(POPART_DIR $ENV{POPART_DIR})
+  endif()
   if(EXISTS ${POPLAR_DIR})
+    message("POPLAR_DIR is ${POPLAR_DIR}")
+    if(NOT IS_DIRECTORY "${POPLAR_DIR}")
+      message(FATAL_ERROR "Couldn't find a \"poplar\" or \"poplar-*\" folder in '${POPLAR_SDK_DIR}'")
+    endif()
     list(APPEND CMAKE_PREFIX_PATH ${POPLAR_DIR})
     set(ENABLE_POPLAR_CMD "source ${POPLAR_DIR}/enable.sh")
     find_package(poplar REQUIRED)
@@ -45,8 +49,11 @@ if(WITH_IPU)
   if(NOT poplar_FOUND)
     message(FATAL_ERROR "You must provide a path to a Poplar install using -DPOPLAR_DIR=/path/to/popart/build/install")
   endif()
   if(EXISTS ${POPART_DIR})
+    message("POPART_DIR is ${POPART_DIR}")
+    if(NOT IS_DIRECTORY "${POPART_DIR}")
+      message(FATAL_ERROR "Couldn't find a \"popart*\" folder in '${POPLAR_SDK_DIR}'")
+    endif()
     list(APPEND CMAKE_PREFIX_PATH ${POPART_DIR})
     set(ENABLE_POPART_CMD "source ${POPART_DIR}/enable.sh")
     find_package(popart REQUIRED COMPONENTS popart-only)
@@ -56,6 +63,7 @@ if(WITH_IPU)
   if(NOT popart_FOUND)
     message(FATAL_ERROR "You must provide a path to a Popart build using -DPOPART_DIR=/path/to/popart/build")
   endif()
   add_definitions(-DONNX_NAMESPACE=onnx)
   add_custom_target(extern_poplar DEPENDS poplar popart-only)
 endif()
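In practice, the build is pointed either at an extracted SDK (for example, export POPLAR_SDK_DIR=/path/to/poplar_sdk before configuring with -DWITH_IPU=ON) or at separate Poplar and Popart installs via the POPLAR_DIR and POPART_DIR environment variables, which the checks above now validate eagerly.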
@@ -64,6 +64,19 @@ void ForwardGraphExtractPass::ApplyImpl(ir::Graph* graph) const {
       }
     }
   }
+  // learning_rate var
+  for (auto* node : all_ops[OpRole::kOptimize]) {
+    if (node->Op()->Inputs().count("LearningRate") &&
+        !node->Op()->Inputs().at("LearningRate").empty()) {
+      auto lr_var_name = node->Op()->Inputs().at("LearningRate").front();
+      for (auto* in_var : node->inputs) {
+        if (in_var->Name() == lr_var_name) {
+          VLOG(10) << "found LearningRate var: " << in_var->Name();
+          forward_vars.insert(in_var);
+        }
+      }
+    }
+  }
   // control_vars & backward_vars
   for (auto* node : graph->Nodes()) {
     if (!node->IsVar()) {
...
@@ -51,11 +51,11 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
   if (num_ipus > 1) {
     ipu_strategy_instance_->need_avg_shard = true;
     ipu_strategy_instance_->popart_options.virtualGraphMode =
-        platform::ipu::VirtualGraphMode::Manual;
+        popart::VirtualGraphMode::Manual;
   } else {
     ipu_strategy_instance_->need_avg_shard = false;
     ipu_strategy_instance_->popart_options.virtualGraphMode =
-        platform::ipu::VirtualGraphMode::Off;
+        popart::VirtualGraphMode::Off;
   }
   // total num IPUs = num_ipus * replica_num
   ipu_strategy_instance_->num_ipus = num_ipus * replica_num;
...
@@ -46,8 +46,7 @@ class IpuRuntimeOp : public framework::OperatorBase {
     for (size_t i = 0; i < outputs.size(); ++i) {
       auto* out = outputs[i];
       if (out->dims().size() == 0) {
-        auto tensor_dtype = framework::TransToProtoVarType(out->dtype());
-        auto sizeof_dtype = framework::SizeOfType(tensor_dtype);
+        auto sizeof_dtype = framework::DataTypeSize(out->dtype());
         int64_t dim = out->memory_size() / sizeof_dtype;
         out->Resize({dim});
         VLOG(10) << "set ipu_runtime_op output: " << output_names[i]
...
@@ -102,7 +102,7 @@ void IpuBackend::SetCustomOps(
   compiler_->SetCustomOps(custom_ops);
 }

-void IpuBackend::SaveMoldeProto(const std::string& path) {
+void IpuBackend::SaveModelProto(const std::string& path) {
   if (ipu_strategy_->is_training && is_prepared_) {
     executor_->SaveModelToHost(path);
   } else if (is_compiled_) {
...
@@ -74,7 +74,7 @@ class IpuBackend {
   void SetCustomOps(const std::vector<IpuCustomOpIdentifier> &custom_ops);

   // save compiled model to onnx
-  void SaveMoldeProto(const std::string &path);
+  void SaveModelProto(const std::string &path);

 private:
   void Prepare();
...
@@ -297,9 +297,8 @@ void Compiler::LowerConstants(const Graph* graph, const Scope* scope) {
     tensor->Resize(ddim);

     auto const_data = std::unique_ptr<popart::ConstVoidData>();
-    popart::TensorInfo tensor_info(
-        VarType2PopartType(framework::TransToProtoVarType(tensor->dtype())),
-        shape);
+    popart::TensorInfo tensor_info(PdDataType2PopartType(tensor->dtype()),
+                                   shape);
     const_data.reset(new popart::ConstVoidData(tensor->data(), tensor_info));
     popart::TensorId result = builder_->aiOnnxOpset11().constant(*const_data);
     SetIpuIndexStage(result, op_desc);
@@ -327,8 +326,7 @@ void Compiler::LowerWeights(const Graph* graph, const Scope* scope) {
       auto var = scope->FindVar(var_name);
       if (var) {
         auto tensor = var->Get<framework::LoDTensor>();
-        auto dtype = VarType2PopartType(
-            framework::TransToProtoVarType(tensor.dtype()));
+        auto dtype = PdDataType2PopartType(tensor.dtype());
         auto shape = std::vector<int64_t>();
         for (size_t i = 0; i < tensor.dims().size(); ++i) {
           shape.push_back(tensor.dims().at(i));
...
@@ -154,7 +154,12 @@ void Executor::AcquireDevice() {
   bool use_ipu_model = GetBoolEnv("POPLAR_IPUMODEL");
   if (use_ipu_model) {
-    std::map<std::string, std::string> deviceOpts{{"numIPUs", "1 "}};
+    std::map<std::string, std::string> deviceOpts{
+        {
+            "numIPUs", std::to_string(ipu_strategy_->num_ipus),
+        },
+        {"ipuVersion", "ipu2"},
+    };
     device_ = popart::DeviceManager::createDeviceManager().createIpuModelDevice(
         deviceOpts);
   } else {
@@ -210,8 +215,8 @@ void Executor::SetWeightsIO() {
 void Executor::ConvertWeights(bool align_to_popart) {
   for (auto weight_pair : executor_resources_->weights_and_opt_state) {
     auto paddle_var = scope_->GetVar(weight_pair.second);
-    auto paddle_var_dtype = VarType2PopartType(
-        paddle_var->GetMutable<framework::LoDTensor>()->type());
+    auto paddle_var_dtype = PdDataType2PopartType(
+        paddle_var->GetMutable<framework::LoDTensor>()->dtype());
     PADDLE_ENFORCE_EQ((paddle_var_dtype == popart::DataType::FLOAT ||
                        paddle_var_dtype == popart::DataType::FLOAT16),
...
@@ -13,23 +13,451 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
#include <glog/logging.h>
namespace {
template <typename Value, typename Lambda>
void RegisterSetter(
std::map<std::string, std::function<void(Value)>>& options, // NOLINT
const std::string& name, Lambda setter) {
options[name] = setter;
}
template <typename Value, typename Lambda>
void RegisterGetter(
std::map<std::string, std::function<Value()>>& options, // NOLINT
std::map<std::string, std::string>& options_type, // NOLINT
const std::string& name, const std::string& type_str, Lambda getter) {
options[name] = getter;
options_type[name] = type_str;
}
} // namespace
namespace paddle {
namespace platform {
namespace ipu {

IpuStrategy::IpuStrategy() {
#define ADD_BOOL_OPTION(name) \
RegisterSetter(bool_options, #name, [&](bool value) { name = value; }); \
RegisterGetter(options_getter, options_type, #name, "bool", \
[&]() { return std::to_string(name); })
#define ADD_UINT64_OPTION(name) \
RegisterSetter(uint64_options, #name, \
[&](std::uint64_t value) { name = value; }); \
RegisterGetter(options_getter, options_type, #name, "uint64", \
[&]() { return std::to_string(name); })
#define ADD_DOUBLE_OPTION(name) \
RegisterSetter(double_options, #name, [&](double value) { name = value; }); \
RegisterGetter(options_getter, options_type, #name, "double", \
[&]() { return std::to_string(name); })
#define ADD_STRING_OPTION(name) \
RegisterSetter(string_options, #name, \
[&](const std::string& value) { name = value; }); \
RegisterGetter(options_getter, options_type, #name, "string", \
[&]() { return name; })
ADD_BOOL_OPTION(is_training);
ADD_BOOL_OPTION(save_init_onnx);
ADD_BOOL_OPTION(save_onnx_checkpoint);
ADD_BOOL_OPTION(need_avg_shard);
ADD_BOOL_OPTION(enable_fp16);
ADD_UINT64_OPTION(num_ipus);
ADD_UINT64_OPTION(batches_per_step);
ADD_UINT64_OPTION(micro_batch_size);
ADD_UINT64_OPTION(save_per_n_step);
ADD_DOUBLE_OPTION(available_memory_proportion);
ADD_DOUBLE_OPTION(loss_scaling);
ADD_DOUBLE_OPTION(max_weight_norm);
#undef ADD_STRING_OPTION
#undef ADD_DOUBLE_OPTION
#undef ADD_UINT64_OPTION
#undef ADD_BOOL_OPTION
#define ADD_POPART_ENUM_OPTION_ALIAS(name, aliased_name, EnumType) \
RegisterSetter(uint64_options, #name, [&](std::uint64_t value) { \
PADDLE_ENFORCE_LT( \
value, static_cast<std::uint64_t>(popart::EnumType::N), \
errors::InvalidArgument("Value for %s out of range", #EnumType)); \
popart_options.aliased_name = static_cast<popart::EnumType>(value); \
}); \
RegisterGetter(options_getter, options_type, #name, "uint64", [&]() { \
return std::to_string( \
static_cast<std::uint64_t>(popart_options.aliased_name)); \
})
#define ADD_POPART_BOOL_OPTION_ALIAS(name, aliased_name) \
RegisterSetter(bool_options, #name, \
[&](bool value) { popart_options.aliased_name = value; }); \
RegisterGetter(options_getter, options_type, #name, "bool", [&]() { \
return std::to_string(popart_options.aliased_name); \
})
#define ADD_POPART_UINT64_OPTION_ALIAS(name, aliased_name) \
RegisterSetter(uint64_options, #name, [&](std::uint64_t value) { \
popart_options.aliased_name = value; \
}); \
RegisterGetter(options_getter, options_type, #name, "uint64", [&]() { \
return std::to_string(popart_options.aliased_name); \
})
#define ADD_POPART_DOUBLE_OPTION_ALIAS(name, aliased_name) \
RegisterSetter(double_options, #name, \
[&](double value) { popart_options.aliased_name = value; }); \
RegisterGetter(options_getter, options_type, #name, "double", [&]() { \
return std::to_string(popart_options.aliased_name); \
})
#define ADD_POPART_STRING_OPTION_ALIAS(name, aliased_name) \
RegisterSetter(string_options, #name, [&](const std::string& value) { \
popart_options.aliased_name = value; \
}); \
RegisterGetter(options_getter, options_type, #name, "string", \
[&]() { return popart_options.aliased_name; })
#define ADD_POPART_ENUM_OPTION(name, EnumType) \
ADD_POPART_ENUM_OPTION_ALIAS(name, name, EnumType)
#define ADD_POPART_BOOL_OPTION(name) ADD_POPART_BOOL_OPTION_ALIAS(name, name)
#define ADD_POPART_UINT64_OPTION(name) \
ADD_POPART_UINT64_OPTION_ALIAS(name, name)
#define ADD_POPART_DOUBLE_OPTION(name) \
ADD_POPART_DOUBLE_OPTION_ALIAS(name, name)
#define ADD_POPART_STRING_OPTION(name) \
ADD_POPART_STRING_OPTION_ALIAS(name, name)
ADD_POPART_ENUM_OPTION(autodiffSettings.stitchStrategy,
AutodiffStitchStrategy);
ADD_POPART_ENUM_OPTION(batchSerializationSettings.transformContext,
BatchSerializationTransformContext);
ADD_POPART_ENUM_OPTION(batchSerializationSettings.method,
BatchSerializationMethod);
ADD_POPART_ENUM_OPTION(batchSerializationSettings.batchSchedule,
BatchSerializationBatchSchedule);
ADD_POPART_ENUM_OPTION(autoRecomputation, RecomputationType);
ADD_POPART_ENUM_OPTION(mergeVarUpdate, MergeVarUpdateType);
ADD_POPART_ENUM_OPTION(virtualGraphMode, VirtualGraphMode);
ADD_POPART_ENUM_OPTION(syntheticDataMode, SyntheticDataMode);
ADD_POPART_ENUM_OPTION(subgraphCopyingStrategy, SubgraphCopyingStrategy);
ADD_POPART_ENUM_OPTION(accumulationAndReplicationReductionType,
ReductionType);
ADD_POPART_ENUM_OPTION(meanAccumulationAndReplicationReductionStrategy,
MeanReductionStrategy);
ADD_POPART_STRING_OPTION(logDir);
ADD_POPART_STRING_OPTION(cachePath);
ADD_POPART_STRING_OPTION(partialsTypeMatMuls);
ADD_POPART_STRING_OPTION(customCodeletCompileFlags);
ADD_POPART_STRING_OPTION(serializedPoprithmsShiftGraphsDir);
ADD_POPART_STRING_OPTION(kahnTieBreaker);
ADD_POPART_UINT64_OPTION(executionPhaseSettings.phases);
ADD_POPART_UINT64_OPTION(executionPhaseSettings.stages);
ADD_POPART_UINT64_OPTION(batchSerializationSettings.factor);
ADD_POPART_UINT64_OPTION(firstDotOp);
ADD_POPART_UINT64_OPTION(finalDotOp);
ADD_POPART_UINT64_OPTION(numIOTiles);
ADD_POPART_UINT64_OPTION(mergeVarUpdateMemThreshold);
ADD_POPART_UINT64_OPTION(looseThresholdAtPeak);
ADD_POPART_UINT64_OPTION(accumulationFactor);
ADD_POPART_UINT64_OPTION(swapLimitScheduler);
ADD_POPART_UINT64_OPTION(globalReplicationFactor);
ADD_POPART_UINT64_OPTION(globalReplicaOffset);
ADD_POPART_UINT64_OPTION(defaultPrefetchBufferingDepth);
ADD_POPART_UINT64_OPTION(compilationProgressTotal);
ADD_POPART_UINT64_OPTION(transitiveClosureOptimizationThreshold);
ADD_POPART_BOOL_OPTION(batchSerializationSettings.concatOnVirtualGraphChange);
ADD_POPART_BOOL_OPTION(
batchSerializationSettings.concatOnExecutionPhaseChange);
ADD_POPART_BOOL_OPTION(
batchSerializationSettings.concatOnPipelineStageChange);
ADD_POPART_BOOL_OPTION(strictOpVersions);
ADD_POPART_BOOL_OPTION(opxAliasChecking);
ADD_POPART_BOOL_OPTION(opxModifyChecking);
ADD_POPART_BOOL_OPTION(dotOpNames);
ADD_POPART_BOOL_OPTION(exportPoplarComputationGraph);
ADD_POPART_BOOL_OPTION(exportPoplarVertexGraph);
ADD_POPART_BOOL_OPTION(separateCallOpPdfs);
ADD_POPART_BOOL_OPTION(enableOutlining);
ADD_POPART_BOOL_OPTION(enableOutliningCopyCostPruning);
ADD_POPART_BOOL_OPTION(rearrangeAnchorsOnHost);
ADD_POPART_BOOL_OPTION(enablePrefetchDatastreams);
ADD_POPART_BOOL_OPTION(enableNonStableSoftmax);
ADD_POPART_BOOL_OPTION(enableReplicatedGraphs);
ADD_POPART_BOOL_OPTION(enableGradientAccumulation);
ADD_POPART_BOOL_OPTION(instrumentWithHardwareCycleCounter);
ADD_POPART_BOOL_OPTION(enablePipelining);
ADD_POPART_BOOL_OPTION_ALIAS(enable_pipelining, enablePipelining);
ADD_POPART_BOOL_OPTION(disableGradAccumulationTensorStreams);
ADD_POPART_BOOL_OPTION(compileEngine);
ADD_POPART_BOOL_OPTION(constantWeights);
ADD_POPART_BOOL_OPTION(enableEngineCaching);
ADD_POPART_BOOL_OPTION(enableMergeExchange);
ADD_POPART_BOOL_OPTION(enableFloatingPointChecks);
ADD_POPART_BOOL_OPTION(enableStochasticRounding);
ADD_POPART_BOOL_OPTION_ALIAS(enable_stochastic_rounding,
enableStochasticRounding);
ADD_POPART_BOOL_OPTION(explicitRecomputation);
ADD_POPART_BOOL_OPTION(enableExplicitMainLoops);
ADD_POPART_BOOL_OPTION(useHostCopyOps);
ADD_POPART_BOOL_OPTION(aliasZeroCopy);
ADD_POPART_BOOL_OPTION(delayVarUpdates);
ADD_POPART_BOOL_OPTION(enableFullyConnectedPass);
ADD_POPART_BOOL_OPTION(enableSerializedMatmuls);
ADD_POPART_BOOL_OPTION(enableStableNorm);
ADD_POPART_BOOL_OPTION(decomposeGradSum);
ADD_POPART_BOOL_OPTION(enableDistributedReplicatedGraphs);
ADD_POPART_BOOL_OPTION(groupHostSync);
ADD_POPART_BOOL_OPTION(automaticLossScalingSettings.enabled);
ADD_POPART_BOOL_OPTION(instrumentWithHardwareCycleCounter);
ADD_POPART_BOOL_OPTION(enableSupportedDataTypeCasting);
ADD_POPART_BOOL_OPTION(groupNormStridedChannelGrouping);
ADD_POPART_BOOL_OPTION(scheduleNonWeightUpdateGradientConsumersEarly);
ADD_POPART_DOUBLE_OPTION(outlineSequenceBreakCost);
ADD_POPART_DOUBLE_OPTION(outlineThreshold);
ADD_POPART_DOUBLE_OPTION(timeLimitScheduler);
ADD_POPART_DOUBLE_OPTION(automaticLossScalingSettings.binEdgeLocation);
ADD_POPART_DOUBLE_OPTION(
automaticLossScalingSettings.thresholdUpperCountProportion);
#undef ADD_POPART_STRING_OPTION
#undef ADD_POPART_DOUBLE_OPTION
#undef ADD_POPART_UINT64_OPTION
#undef ADD_POPART_BOOL_OPTION
#undef ADD_POPART_ENUM_OPTION
#undef ADD_POPART_STRING_OPTION_ALIAS
#undef ADD_POPART_DOUBLE_OPTION_ALIAS
#undef ADD_POPART_UINT64_OPTION_ALIAS
#undef ADD_POPART_BOOL_OPTION_ALIAS
#undef ADD_POPART_ENUM_OPTION_ALIAS
RegisterSetter(bool_options, "enable_manual_shard", [&](bool value) {
if (value) {
popart_options.virtualGraphMode = popart::VirtualGraphMode::Manual;
} else {
popart_options.virtualGraphMode = popart::VirtualGraphMode::Off;
}
});
RegisterGetter(options_getter, options_type, "enable_manual_shard", "bool",
[&]() {
return std::to_string(popart_options.virtualGraphMode ==
popart::VirtualGraphMode::Manual);
});
RegisterSetter(bool_options, "enable_half_partial", [&](bool value) {
if (value) {
popart_options.partialsTypeMatMuls = "half";
} else {
popart_options.partialsTypeMatMuls = "float";
}
});
RegisterGetter(
options_getter, options_type, "enable_half_partial", "bool", [&]() {
return std::to_string(popart_options.partialsTypeMatMuls == "half");
});
RegisterSetter(
container_options, "dotChecks",
[&](const std::pair<std::string, std::string>& p) {
std::uint64_t value = std::stoul(p.first);
popart_options.dotChecks.insert(static_cast<popart::DotCheck>(value));
});
RegisterGetter(
vector_options_getter, options_type, "dotChecks", "vector", [&]() {
std::vector<std::string> res;
for (auto x : popart_options.dotChecks) {
res.push_back(std::to_string(static_cast<std::uint64_t>(x)));
}
return res;
});
RegisterSetter(container_options, "hardwareInstrumentations",
[&](const std::pair<std::string, std::string>& p) {
std::uint64_t value = std::stoul(p.first);
popart_options.hardwareInstrumentations.insert(
static_cast<popart::Instrumentation>(value));
});
RegisterGetter(
vector_options_getter, options_type, "hardwareInstrumentations", "vector",
[&]() {
std::vector<std::string> res;
for (auto x : popart_options.hardwareInstrumentations) {
res.push_back(std::to_string(static_cast<std::uint64_t>(x)));
}
return res;
});
RegisterSetter(container_options, "customCodelets",
[&](const std::pair<std::string, std::string>& p) {
popart_options.customCodelets.push_back(p.first);
});
RegisterGetter(vector_options_getter, options_type, "customCodelets",
"vector", [&]() {
std::vector<std::string> res;
for (auto x : popart_options.customCodelets) {
res.push_back(x);
}
return res;
});
RegisterSetter(container_options, "engineOptions",
[&](const std::pair<std::string, std::string>& p) {
popart_options.engineOptions.emplace(p);
});
RegisterGetter(map_options_getter, options_type, "engineOptions", "map",
[&]() { return popart_options.engineOptions; });
RegisterSetter(container_options, "reportOptions",
[&](const std::pair<std::string, std::string>& p) {
popart_options.reportOptions.emplace(p);
});
RegisterGetter(map_options_getter, options_type, "reportOptions", "map",
[&]() { return popart_options.reportOptions; });
RegisterSetter(container_options, "convolutionOptions",
[&](const std::pair<std::string, std::string>& p) {
popart_options.convolutionOptions.emplace(p);
});
RegisterGetter(map_options_getter, options_type, "convolutionOptions", "map",
[&]() { return popart_options.convolutionOptions; });
RegisterSetter(container_options, "lstmOptions",
[&](const std::pair<std::string, std::string>& p) {
popart_options.lstmOptions.emplace(p);
});
RegisterGetter(map_options_getter, options_type, "lstmOptions", "map",
[&]() { return popart_options.lstmOptions; });
RegisterSetter(container_options, "gclOptions",
[&](const std::pair<std::string, std::string>& p) {
popart_options.gclOptions.emplace(p);
});
RegisterGetter(map_options_getter, options_type, "gclOptions", "map",
[&]() { return popart_options.gclOptions; });
}
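For reference, a single registration such as ADD_BOOL_OPTION(is_training) above expands to roughly the following (a sketch of the macro expansion, with #name spelled out):

// Setter stored in bool_options; getter stored in options_getter, which
// stringifies the field; options_type records "bool" for this key.
RegisterSetter(bool_options, "is_training",
               [&](bool value) { is_training = value; });
RegisterGetter(options_getter, options_type, "is_training", "bool",
               [&]() { return std::to_string(is_training); });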
void IpuStrategy::AddBoolOption(const std::string& option, bool value) {
set(option, value, bool_options, "bool");
}
void IpuStrategy::AddUint64Option(const std::string& option,
std::uint64_t value) {
set(option, value, uint64_options, "uint64");
}
void IpuStrategy::AddDoubleOption(const std::string& option, double value) {
set(option, value, double_options, "double");
}
void IpuStrategy::AddStringOption(const std::string& option,
const std::string& value) {
set(option, value, string_options, "string");
}
void IpuStrategy::InsertStringOption(const std::string& option,
const std::string& value) {
set(option, std::pair<std::string, std::string>(value, ""), container_options,
"vector");
}
void IpuStrategy::InsertStringPairOption(const std::string& option,
const std::string& key,
const std::string& value) {
set(option, std::pair<std::string, std::string>(key, value),
container_options, "map");
}
void IpuStrategy::SetTensorLocation(const std::string& tensor,
const std::string& opt,
std::uint64_t value) {
VLOG(10) << "Setting " << opt << " to " << value << " for location "
<< tensor;
popart::TensorLocationSettings* settings;
if (tensor == "location_activation") {
settings = &popart_options.activationTensorLocationSettings;
} else if (tensor == "location_weight") {
settings = &popart_options.weightTensorLocationSettings;
} else if (tensor == "location_optimizer") {
settings = &popart_options.optimizerStateTensorLocationSettings;
} else if (tensor == "location_accumulator") {
settings = &popart_options.accumulatorTensorLocationSettings;
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Unknown tensor location: %s", tensor));
}
if (opt == "minElementsForOffChip") {
settings->minElementsForOffChip = value;
} else if (opt == "minElementsForReplicatedTensorSharding") {
settings->minElementsForReplicatedTensorSharding = value;
} else if (opt == "onChip") {
settings->location.storage = value > 0 ? popart::TensorStorage::OnChip
: popart::TensorStorage::OffChip;
} else if (opt == "useReplicatedTensorSharding") {
settings->location.replicatedTensorSharding =
value > 0 ? popart::ReplicatedTensorSharding::On
: popart::ReplicatedTensorSharding::Off;
} else if (opt == "useIOTilesToLoad") {
settings->location.loadTileSet =
value > 0 ? popart::TileSet::IO : popart::TileSet::Compute;
} else if (opt == "useIOTilesToStore") {
settings->location.storageTileSet =
value > 0 ? popart::TileSet::IO : popart::TileSet::Compute;
} else {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Unknown option '%s' for tensor location: %s", opt, tensor));
}
}
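As a usage sketch (hypothetical call site; the tensor and option names match the branches above), storing optimizer state off chip and sharding it across replicas would look like:

// Hypothetical: values > 0 enable the corresponding popart setting.
strategy.SetTensorLocation("location_optimizer", "onChip", 0);
strategy.SetTensorLocation("location_optimizer", "useReplicatedTensorSharding", 1);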
std::string IpuStrategy::GetOption(const std::string& option) {
return get(option, options_getter);
}
std::vector<std::string> IpuStrategy::GetVectorOption(
const std::string& option) {
return get(option, vector_options_getter);
}
std::map<std::string, std::string> IpuStrategy::GetMapOption(
const std::string& option) {
return get(option, map_options_getter);
}
std::string IpuStrategy::GetOptionType(const std::string& option) {
return options_type[option];
}
-void IpuStrategy::enablePattern(const std::string& t) {
+void IpuStrategy::EnablePattern(const std::string& t) {
   VLOG(10) << "enable popart pattern: " << t;
   popart_patterns.enablePattern(t, true);
 }

-void IpuStrategy::disablePattern(const std::string& t) {
+void IpuStrategy::DisablePattern(const std::string& t) {
   VLOG(10) << "disable popart pattern: " << t;
   popart_patterns.enablePattern(t, false);
 }

-const bool IpuStrategy::isPatternEnabled(const std::string& t) {
+const bool IpuStrategy::IsPatternEnabled(const std::string& t) {
   return popart_patterns.isPatternEnabled(t);
 }
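Pattern names are forwarded verbatim to popart::Patterns, so a caller might write (sketch; "OpToIdentity" is an example popart pattern name, not one used in this diff):

strategy.EnablePattern("OpToIdentity");
if (strategy.IsPatternEnabled("OpToIdentity")) {
  strategy.DisablePattern("OpToIdentity");
}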
...
@@ -14,50 +14,19 @@ limitations under the License. */
#pragma once

-#include <popart/op.hpp>
+#include <popart/patterns/patterns.hpp>
#include <popart/sessionoptions.hpp>
#include <popart/tensorlocation.hpp>
-#include "popart/patterns/patterns.hpp"
+#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace platform {
namespace ipu {

-using VirtualGraphMode = popart::VirtualGraphMode;
-using RecomputationType = popart::RecomputationType;
-
struct IpuStrategy {
-  IpuStrategy() {
-    // we always save optimizer state to OffChip and enable rts for saving
-    // memory
-    auto storage = popart::TensorLocation(popart::TensorStorage::OffChip,
-                                          popart::ReplicatedTensorSharding::On);
-    popart_options.optimizerStateTensorLocationSettings =
-        popart::TensorLocationSettings(storage);
-    // We divide the accumulationFactor and replicatedGraphCount after all
-    // reduce
-    popart_options.accumulationAndReplicationReductionType =
-        popart::ReductionType::Mean;
-    popart_options.meanAccumulationAndReplicationReductionStrategy =
-        popart::MeanReductionStrategy::Post;
-    popart_options.enableFloatingPointChecks = false;
-    // A directory for log traces to be written into.
-    popart_options.logDir = "popart_log";
-  }
-  ~IpuStrategy() {}
-
-  // Number ipus total needed, replica * ipu_per_replica
-  int num_ipus = 1;
-
-  // batches per step
-  int batches_per_step = 1;
-
-  // micro batch-size
-  int micro_batch_size = 1;
+  IpuStrategy();

+  // TODO(alleng) create PaddleOptions
  // training flag, true for training
  bool is_training = true;

@@ -67,15 +36,25 @@ struct IpuStrategy {
  // save the trained model
  bool save_onnx_checkpoint = false;

-  // save paddle model per n steps
-  int save_per_n_step = 1;
-
  // average sharding, debugging used
  bool need_avg_shard = false;

  // flag for fp16, true for pure fp16
  bool enable_fp16 = false;

+  // Number ipus total needed, replica * ipu_per_replica
+  int num_ipus = 1;
+
+  // batches per step
+  int batches_per_step = 1;
+
+  // micro batch-size
+  int micro_batch_size = 1;
+
+  // save paddle model per n steps
+  int save_per_n_step = 1;
+
+  // TODO(alleng) remove this param
  // available memory proportion, 0.0f for disable
  float available_memory_proportion = 0.0f;

@@ -88,12 +67,71 @@ struct IpuStrategy {
  // popart session option
  popart::SessionOptions popart_options;

+  // popart pattern manager
  popart::Patterns popart_patterns;

+ private:
+  std::map<std::string, std::function<void(bool)>> bool_options;
+  std::map<std::string, std::function<void(std::uint64_t)>> uint64_options;
+  std::map<std::string, std::function<void(double)>> double_options;
+  std::map<std::string, std::function<void(std::string)>> string_options;
+  std::map<std::string,
+           std::function<void(std::pair<std::string, std::string>)>>
+      container_options;
+
+  std::map<std::string, std::function<std::string()>> options_getter;
+  std::map<std::string, std::function<std::vector<std::string>()>>
+      vector_options_getter;
+  std::map<std::string, std::function<std::map<std::string, std::string>()>>
+      map_options_getter;
+  std::map<std::string, std::string> options_type;
+
+  template <typename ValueType>
+  void set(
+      const std::string &key, ValueType value,
+      std::map<std::string, std::function<void(ValueType)>> &options,  // NOLINT
+      const std::string &type_str) {
+    auto it = options.find(key);
+    PADDLE_ENFORCE_NE(it, options.end(),
+                      platform::errors::InvalidArgument(
+                          "Cannot find option: %s, type: %s "
+                          "when setting IpuStrategy options",
+                          key, type_str));
+    it->second(value);
+  }
+
+  template <typename ValueType>
+  ValueType get(
+      const std::string &key,
+      std::map<std::string, std::function<ValueType()>> &options) {  // NOLINT
+    auto it = options.find(key);
+    PADDLE_ENFORCE_NE(
+        it, options.end(),
+        platform::errors::InvalidArgument(
+            "Cannot find option name: %s when trying to get IpuStrategy option",
+            key));
+    return it->second();
+  }
+
 public:
-  void enablePattern(const std::string& t);
-  void disablePattern(const std::string& t);
-  const bool isPatternEnabled(const std::string& t);
+  void AddBoolOption(const std::string &option, bool value);
+  void AddUint64Option(const std::string &option, std::uint64_t value);
+  void AddDoubleOption(const std::string &option, double value);
+  void AddStringOption(const std::string &option, const std::string &value);
+  void InsertStringOption(const std::string &option, const std::string &value);
+  void InsertStringPairOption(const std::string &option, const std::string &key,
+                              const std::string &value);
+  void SetTensorLocation(const std::string &tensor, const std::string &option,
+                         std::uint64_t value);
+
+  std::string GetOption(const std::string &);
+  std::vector<std::string> GetVectorOption(const std::string &);
+  std::map<std::string, std::string> GetMapOption(const std::string &);
+  std::string GetOptionType(const std::string &);
+
+  void EnablePattern(const std::string &t);
+  void DisablePattern(const std::string &t);
+  const bool IsPatternEnabled(const std::string &t);
};

}  // namespace ipu
...
@@ -22,7 +22,7 @@ namespace ipu {
void* PaddleIArray::data() { return tensor_.data(); }

popart::DataType PaddleIArray::dataType() const {
-  return VarType2PopartType(tensor_.type());
+  return PdDataType2PopartType(tensor_.dtype());
}

std::size_t PaddleIArray::rank() const { return tensor_.dims().size(); }

@@ -66,11 +66,44 @@ popart::DataType VarType2PopartType(
    case framework::proto::VarType::COMPLEX128:
      return popart::DataType::COMPLEX128;
    default:
-      PADDLE_THROW(paddle::platform::errors::Unavailable(
+      PADDLE_THROW(paddle::platform::errors::Unimplemented(
          "Unsupported Paddle var type."));
  }
}
popart::DataType PdDataType2PopartType(
const paddle::experimental::DataType type) {
switch (type) {
case paddle::experimental::DataType::UINT8:
return popart::DataType::UINT8;
case paddle::experimental::DataType::INT8:
return popart::DataType::INT8;
case paddle::experimental::DataType::INT16:
return popart::DataType::INT16;
case paddle::experimental::DataType::INT32:
return popart::DataType::INT32;
case paddle::experimental::DataType::INT64:
return popart::DataType::INT64;
case paddle::experimental::DataType::BOOL:
return popart::DataType::BOOL;
case paddle::experimental::DataType::FLOAT64:
return popart::DataType::DOUBLE;
case paddle::experimental::DataType::FLOAT32:
return popart::DataType::FLOAT;
case paddle::experimental::DataType::FLOAT16:
return popart::DataType::FLOAT16;
case paddle::experimental::DataType::BFLOAT16:
return popart::DataType::BFLOAT16;
case paddle::experimental::DataType::COMPLEX64:
return popart::DataType::COMPLEX64;
case paddle::experimental::DataType::COMPLEX128:
return popart::DataType::COMPLEX128;
default:
PADDLE_THROW(paddle::platform::errors::Unimplemented(
"Unsupported Paddle data type."));
}
}
framework::proto::VarType::Type PopartType2VarType(
    const popart::DataType type) {
  switch (type) {
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include <popart/tensorinfo.hpp>
#include <popart/vendored/any.hpp>

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
@@ -83,14 +84,15 @@ class PaddleIArray final : public popart::IArray {
};

popart::DataType VarType2PopartType(const framework::proto::VarType::Type type);
popart::DataType PdDataType2PopartType(
const paddle::experimental::DataType type);
framework::proto::VarType::Type PopartType2VarType(const popart::DataType type);
popart::DataType OnnxDtype2PopartType(const int type);
bool GetBoolEnv(std::string str);

template <typename T>
std::unique_ptr<popart::NDArrayWrapper<T>> Tensor2IArray(const Tensor& tensor) {
-  auto dtype =
-      VarType2PopartType(framework::TransToProtoVarType(tensor.dtype()));
+  auto dtype = PdDataType2PopartType(tensor.dtype());
  auto shape = std::vector<int64_t>();
  for (size_t i = 0; i < tensor.dims().size(); ++i) {
    shape.push_back(tensor.dims().at(i));
...