Unverified commit 9f2ae360, authored by Allen Guo, committed by GitHub

support more ops (#41421) (#41731)

Parent 261f97fb
@@ -12,6 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+macro(find_popart_version popart_version_file)
+  file(READ ${popart_version_file} popart_version_file_content)
+  string(REGEX MATCH "(POPART_VERSION_STRING)[ \t\r\n](\")([0-9]+\.[0-9]+\.[0-9]+)(\\+)([A-Za-z0-9_]*)(\")" POPART_VERSION ${popart_version_file_content})
+  string(REPLACE "POPART_VERSION_STRING" "" POPART_VERSION "${POPART_VERSION}")
+  string(REPLACE "\"" "" POPART_VERSION "${POPART_VERSION}")
+  string(REPLACE " " "" POPART_VERSION "${POPART_VERSION}")
+  if(NOT POPART_VERSION)
+    set(POPART_VERSION "Unknown version")
+  else()
+    message(STATUS "Current PopART version is ${POPART_VERSION}")
+  endif()
+endmacro()
+
 if(WITH_IPU)
   set(POPLAR_DIR CACHE PATH "Path to a Poplar install")
   set(POPART_DIR CACHE PATH "Path to a Popart install")
@@ -64,6 +77,8 @@ if(WITH_IPU)
     message(FATAL_ERROR "You must provide a path to a Popart build using -DPOPART_DIR=/path/to/popart/build")
   endif()
+
+  find_popart_version("${POPART_DIR}/include/popart/version.hpp")
   add_definitions(-DONNX_NAMESPACE=onnx)
   add_custom_target(extern_poplar DEPENDS poplar popart-only)
 endif()
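A note on the version regex above: it assumes popart/version.hpp defines the version as a quoted "major.minor.patch+build" string next to the POPART_VERSION_STRING token. Below is a minimal standalone C++ sketch of the same extraction; the header line and version number are invented purely for illustration.

#include <iostream>
#include <regex>
#include <string>

int main() {
  // Hypothetical contents of popart/version.hpp; the real file ships with the
  // PopART SDK and the version shown here is made up for this example.
  std::string header = "#define POPART_VERSION_STRING \"2.3.0+abc123\"";

  // Same shape as the CMake REGEX MATCH: the token, one whitespace character,
  // then "major.minor.patch+build" in double quotes.
  std::regex pattern(
      "POPART_VERSION_STRING[ \t\r\n]\"([0-9]+\\.[0-9]+\\.[0-9]+\\+[A-Za-z0-9_]*)\"");
  std::smatch match;
  if (std::regex_search(header, match, pattern)) {
    std::cout << "Current PopART version is " << match[1] << std::endl;  // 2.3.0+abc123
  } else {
    std::cout << "Unknown version" << std::endl;
  }
  return 0;
}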
@@ -398,7 +398,8 @@ function(version version_file)
     "WITH_GPU: ${WITH_GPU}\n"
     "WITH_ROCM: ${WITH_ROCM}\n"
     "WITH_ASCEND_CL: ${WITH_ASCEND_CL}\n"
-    "WITH_ASCEND_CXX11: ${WITH_ASCEND_CXX11}\n")
+    "WITH_ASCEND_CXX11: ${WITH_ASCEND_CXX11}\n"
+    "WITH_IPU: ${WITH_IPU}\n")
   if(WITH_GPU)
     file(APPEND ${version_file}
       "CUDA version: ${CUDA_VERSION}\n"
@@ -414,6 +415,10 @@ function(version version_file)
       "Ascend Toolkit version: ${ASCEND_TOOLKIT_VERSION}\n"
       "Ascend Driver version: ${ASCEND_DRIVER_VERSION}\n")
   endif()
+  if(WITH_IPU)
+    file(APPEND ${version_file}
+      "PopART version: ${POPART_VERSION}\n")
+  endif()
   file(APPEND ${version_file} "CXX compiler version: ${CMAKE_CXX_COMPILER_VERSION}\n")
   if(TENSORRT_FOUND)
     file(APPEND ${version_file}
...
@@ -474,6 +474,7 @@ void Compiler::LowerOptimizer(const Scope* scope) {
       auto adam_mode =
           AdamModeFromStr(adam_mode_, ipu_strategy_->use_no_bias_optimizer);
       auto weight_decay_mode_ = ipu_strategy_->weight_decay_mode;
+      auto scaled_optimizer_state_ = ipu_strategy_->scaled_optimizer_state;
       if (weight_decay_mode_.empty()) {
         weight_decay_mode_ = BOOST_GET_CONST(
             std::string, op_desc->GetAttr("weight_decay_mode"));
@@ -492,7 +493,7 @@ void Compiler::LowerOptimizer(const Scope* scope) {
        auto optimizer_instance = std::make_unique<popart::Adam>(
            optimizer_value, adam_mode, weight_decay_mode,
            popart::DataType::UNDEFINED, accl1_type, accl2_type,
-           clip_norm_settings);
+           clip_norm_settings, scaled_optimizer_state_);
        for (int i = 0; i < weight_decay_vars.size(); i++) {
          optimizer_instance->insertSpecific(
              weight_decay_vars[i],
@@ -511,11 +512,10 @@ void Compiler::LowerOptimizer(const Scope* scope) {
            popart::OptimizerValue(loss_scaling, true),
            popart::OptimizerValue(mwn, true), adam_mode, weight_decay_mode,
            popart::DataType::UNDEFINED, accl1_type, accl2_type,
-           clip_norm_settings);
+           clip_norm_settings, scaled_optimizer_state_);
      }
    };
-    if (adam_mode == popart::AdamMode::Lamb ||
-        adam_mode == popart::AdamMode::LambNoBias) {
+    if (adam_mode == popart::AdamMode::Lamb) {
      const std::map<std::string, std::pair<float, bool>> optimizer_value =
          {{"defaultLearningRate", {0.0, false}},
           {"defaultBeta1", {beta1, false}},
@@ -526,7 +526,26 @@ void Compiler::LowerOptimizer(const Scope* scope) {
      auto eval_optimizer = std::make_unique<popart::Adam>(
          optimizer_value, adam_mode, weight_decay_mode,
          popart::DataType::UNDEFINED, popart::DataType::FLOAT,
-         popart::DataType::FLOAT, clip_norm_settings);
+         popart::DataType::FLOAT, clip_norm_settings,
+         scaled_optimizer_state_);
+      for (int i = 0; i < weight_decay_vars.size(); i++) {
+        eval_optimizer->insertSpecific(weight_decay_vars[i],
+                                       {{"weightDecay", {0.0, false}}});
+      }
+      resources_->eval_optimizer = std::move(eval_optimizer);
+    } else if (adam_mode == popart::AdamMode::LambNoBias) {
+      const std::map<std::string, std::pair<float, bool>> optimizer_value =
+          {{"defaultLearningRate", {0.0, false}},
+           {"defaultBeta1", {1.0, false}},
+           {"defaultBeta2", {1.0, false}},
+           {"defaultEps", {eps, true}},
+           {"lossScaling", {loss_scaling, true}},
+           {"defaultMaxWeightNorm", {mwn, true}}};
+      auto eval_optimizer = std::make_unique<popart::Adam>(
+          optimizer_value, adam_mode, weight_decay_mode,
+          popart::DataType::UNDEFINED, popart::DataType::FLOAT,
+          popart::DataType::FLOAT, clip_norm_settings,
+          scaled_optimizer_state_);
      for (int i = 0; i < weight_decay_vars.size(); i++) {
        eval_optimizer->insertSpecific(weight_decay_vars[i],
                                       {{"weightDecay", {0.0, false}}});
@@ -542,7 +561,8 @@ void Compiler::LowerOptimizer(const Scope* scope) {
            popart::OptimizerValue(loss_scaling, true),
            popart::OptimizerValue(mwn, true), adam_mode, weight_decay_mode,
            popart::DataType::UNDEFINED, popart::DataType::FLOAT,
-           popart::DataType::FLOAT, clip_norm_settings);
+           popart::DataType::FLOAT, clip_norm_settings,
+           scaled_optimizer_state_);
      }
    } else if (type == "adaptive") {
      auto alpha = BOOST_GET_CONST(float, op_desc->GetAttr("alpha"));
...
@@ -67,6 +67,7 @@ IpuStrategy::IpuStrategy() {
   ADD_BOOL_OPTION(transfer_cast_op);
   ADD_BOOL_OPTION(use_no_bias_optimizer);
   ADD_BOOL_OPTION(enable_distribution);
+  ADD_BOOL_OPTION(scaled_optimizer_state);
   ADD_UINT64_OPTION(num_ipus);
   ADD_UINT64_OPTION(batches_per_step);
   ADD_UINT64_OPTION(micro_batch_size);
...
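For readers unfamiliar with the ADD_BOOL_OPTION pattern used above: it registers an IpuStrategy member under its own name so the flag can be set and read by string (for example from the user-facing IpuStrategy options). The following is a generic, self-contained sketch of that pattern, not Paddle's actual implementation; the ToyIpuStrategy type and TOY_ADD_BOOL_OPTION macro are invented for illustration.

#include <functional>
#include <iostream>
#include <map>
#include <string>

// Hypothetical, trimmed-down option registry in the spirit of ADD_BOOL_OPTION.
struct ToyIpuStrategy {
  bool scaled_optimizer_state = false;
  bool enable_distribution = false;

  std::map<std::string, std::function<void(bool)>> bool_setters;
  std::map<std::string, std::function<bool()>> bool_getters;

// Register a bool member under its own name so it can be set/read by string.
#define TOY_ADD_BOOL_OPTION(name)                           \
  bool_setters[#name] = [&](bool value) { name = value; };  \
  bool_getters[#name] = [&]() { return name; }

  ToyIpuStrategy() {
    TOY_ADD_BOOL_OPTION(scaled_optimizer_state);
    TOY_ADD_BOOL_OPTION(enable_distribution);
  }
};

int main() {
  ToyIpuStrategy strategy;
  // Toggle and read back the option by its string name.
  strategy.bool_setters.at("scaled_optimizer_state")(true);
  std::cout << std::boolalpha
            << strategy.bool_getters.at("scaled_optimizer_state")() << std::endl;  // true
  return 0;
}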
@@ -37,13 +37,13 @@ class IpuStrategy {
   // training flag, true for training
   bool is_training = true;
-  // average sharding, debugging used
+  // Average sharding, debugging used
   bool need_avg_shard = false;
-  // flag for fp16, true for pure fp16
+  // Flag for fp16, true for pure fp16
   bool enable_fp16 = false;
-  // enable transfer cast Op target from fp32 to fp16 in fp16 mode
+  // Enable transfer cast Op target from fp32 to fp16 in fp16 mode
   bool transfer_cast_op = true;
   // The mode of Adam/Lamb optimizer
@@ -51,33 +51,35 @@ class IpuStrategy {
   // true: The Adam_No_Bias/Lamb_No_Bias optimizer from PopART
   bool use_no_bias_optimizer = false;
-  // enable distributed computing for POD128 or POD256
+  // Enable distributed computing for POD128 or POD256
   bool enable_distribution = false;
+  // Enable Scaled optimizer state only for Adam and Lamb
+  bool scaled_optimizer_state = false;
   // Number ipus total needed, local_replica * ipu_per_replica
   int num_ipus = 1;
-  // batches per step
+  // Batches per step
   int batches_per_step = 1;
-  // micro batch-size
+  // Micro batch-size
   int micro_batch_size = 1;
-  // random seed
+  // Random seed
   std::uint64_t random_seed = std::numeric_limits<std::uint64_t>::max();
-  // TODO(alleng) remove this param
-  // available memory proportion, 0.0f for disable
+  // Available memory proportion, 0.0f for disable
   float available_memory_proportion = 0.0f;
-  // loss scaling, currently we can't get loss scaling from
+  // Loss scaling, currently we can't get loss scaling from
   // optimizer_extract_pass, so we have to set it here
   float loss_scaling = 1.0f;
-  // defaultMaxWeightNorm for adam optimizer
+  // DefaultMaxWeightNorm for adam optimizer
   float max_weight_norm = 65504.0f;
-  // file path for dumping compiled model in onnx format
+  // File path for dumping compiled model in onnx format
   std::string onnx_dump_path;
   // Data type to use for tensor that stores first-order momentum optimizer
@@ -106,7 +108,7 @@ class IpuStrategy {
   // popart pattern manager
   popart::Patterns popart_patterns;
-  // custom ops
+  // Custom ops
   std::vector<IpuCustomOpIdentifier> custom_ops;
 public:
...
@@ -157,7 +157,6 @@ Node *softmax_handler(Graph *graph, Node *node) {
 Node *scale_handler(Graph *graph, Node *node) {
   auto *op = node->Op();
-  auto scale_ = BOOST_GET_CONST(float, op->GetAttr("scale"));
   auto bias_ = BOOST_GET_CONST(float, op->GetAttr("bias"));
   auto bias_after_scale_ =
       BOOST_GET_CONST(bool, op->GetAttr("bias_after_scale"));
@@ -191,6 +190,7 @@ Node *scale_handler(Graph *graph, Node *node) {
       }
     }
   } else {
+    auto scale_ = BOOST_GET_CONST(float, op->GetAttr("scale"));
     if (is_float_equal(bias_, 0.0) && is_float_equal(scale_, 1.0)) {
       return CreateBaseOp(graph, node, "popart_identity",
                           {GetInputVarNode("X", node)}, node->outputs, {});
...
@@ -95,6 +95,21 @@ Node *pool2d_handler(Graph *graph, Node *node) {
   auto *op = node->Op();
   auto pooling_type = BOOST_GET_CONST(std::string, op->GetAttr("pooling_type"));
   auto global_pooling = BOOST_GET_CONST(bool, op->GetAttr("global_pooling"));
+  if (op->HasAttr("adaptive")) {
+    auto adaptive = BOOST_GET_CONST(bool, op->GetAttr("adaptive"));
+    if (adaptive) {
+      auto ksize = BOOST_GET_CONST(std::vector<int>, op->GetAttr("ksize"));
+      if (ksize[0] != 1 || ksize[1] != 1) {
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "Only support pool_size=1 with adaptive mode."));
+      }
+      // adaptive maxpool op is max_pool2d_with_index. Only process avgpool
+      // here.
+      return CreateBaseOp(graph, node, "popart_globalaveragepool", node->inputs,
+                          node->outputs);
+    }
+  }
   if (global_pooling) {
     if (pooling_type == "max") {
       return CreateBaseOp(graph, node, "popart_globalmaxpool", node->inputs,
@@ -159,6 +174,17 @@ Node *pool2d_handler(Graph *graph, Node *node) {
   }
 }
+Node *max_pool2d_with_index_handler(Graph *graph, Node *node) {
+  auto *op = node->Op();
+  auto ksize = BOOST_GET_CONST(std::vector<int>, op->GetAttr("ksize"));
+  if (ksize[0] != 1 || ksize[1] != 1) {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "Only support pool_size=1 with adaptive mode."));
+  }
+  return CreateBaseOp(graph, node, "popart_globalmaxpool", node->inputs,
+                      {GetOutputVarNode("Out", node)});
+}
 Node *group_norm_handler(Graph *graph, Node *node) {
   auto *op = node->Op();
   auto epsilon_ = BOOST_GET_CONST(float, op->GetAttr("epsilon"));
@@ -304,6 +330,7 @@ Node *dropout_handler(Graph *graph, Node *node) {
 } // namespace paddle
 REGISTER_HANDLER(pool2d, pool2d_handler);
+REGISTER_HANDLER(max_pool2d_with_index, max_pool2d_with_index_handler);
 REGISTER_HANDLER(batch_norm, batch_norm_handler);
 REGISTER_HANDLER(group_norm, group_norm_handler);
 REGISTER_HANDLER(instance_norm, instance_norm_handler);
...
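The two pooling handlers above rely on the fact that adaptive pooling with an output size of 1x1 reduces to global pooling over the spatial dimensions, which is why any other pool_size is rejected. A small self-contained C++ sketch of that equivalence on a single toy HxW plane (the values are arbitrary):

#include <iostream>
#include <vector>

// Global average pooling over one HxW plane: exactly what adaptive average
// pooling produces when the requested output size is 1x1.
float global_avg_pool(const std::vector<float>& plane) {
  float sum = 0.0f;
  for (float v : plane) sum += v;
  return sum / static_cast<float>(plane.size());
}

// Global max pooling over one HxW plane, matching adaptive max pooling with
// output size 1x1 (the popart_globalmaxpool lowering used above).
float global_max_pool(const std::vector<float>& plane) {
  float best = plane.front();
  for (float v : plane) {
    if (v > best) best = v;
  }
  return best;
}

int main() {
  // A 2x3 spatial plane of one channel, flattened row-major.
  std::vector<float> plane = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
  std::cout << "global avg: " << global_avg_pool(plane) << std::endl;  // 3.5
  std::cout << "global max: " << global_max_pool(plane) << std::endl;  // 6
  return 0;
}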
@@ -331,7 +331,7 @@ Node *shape_handler(Graph *graph, Node *node) {
 Node *slice_handler(Graph *graph, Node *node) {
   auto *op = node->Op();
   Node *starts = nullptr;
-  if (!op->Input("StartsTensor").empty()) {
+  if (!op->HasAttr("starts")) {
     starts = GetInputVarNode("StartsTensor", node);
   } else {
     auto starts_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("starts"));
@@ -341,7 +341,7 @@ Node *slice_handler(Graph *graph, Node *node) {
     starts = starts->outputs[0];
   }
   Node *ends = nullptr;
-  if (!op->Input("EndsTensor").empty()) {
+  if (!op->HasAttr("ends")) {
     ends = GetInputVarNode("EndsTensor", node);
   } else {
     auto ends_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("ends"));
...