Unverified commit fc621dfe, authored by Allen Guo, committed by GitHub

support more ops (#41421)

Parent 535810ba
......@@ -12,6 +12,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
macro(find_popart_version popart_version_file)
file(READ ${popart_version_file} popart_version_file_content)
string(REGEX MATCH "(POPART_VERSION_STRING)[ \t\r\n](\")([0-9]+\.[0-9]+\.[0-9]+)(\\+)([A-Za-z0-9_]*)(\")" POPART_VERSION ${popart_version_file_content})
string(REPLACE "POPART_VERSION_STRING" "" POPART_VERSION "${POPART_VERSION}")
string(REPLACE "\"" "" POPART_VERSION "${POPART_VERSION}")
string(REPLACE " " "" POPART_VERSION "${POPART_VERSION}")
if(NOT POPART_VERSION)
set(POPART_VERSION "Unknown version")
else()
message(STATUS "Current PopART version is ${POPART_VERSION}")
endif()
endmacro()
if(WITH_IPU)
set(POPLAR_DIR CACHE PATH "Path to a Poplar install")
set(POPART_DIR CACHE PATH "Path to a Popart install")
......@@ -64,6 +77,8 @@ if(WITH_IPU)
message(FATAL_ERROR "You must provide a path to a Popart build using -DPOPART_DIR=/path/to/popart/build")
endif()
find_popart_version("${POPART_DIR}/include/popart/version.hpp")
add_definitions(-DONNX_NAMESPACE=onnx)
add_custom_target(extern_poplar DEPENDS poplar popart-only)
endif()
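Reviewer note: the find_popart_version macro above just pulls the quoted "major.minor.patch+build" string out of PopART's version.hpp. As a rough standalone illustration of the same extraction (plain C++ with std::regex, using a hypothetical header line; not part of this change):

#include <iostream>
#include <regex>
#include <string>

int main() {
  // Hypothetical contents of popart/version.hpp; the real header may differ.
  const std::string header = "#define POPART_VERSION_STRING \"2.3.0+abc123\"";
  // Mirrors the CMake regex: macro name, whitespace, then a quoted
  // major.minor.patch+build string.
  const std::regex re(
      "POPART_VERSION_STRING[ \\t\\r\\n]\"([0-9]+\\.[0-9]+\\.[0-9]+)\\+([A-Za-z0-9_]*)\"");
  std::smatch m;
  if (std::regex_search(header, m, re)) {
    std::cout << "Current PopART version is " << m[1] << "+" << m[2] << "\n";
  } else {
    std::cout << "Unknown version\n";
  }
  return 0;
}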
......@@ -398,7 +398,8 @@ function(version version_file)
"WITH_GPU: ${WITH_GPU}\n"
"WITH_ROCM: ${WITH_ROCM}\n"
"WITH_ASCEND_CL: ${WITH_ASCEND_CL}\n"
"WITH_ASCEND_CXX11: ${WITH_ASCEND_CXX11}\n")
"WITH_ASCEND_CXX11: ${WITH_ASCEND_CXX11}\n"
"WITH_IPU: ${WITH_IPU}\n")
if(WITH_GPU)
file(APPEND ${version_file}
"CUDA version: ${CUDA_VERSION}\n"
......@@ -414,6 +415,10 @@ function(version version_file)
"Ascend Toolkit version: ${ASCEND_TOOLKIT_VERSION}\n"
"Ascend Driver version: ${ASCEND_DRIVER_VERSION}\n")
endif()
if(WITH_IPU)
file(APPEND ${version_file}
"PopART version: ${POPART_VERSION}\n")
endif()
file(APPEND ${version_file} "CXX compiler version: ${CMAKE_CXX_COMPILER_VERSION}\n")
if(TENSORRT_FOUND)
file(APPEND ${version_file}
......
......@@ -474,6 +474,7 @@ void Compiler::LowerOptimizer(const Scope* scope) {
auto adam_mode =
AdamModeFromStr(adam_mode_, ipu_strategy_->use_no_bias_optimizer);
auto weight_decay_mode_ = ipu_strategy_->weight_decay_mode;
auto scaled_optimizer_state_ = ipu_strategy_->scaled_optimizer_state;
if (weight_decay_mode_.empty()) {
weight_decay_mode_ = BOOST_GET_CONST(
std::string, op_desc->GetAttr("weight_decay_mode"));
......@@ -492,7 +493,7 @@ void Compiler::LowerOptimizer(const Scope* scope) {
auto optimizer_instance = std::make_unique<popart::Adam>(
optimizer_value, adam_mode, weight_decay_mode,
popart::DataType::UNDEFINED, accl1_type, accl2_type,
clip_norm_settings);
clip_norm_settings, scaled_optimizer_state_);
for (int i = 0; i < weight_decay_vars.size(); i++) {
optimizer_instance->insertSpecific(
weight_decay_vars[i],
......@@ -511,11 +512,10 @@ void Compiler::LowerOptimizer(const Scope* scope) {
popart::OptimizerValue(loss_scaling, true),
popart::OptimizerValue(mwn, true), adam_mode, weight_decay_mode,
popart::DataType::UNDEFINED, accl1_type, accl2_type,
clip_norm_settings);
clip_norm_settings, scaled_optimizer_state_);
}
};
if (adam_mode == popart::AdamMode::Lamb ||
adam_mode == popart::AdamMode::LambNoBias) {
if (adam_mode == popart::AdamMode::Lamb) {
const std::map<std::string, std::pair<float, bool>> optimizer_value =
{{"defaultLearningRate", {0.0, false}},
{"defaultBeta1", {beta1, false}},
......@@ -526,7 +526,26 @@ void Compiler::LowerOptimizer(const Scope* scope) {
auto eval_optimizer = std::make_unique<popart::Adam>(
optimizer_value, adam_mode, weight_decay_mode,
popart::DataType::UNDEFINED, popart::DataType::FLOAT,
popart::DataType::FLOAT, clip_norm_settings);
popart::DataType::FLOAT, clip_norm_settings,
scaled_optimizer_state_);
for (int i = 0; i < weight_decay_vars.size(); i++) {
eval_optimizer->insertSpecific(weight_decay_vars[i],
{{"weightDecay", {0.0, false}}});
}
resources_->eval_optimizer = std::move(eval_optimizer);
} else if (adam_mode == popart::AdamMode::LambNoBias) {
const std::map<std::string, std::pair<float, bool>> optimizer_value =
{{"defaultLearningRate", {0.0, false}},
{"defaultBeta1", {1.0, false}},
{"defaultBeta2", {1.0, false}},
{"defaultEps", {eps, true}},
{"lossScaling", {loss_scaling, true}},
{"defaultMaxWeightNorm", {mwn, true}}};
auto eval_optimizer = std::make_unique<popart::Adam>(
optimizer_value, adam_mode, weight_decay_mode,
popart::DataType::UNDEFINED, popart::DataType::FLOAT,
popart::DataType::FLOAT, clip_norm_settings,
scaled_optimizer_state_);
for (int i = 0; i < weight_decay_vars.size(); i++) {
eval_optimizer->insertSpecific(weight_decay_vars[i],
{{"weightDecay", {0.0, false}}});
......@@ -542,7 +561,8 @@ void Compiler::LowerOptimizer(const Scope* scope) {
popart::OptimizerValue(loss_scaling, true),
popart::OptimizerValue(mwn, true), adam_mode, weight_decay_mode,
popart::DataType::UNDEFINED, popart::DataType::FLOAT,
popart::DataType::FLOAT, clip_norm_settings);
popart::DataType::FLOAT, clip_norm_settings,
scaled_optimizer_state_);
}
} else if (type == "adaptive") {
auto alpha = BOOST_GET_CONST(float, op_desc->GetAttr("alpha"));
......
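Reviewer note: splitting the Lamb and LambNoBias branches only changes the hyper-parameter map handed to popart::Adam for the eval optimizer; the no-bias branch pins both default betas to 1.0, while the Lamb branch reuses beta1/beta2. A minimal plain-C++ sketch of the two maps (assuming the Lamb entries elided by the hunk above mirror the eps/lossScaling/maxWeightNorm entries visible in the LambNoBias branch):

#include <map>
#include <string>
#include <utility>

// Sketch only: (value, isConst) pairs with the same keys as in LowerOptimizer.
using OptimizerValues = std::map<std::string, std::pair<float, bool>>;

// Eval values for AdamMode::Lamb: the real betas are kept.
OptimizerValues LambEvalValues(float beta1, float beta2, float eps,
                               float loss_scaling, float mwn) {
  return {{"defaultLearningRate", {0.0f, false}},
          {"defaultBeta1", {beta1, false}},
          {"defaultBeta2", {beta2, false}},
          {"defaultEps", {eps, true}},
          {"lossScaling", {loss_scaling, true}},
          {"defaultMaxWeightNorm", {mwn, true}}};
}

// Eval values for AdamMode::LambNoBias: both betas pinned to 1.0,
// matching the new branch above.
OptimizerValues LambNoBiasEvalValues(float eps, float loss_scaling, float mwn) {
  return {{"defaultLearningRate", {0.0f, false}},
          {"defaultBeta1", {1.0f, false}},
          {"defaultBeta2", {1.0f, false}},
          {"defaultEps", {eps, true}},
          {"lossScaling", {loss_scaling, true}},
          {"defaultMaxWeightNorm", {mwn, true}}};
}

int main() {
  auto lamb = LambEvalValues(0.9f, 0.999f, 1e-6f, 1.0f, 65504.0f);
  auto lamb_nb = LambNoBiasEvalValues(1e-6f, 1.0f, 65504.0f);
  return (lamb.size() == 6 && lamb_nb.size() == 6) ? 0 : 1;
}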
......@@ -67,6 +67,7 @@ IpuStrategy::IpuStrategy() {
ADD_BOOL_OPTION(transfer_cast_op);
ADD_BOOL_OPTION(use_no_bias_optimizer);
ADD_BOOL_OPTION(enable_distribution);
ADD_BOOL_OPTION(scaled_optimizer_state);
ADD_UINT64_OPTION(num_ipus);
ADD_UINT64_OPTION(batches_per_step);
ADD_UINT64_OPTION(micro_batch_size);
......
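Reviewer note: ADD_BOOL_OPTION(scaled_optimizer_state) wires the new flag into the same string-keyed setter table used by the existing bool options. The following is only a hypothetical sketch of that registration pattern (names invented for illustration; not Paddle's actual macro):

#include <functional>
#include <map>
#include <string>

// Hypothetical string-keyed registry of bool setters, for illustration only.
struct OptionRegistry {
  std::map<std::string, std::function<void(bool)>> bool_setters;

  void RegisterBool(const std::string& name, bool* field) {
    bool_setters[name] = [field](bool value) { *field = value; };
  }

  void SetBool(const std::string& name, bool value) {
    bool_setters.at(name)(value);
  }
};

struct StrategySketch {
  bool scaled_optimizer_state = false;
  OptionRegistry registry;

  StrategySketch() {
    // Comparable in spirit to ADD_BOOL_OPTION(scaled_optimizer_state).
    registry.RegisterBool("scaled_optimizer_state", &scaled_optimizer_state);
  }
};

int main() {
  StrategySketch s;
  s.registry.SetBool("scaled_optimizer_state", true);
  return s.scaled_optimizer_state ? 0 : 1;
}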
......@@ -37,13 +37,13 @@ class IpuStrategy {
// training flag, true for training
bool is_training = true;
// average sharding, debugging used
// Average sharding, debugging used
bool need_avg_shard = false;
// flag for fp16, true for pure fp16
// Flag for fp16, true for pure fp16
bool enable_fp16 = false;
// enable transfer cast Op target from fp32 to fp16 in fp16 mode
// Enable transfer cast Op target from fp32 to fp16 in fp16 mode
bool transfer_cast_op = true;
// The mode of Adam/Lamb optimizer
......@@ -51,33 +51,35 @@ class IpuStrategy {
// true: The Adam_No_Bias/Lamb_No_Bias optimizer from PopART
bool use_no_bias_optimizer = false;
// enable distributed computing for POD128 or POD256
// Enable distributed computing for POD128 or POD256
bool enable_distribution = false;
// Enable Scaled optimizer state only for Adam and Lamb
bool scaled_optimizer_state = false;
// Number ipus total needed, local_replica * ipu_per_replica
int num_ipus = 1;
// batches per step
// Batches per step
int batches_per_step = 1;
// micro batch-size
// Micro batch-size
int micro_batch_size = 1;
// random seed
// Random seed
std::uint64_t random_seed = std::numeric_limits<std::uint64_t>::max();
// TODO(alleng) remove this param
// available memory proportion, 0.0f for disable
// Available memory proportion, 0.0f for disable
float available_memory_proportion = 0.0f;
// loss scaling, currently we can't get loss scaling from
// Loss scaling, currently we can't get loss scaling from
// optimizer_extract_pass, so we have to set it here
float loss_scaling = 1.0f;
// defaultMaxWeightNorm for adam optimizer
// DefaultMaxWeightNorm for adam optimizer
float max_weight_norm = 65504.0f;
// file path for dumping compiled model in onnx format
// File path for dumping compiled model in onnx format
std::string onnx_dump_path;
// Data type to use for tensor that stores first-order momentum optimizer
......@@ -106,7 +108,7 @@ class IpuStrategy {
// popart pattern manager
popart::Patterns popart_patterns;
// custom ops
// Custom ops
std::vector<IpuCustomOpIdentifier> custom_ops;
public:
......
......@@ -157,7 +157,6 @@ Node *softmax_handler(Graph *graph, Node *node) {
Node *scale_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto scale_ = BOOST_GET_CONST(float, op->GetAttr("scale"));
auto bias_ = BOOST_GET_CONST(float, op->GetAttr("bias"));
auto bias_after_scale_ =
BOOST_GET_CONST(bool, op->GetAttr("bias_after_scale"));
......@@ -191,6 +190,7 @@ Node *scale_handler(Graph *graph, Node *node) {
}
}
} else {
auto scale_ = BOOST_GET_CONST(float, op->GetAttr("scale"));
if (is_float_equal(bias_, 0.0) && is_float_equal(scale_, 1.0)) {
return CreateBaseOp(graph, node, "popart_identity",
{GetInputVarNode("X", node)}, node->outputs, {});
......
......@@ -95,6 +95,21 @@ Node *pool2d_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto pooling_type = BOOST_GET_CONST(std::string, op->GetAttr("pooling_type"));
auto global_pooling = BOOST_GET_CONST(bool, op->GetAttr("global_pooling"));
if (op->HasAttr("adaptive")) {
auto adaptive = BOOST_GET_CONST(bool, op->GetAttr("adaptive"));
if (adaptive) {
auto ksize = BOOST_GET_CONST(std::vector<int>, op->GetAttr("ksize"));
if (ksize[0] != 1 || ksize[1] != 1) {
PADDLE_THROW(platform::errors::InvalidArgument(
"Only support pool_size=1 with adaptive mode."));
}
// adaptive maxpool op is max_pool2d_with_index. Only process avgpool
// here.
return CreateBaseOp(graph, node, "popart_globalaveragepool", node->inputs,
node->outputs);
}
}
if (global_pooling) {
if (pooling_type == "max") {
return CreateBaseOp(graph, node, "popart_globalmaxpool", node->inputs,
......@@ -159,6 +174,17 @@ Node *pool2d_handler(Graph *graph, Node *node) {
}
}
Node *max_pool2d_with_index_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto ksize = BOOST_GET_CONST(std::vector<int>, op->GetAttr("ksize"));
if (ksize[0] != 1 || ksize[1] != 1) {
PADDLE_THROW(platform::errors::InvalidArgument(
"Only support pool_size=1 with adaptive mode."));
}
return CreateBaseOp(graph, node, "popart_globalmaxpool", node->inputs,
{GetOutputVarNode("Out", node)});
}
Node *group_norm_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto epsilon_ = BOOST_GET_CONST(float, op->GetAttr("epsilon"));
......@@ -304,6 +330,7 @@ Node *dropout_handler(Graph *graph, Node *node) {
} // namespace paddle
REGISTER_HANDLER(pool2d, pool2d_handler);
REGISTER_HANDLER(max_pool2d_with_index, max_pool2d_with_index_handler);
REGISTER_HANDLER(batch_norm, batch_norm_handler);
REGISTER_HANDLER(group_norm, group_norm_handler);
REGISTER_HANDLER(instance_norm, instance_norm_handler);
......
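Reviewer note: the adaptive branch of pool2d_handler and the new max_pool2d_with_index handler above both require pool_size=1 because adaptive pooling with a 1x1 output window reduces over the entire HxW plane, which is exactly what the global pooling ops compute. A standalone sketch of that equivalence for average pooling (plain C++, not Paddle/PopART code):

#include <cassert>
#include <cmath>
#include <numeric>
#include <vector>

// Generic adaptive average pooling over a single HxW plane (sketch).
// Window bounds follow the usual floor(i*H/OH) .. ceil((i+1)*H/OH) rule.
std::vector<float> AdaptiveAvgPool(const std::vector<float>& x, int h, int w,
                                   int oh, int ow) {
  std::vector<float> out(oh * ow, 0.0f);
  for (int i = 0; i < oh; ++i) {
    int h0 = (i * h) / oh, h1 = ((i + 1) * h + oh - 1) / oh;
    for (int j = 0; j < ow; ++j) {
      int w0 = (j * w) / ow, w1 = ((j + 1) * w + ow - 1) / ow;
      float sum = 0.0f;
      for (int r = h0; r < h1; ++r)
        for (int c = w0; c < w1; ++c) sum += x[r * w + c];
      out[i * ow + j] = sum / static_cast<float>((h1 - h0) * (w1 - w0));
    }
  }
  return out;
}

int main() {
  std::vector<float> x = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};  // one 2x3 plane
  // With a 1x1 output the single window covers the whole plane, so adaptive
  // average pooling degenerates to the global mean -- which is why the handler
  // can lower it to popart_globalaveragepool.
  float global_mean =
      std::accumulate(x.begin(), x.end(), 0.0f) / static_cast<float>(x.size());
  assert(std::fabs(AdaptiveAvgPool(x, 2, 3, 1, 1)[0] - global_mean) < 1e-6f);
  return 0;
}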
......@@ -331,7 +331,7 @@ Node *shape_handler(Graph *graph, Node *node) {
Node *slice_handler(Graph *graph, Node *node) {
auto *op = node->Op();
Node *starts = nullptr;
if (!op->Input("StartsTensor").empty()) {
if (!op->HasAttr("starts")) {
starts = GetInputVarNode("StartsTensor", node);
} else {
auto starts_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("starts"));
......@@ -341,7 +341,7 @@ Node *slice_handler(Graph *graph, Node *node) {
starts = starts->outputs[0];
}
Node *ends = nullptr;
if (!op->Input("EndsTensor").empty()) {
if (!op->HasAttr("ends")) {
ends = GetInputVarNode("EndsTensor", node);
} else {
auto ends_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("ends"));
......