From 42d58ddd4992d2296aca9f54ff067602cc4d8734 Mon Sep 17 00:00:00 2001 From: Allen Guo Date: Wed, 27 Jul 2022 09:56:29 +0800 Subject: [PATCH] [IPU] small bug fix (#44473) * sync misc changes * add authors Co-authored-by: Zhaorui Chen * up x * Revert "up x" This reverts commit f3fde458c6cc48613269a643cfe2acf689caccd3. * add guarg for ipu Co-authored-by: Zhaorui Chen --- .../fluid/platform/device/ipu/ipu_compiler.cc | 2 +- .../fluid/platform/device/ipu/ipu_strategy.h | 8 +-- .../ipu/popart_canonicalization/loss_ops.cc | 6 +- .../ipu/popart_canonicalization/other_ops.cc | 33 +++++++++ .../device/ipu/supported_ops_autogen.h | 17 ++++- .../device/ipu/supported_ops_custom.h | 1 - paddle/fluid/pybind/tensor.cc | 5 ++ python/paddle/fluid/executor.py | 6 +- .../tests/unittests/ipu/test_print_op_ipu.py | 71 ++++++++++++++++++- 9 files changed, 137 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.cc b/paddle/fluid/platform/device/ipu/ipu_compiler.cc index a113bbbe265..2bebdd51280 100644 --- a/paddle/fluid/platform/device/ipu/ipu_compiler.cc +++ b/paddle/fluid/platform/device/ipu/ipu_compiler.cc @@ -300,7 +300,7 @@ void Compiler::RegisterOpFunc() { #define INT32 std::int32_t #define BOOL bool #define STRING std::string -#define STRING_VEC std::vector +#define STRING_VEC std::vector #define NONE #define ARG(Type, Name) , GetAttrAllowNull(#Name, op_desc) diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.h b/paddle/fluid/platform/device/ipu/ipu_strategy.h index 997bc310df3..34beb44686b 100644 --- a/paddle/fluid/platform/device/ipu/ipu_strategy.h +++ b/paddle/fluid/platform/device/ipu/ipu_strategy.h @@ -159,8 +159,8 @@ class IpuStrategy { const std::string &type_str) { auto it = options.find(key); PADDLE_ENFORCE_NE( - it, - options.end(), + it == options.end(), + true, platform::errors::InvalidArgument("Cannot find option: %s, type: %s " "when setting IpuStrategy options", key, @@ -174,8 +174,8 @@ class IpuStrategy { std::map> &options) { // NOLINT auto it = options.find(key); PADDLE_ENFORCE_NE( - it, - options.end(), + it == options.end(), + true, platform::errors::InvalidArgument( "Cannot find option name: %s when trying to get IpuStrategy option", key)); diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/loss_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/loss_ops.cc index aa4c3638868..035b15b2770 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/loss_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/loss_ops.cc @@ -285,7 +285,7 @@ Node *binary_cross_entropy_handler(Graph *graph, Node *node) { reduction = RemoveTailReduction(graph, node, "Out"); } bool append_identity_loss = - is_dynamic_graph() && IsLastVarNode(GetOutputVarNode("Loss", node)); + is_dynamic_graph() && IsLastVarNode(GetOutputVarNode("Out", node)); auto x = GetInputVarNode("X", node); auto label = GetInputVarNode("Label", node); @@ -478,12 +478,12 @@ Node *warpctc_handler(Graph *graph, Node *node) { auto loss = CreateBaseOp( graph, node, - "popart_ctcloss", + "popart_ctcloss_v2", {log_softmax_logits, cast_label, cast_logits_length, cast_label_length}, append_identity_loss ? std::vector{} : std::vector{GetOutputVarNode("Loss", node)}, - {{"blank", blank}, + {{"blank", int64_t{blank}}, {"reduction", reduction}, {"outDataType", std::string("UNDEFINED")}}); if (append_identity_loss) { diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/other_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/other_ops.cc index 1cbe9eb4663..410bfafaca1 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/other_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/other_ops.cc @@ -32,6 +32,39 @@ Node *custom_op_handler(Graph *graph, Node *node) { Node *print_handler(Graph *graph, Node *node) { auto *op = node->Op(); + auto print_output = node->outputs.front(); + auto print_input = node->inputs.front(); + if (print_output->outputs.size() == 0) { + LOG(WARNING) << "The output of Print OP is not used on IPU graph. Setting " + "the input of Print as Output."; + for (auto &subnode : print_input->outputs) { + if (subnode == node) continue; + ConnectNodes(print_output, subnode); + DisConnectNodes(print_input, subnode); + + // replace node_name in op_desc + std::vector new_inputs; + auto subnode_inmap = subnode->Op()->Inputs(); + for (auto &in_map : subnode_inmap) { + if (std::find(in_map.second.begin(), + in_map.second.end(), + print_input->Name()) != in_map.second.end()) { + std::transform(in_map.second.cbegin(), + in_map.second.cend(), + std::back_inserter(new_inputs), + [&](const std::string &node_name) { + if (node_name == print_input->Name()) { + return print_output->Name(); + } else { + return node_name; + } + }); + subnode->Op()->SetInput(in_map.first, new_inputs); + subnode->Op()->Flush(); + } + } + } + } auto print_phase = PADDLE_GET_CONST(std::string, op->GetAttr("print_phase")); int64_t print_gradient = 0; if (print_phase != "forward") { diff --git a/paddle/fluid/platform/device/ipu/supported_ops_autogen.h b/paddle/fluid/platform/device/ipu/supported_ops_autogen.h index 14dcf65afee..017ed648930 100644 --- a/paddle/fluid/platform/device/ipu/supported_ops_autogen.h +++ b/paddle/fluid/platform/device/ipu/supported_ops_autogen.h @@ -17,11 +17,13 @@ #pragma once // Ops from AiGraphcoreOpset1 +OP_DECL(popart_copyvarupdate_v2, aiGraphcoreOpset.copyvarupdate, NONE) // NOLINT OP_DECL(popart_groupnormalization_v2, aiGraphcoreOpset.groupnormalization, ARG(INT,num_groups) ARG(FLOAT,epsilon) ) // NOLINT OP_DECL(popart_subsample_v2, aiGraphcoreOpset.subsample, ARG(INT_VEC,strides) ) // NOLINT OP_DECL(popart_nop_v2, aiGraphcoreOpset.nop, NONE) // NOLINT OP_DECL(popart_scale_v2, aiGraphcoreOpset.scale, ARG(FLOAT,scale) ) // NOLINT OP_DECL(popart_scaledadd_v2, aiGraphcoreOpset.scaledadd, ARG(FLOAT,scale0) ARG(FLOAT,scale1) ) // NOLINT +OP_DECL(popart_lstm_v2, aiGraphcoreOpset.lstm, ARG(INT,outputFullSequence) ) // NOLINT OP_DECL(popart_gelu_v2, aiGraphcoreOpset.gelu, NONE) // NOLINT OP_DECL(popart_detach_v2, aiGraphcoreOpset.detach, NONE) // NOLINT OP_DECL(popart_depthtospace_v2, aiGraphcoreOpset.depthtospace, ARG(INT,blocksize) ARG(STRING,mode) ) // NOLINT @@ -32,8 +34,13 @@ OP_DECL(popart_dynamiczero_v2, aiGraphcoreOpset.dynamiczero, ARG(INT_VEC,axes) A OP_DECL(popart_dynamicadd_v2, aiGraphcoreOpset.dynamicadd, ARG(INT_VEC,axes) ARG(INT_VEC,sizes) ) // NOLINT OP_DECL(popart_sequenceslice_v2, aiGraphcoreOpset.sequenceslice, ARG(INT,zeroUnused) ) // NOLINT OP_DECL(popart_replicatedallreduce_v2, aiGraphcoreOpset.replicatedallreduce, OPT_ARG(INT_VEC,commGroup) ) // NOLINT +OP_DECL(popart_l1loss_v2, aiGraphcoreOpset.l1loss, ARG(FLOAT,lambda) SIG_ARG(INT32,popart::ReductionType,reduction) ) // NOLINT +OP_DECL(popart_nllloss_v2, aiGraphcoreOpset.nllloss, SIG_ARG(INT32,popart::ReductionType,reduction) OPT_ARG(INT32,ignoreIndex) ARG(BOOL,inputIsLogProbability) ) // NOLINT +OP_DECL(popart_identityloss_v2, aiGraphcoreOpset.identityloss, SIG_ARG(INT32,popart::ReductionType,reduction) ) // NOLINT +OP_DECL(popart_tensorremap_v2, aiGraphcoreOpset.tensorremap, ARG(INT,remap_type) ) // NOLINT +OP_DECL(popart_ctcloss_v2, aiGraphcoreOpset.ctcloss, SIG_ARG(INT32,popart::ReductionType,reduction) ARG(INT,blank) ARG(STRING,outDataType) ) // NOLINT +OP_DECL(popart__ctcloss_v2, aiGraphcoreOpset._ctcloss, SIG_ARG(INT32,popart::ReductionType,reduction) ARG(INT,blank) ARG(STRING,outDataType) ) // NOLINT OP_DECL(popart_ctcbeamsearchdecoder_v2, aiGraphcoreOpset.ctcbeamsearchdecoder, ARG(INT,blank) ARG(INT,beamWidth) ARG(INT,topPaths) ) // NOLINT -OP_DECL(popart_ctcloss, aiGraphcoreOpset.ctcloss, SIG_ARG(INT32,popart::ReductionType,reduction) ARG(INT32,blank) ARG(STRING,outDataType) ) // NOLINT OP_DECL(popart_shapeddropout_v2, aiGraphcoreOpset.shapeddropout, ARG(INT_VEC,shape) ARG(FLOAT,ratio) ) // NOLINT OP_DECL(popart_atan2_v2, aiGraphcoreOpset.atan2, NONE) // NOLINT OP_DECL(popart_expm1_v2, aiGraphcoreOpset.expm1, NONE) // NOLINT @@ -47,6 +54,9 @@ OP_DECL(popart_bitwiseor_v2, aiGraphcoreOpset.bitwiseor, NONE) // NOLINT OP_DECL(popart_bitwisexor_v2, aiGraphcoreOpset.bitwisexor, NONE) // NOLINT OP_DECL(popart_bitwisexnor_v2, aiGraphcoreOpset.bitwisexnor, NONE) // NOLINT OP_DECL(popart_reducemedian_v2, aiGraphcoreOpset.reducemedian, OPT_ARG(INT_VEC,axes) ARG(INT,keepdims) ) // NOLINT +OP_DECL(popart_scatterreduce_v2, aiGraphcoreOpset.scatterreduce, ARG(INT,axis_size) ARG(INT,axis) SIG_ARG(INT32,popart::ScatterReduction,reduction) ) // NOLINT +OP_DECL(popart_swish_v2, aiGraphcoreOpset.swish, NONE) // NOLINT +OP_DECL(popart_incrementmod_v2, aiGraphcoreOpset.incrementmod, ARG(FLOAT,increment) ARG(FLOAT,modulus) ) // NOLINT // Ops from AiOnnxOpset11 OP_DECL(popart_argmax, aiOnnxOpset.argmax, ARG(INT,axis) ARG(INT,keepdims) ) // NOLINT OP_DECL(popart_argmin, aiOnnxOpset.argmin, ARG(INT,axis) ARG(INT,keepdims) ) // NOLINT @@ -117,6 +127,7 @@ OP_DECL(popart_qlinearmatmul, aiOnnxOpset.qlinearmatmul, NONE) // NOLINT OP_DECL(popart_quantizelinear, aiOnnxOpset.quantizelinear, NONE) // NOLINT OP_DECL(popart_reversesequence, aiOnnxOpset.reversesequence, ARG(INT,batch_axis) ARG(INT,time_axis) ) // NOLINT OP_DECL(popart_roialign, aiOnnxOpset.roialign, ARG(STRING,mode) ARG(INT,output_height) ARG(INT,output_width) ARG(INT,sampling_ratio) ARG(FLOAT,spatial_scale) ) // NOLINT +OP_DECL(popart_stringnormalizer, aiOnnxOpset.stringnormalizer, ARG(STRING,case_change_action) ARG(INT,is_case_sensitive) OPT_ARG(STRING,locale) ARG(STRING_VEC,stopwords) ) // NOLINT OP_DECL(popart_thresholdedrelu, aiOnnxOpset.thresholdedrelu, ARG(FLOAT,alpha) ) // NOLINT OP_DECL(popart_upsample, aiOnnxOpset.upsample, ARG(STRING,mode) ) // NOLINT // Ops from AiOnnxOpset9 @@ -138,6 +149,7 @@ OP_DECL(popart_prelu, aiOnnxOpset.prelu, NONE) // NOLINT OP_DECL(popart_shrink, aiOnnxOpset.shrink, ARG(FLOAT,bias) ARG(FLOAT,lambd) ) // NOLINT OP_DECL(popart_sign, aiOnnxOpset.sign, NONE) // NOLINT OP_DECL(popart_sinh, aiOnnxOpset.sinh, NONE) // NOLINT +OP_DECL(popart_tfidfvectorizer, aiOnnxOpset.tfidfvectorizer, ARG(INT,max_gram_length) ARG(INT,max_skip_count) ARG(INT,min_gram_length) ARG(STRING,mode) ARG(INT_VEC,ngram_counts) ARG(INT_VEC,ngram_indexes) ARG(INT_VEC,pool_int64s) ARG(STRING_VEC,pool_strings) ARG(FLOAT_VEC,weights) ) // NOLINT OP_DECL(popart_where, aiOnnxOpset.where, NONE) // NOLINT // Ops from AiOnnxOpset8 OP_DECL(popart_expand, aiOnnxOpset.expand, NONE) // NOLINT @@ -153,10 +165,13 @@ OP_DECL(popart_asin, aiOnnxOpset.asin, NONE) // NOLINT OP_DECL(popart_atan, aiOnnxOpset.atan, NONE) // NOLINT OP_DECL(popart_cos, aiOnnxOpset.cos, NONE) // NOLINT OP_DECL(popart_div, aiOnnxOpset.div, NONE) // NOLINT +OP_DECL(popart_gru, aiOnnxOpset.gru, ARG(INT,num_outputs) ARG(FLOAT_VEC,activation_alpha) ARG(FLOAT_VEC,activation_beta) ARG(STRING_VEC,activations) OPT_ARG(FLOAT,clip) ARG(STRING,direction) OPT_ARG(INT,hidden_size) ARG(INT,linear_before_reset) ) // NOLINT +OP_DECL(popart_lstm, aiOnnxOpset.lstm, ARG(INT,num_outputs) ARG(FLOAT_VEC,activation_alpha) ARG(FLOAT_VEC,activation_beta) ARG(STRING_VEC,activations) OPT_ARG(FLOAT,clip) ARG(STRING,direction) OPT_ARG(INT,hidden_size) ARG(INT,input_forget) ) // NOLINT OP_DECL(popart_mul, aiOnnxOpset.mul, NONE) // NOLINT OP_DECL(popart_multinomial, aiOnnxOpset.multinomial, ARG(INT,dtype) ARG(INT,sample_size) OPT_ARG(FLOAT,seed) ) // NOLINT OP_DECL(popart_logical_or, aiOnnxOpset.logical_or, NONE) // NOLINT OP_DECL(popart_pow, aiOnnxOpset.pow, NONE) // NOLINT +OP_DECL(popart_rnn, aiOnnxOpset.rnn, ARG(INT,num_outputs) ARG(FLOAT_VEC,activation_alpha) ARG(FLOAT_VEC,activation_beta) ARG(STRING_VEC,activations) OPT_ARG(FLOAT,clip) ARG(STRING,direction) OPT_ARG(INT,hidden_size) ) // NOLINT OP_DECL(popart_sin, aiOnnxOpset.sin, NONE) // NOLINT OP_DECL(popart_sub, aiOnnxOpset.sub, NONE) // NOLINT OP_DECL(popart_tan, aiOnnxOpset.tan, NONE) // NOLINT diff --git a/paddle/fluid/platform/device/ipu/supported_ops_custom.h b/paddle/fluid/platform/device/ipu/supported_ops_custom.h index 04c57cc0104..9a69567aa6d 100644 --- a/paddle/fluid/platform/device/ipu/supported_ops_custom.h +++ b/paddle/fluid/platform/device/ipu/supported_ops_custom.h @@ -16,7 +16,6 @@ #pragma once -OP_DECL(popart_nllloss_v2, aiGraphcoreOpset.nllloss, SIG_ARG(INT32,popart::ReductionType,reduction) OPT_ARG(INT32,ignoreIndex) ARG(BOOL,inputIsLogProbability) ) // NOLINT OP_DECL(popart_identity_loss, aiGraphcoreOpset.identityloss, SIG_ARG(INT32,popart::ReductionType,reduction) ) // NOLINT // clang-format on diff --git a/paddle/fluid/pybind/tensor.cc b/paddle/fluid/pybind/tensor.cc index 660d9badc30..8a6a6782f44 100644 --- a/paddle/fluid/pybind/tensor.cc +++ b/paddle/fluid/pybind/tensor.cc @@ -378,6 +378,11 @@ void BindTensor(pybind11::module &m) { // NOLINT py::arg("tensor"), py::arg("place"), py::arg("batch_size") = -1) + .def("_copy_from", + &TensorCopyFrom, + py::arg("tensor"), + py::arg("place"), + py::arg("batch_size") = -1) .def("_copy_from", &TensorCopyFrom, py::arg("tensor"), diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index 9961c1a106b..e9e2252f906 100755 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -1486,7 +1486,11 @@ class Executor(object): # NOTE(dev): `set` always call TensorCopySync that is a # blocking behavior. So we use `_copy_from` to replace it. cpu_tensor = _as_lodtensor(data, core.CPUPlace()) - tensor._copy_from(cpu_tensor, self.place) + # for ipu, tensor is allocated on cpu + if core.is_compiled_with_ipu(): + tensor._copy_from(cpu_tensor, tensor._place()) + else: + tensor._copy_from(cpu_tensor, self.place) return new_exe.run(scope, list(feed.keys()), fetch_list, return_numpy) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_print_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_print_op_ipu.py index a2da444519d..33f4a331611 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_print_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_print_op_ipu.py @@ -17,7 +17,8 @@ import unittest import numpy as np import paddle import paddle.static -from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest +from paddle.jit import to_static +from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest, IPUD2STest class TestBase(IPUOpTest): @@ -106,5 +107,73 @@ class TestCase2(TestBase): } +class SimpleLayer(paddle.nn.Layer): + + def __init__(self): + super(SimpleLayer, self).__init__() + self.conv = paddle.nn.Conv2D(in_channels=3, + out_channels=1, + kernel_size=2, + stride=1) + + @to_static() + def forward(self, x, target=None): + x = self.conv(x) + print(x) + x = paddle.fluid.layers.flatten(x, axis=1) + if target is not None: + x = paddle.fluid.layers.softmax(x) + loss = paddle.fluid.layers.cross_entropy(x, target) + loss = paddle.incubate.identity_loss(loss, 1) + return x, loss + return x + + +class TestD2S(IPUD2STest): + + def setUp(self): + self.set_data_feed() + + def set_data_feed(self): + self.data = paddle.uniform((8, 3, 10, 10), dtype='float32') + self.label = paddle.randint(0, 10, shape=[8], dtype='int64') + + def _test(self, use_ipu=False): + paddle.seed(self.SEED) + np.random.seed(self.SEED) + model = SimpleLayer() + optim = paddle.optimizer.Adam(learning_rate=0.01, + parameters=model.parameters()) + + if use_ipu: + paddle.set_device('ipu') + ipu_strategy = paddle.static.IpuStrategy() + ipu_strategy.set_graph_config(num_ipus=1, + is_training=True, + micro_batch_size=1, + enable_manual_shard=False) + ipu_strategy.set_optimizer(optim) + + result = [] + for _ in range(2): + # ipu only needs call model() to do forward/backward/grad_update + pred, loss = model(self.data, self.label) + if not use_ipu: + loss.backward() + optim.step() + optim.clear_grad() + result.append(loss) + + if use_ipu: + ipu_strategy.release_patch() + + return np.array(result) + + def test_training(self): + ipu_loss = self._test(True).flatten() + cpu_loss = self._test(False).flatten() + self.assertTrue(np.allclose(ipu_loss, cpu_loss, atol=1e-4)) + + if __name__ == "__main__": unittest.main() -- GitLab