diff --git a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc index e259d6d417a5c5ee9858e4eb50458f9acd91e50f..0de89aaad3b0dafbff6e7004de734912adfeffeb 100644 --- a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc +++ b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc @@ -545,6 +545,15 @@ void ReplaceSubGraphWithCinnOpNode( RemoveSubGraphFromGraph(cluster, cluster_internals, graph); } +static bool IsInplaceOp(const OpDesc& op_desc) { + auto inputs = op_desc.InputArgumentNames(); + std::unordered_set input_set(inputs.begin(), inputs.end()); + for (auto& name : op_desc.OutputArgumentNames()) { + if (input_set.count(name) > 0) return true; + } + return false; +} + // Search all subgraphs which all op node supported by CINN, // Here we using SubgraphDetector to detecte the subgraph that // all of op node supported by CINN. We using OpMapperRegistry @@ -565,9 +574,10 @@ void SearchAllSubgraphs(Graph* graph) { if (deny_ops.size()) { return registered && !deny_ops.count(node->Name()); } + // if the user doesn't set FLAGS_allow_cinn_ops and FLAGS_deny_cinn_ops, // return true only when it is registered in CINN - return registered; + return registered && (node->IsOp() && !IsInplaceOp(*node->Op())); }; VLOG(4) << "The allowed Cinn Ops: " << FLAGS_allow_cinn_ops; VLOG(4) << "The denied Cinn Ops: " << FLAGS_deny_cinn_ops; diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc index 51dca93c7c7f0c37bea5aa5bd6458f9953814f69..549c854961764f4303dcd1adb5bac1c2dd58f892 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc @@ -61,8 +61,8 @@ using ::cinn::hlir::framework::BuildScope; using ::cinn::hlir::framework::GraphCompiler; CinnCompiler* CinnCompiler::GetInstance() { - static CinnCompiler instance; - return &instance; + static CinnCompiler* instance = new CinnCompiler(); + return instance; } const CinnCompiledObject& CinnCompiler::Compile( diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.h b/paddle/fluid/operators/cinn/cinn_launch_op.h index 024bf2bceb3d0167ca98c7fe219b33e5fe2d96ad..6001a4f5c070997dbcf747d41f5ac6252b907be3 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op.h +++ b/paddle/fluid/operators/cinn/cinn_launch_op.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -101,8 +102,21 @@ class CinnLaunchOpKernel : public framework::OpKernel { // Step 2. Get compilation result of the graph auto target = details::PlaceToCinnTarget(place); + using ClockType = std::chrono::steady_clock; + std::chrono::time_point start_t, end_t; + if (VLOG_IS_ON(1)) { + VLOG(1) << "Starts to compile at thread " << std::this_thread::get_id(); + start_t = ClockType::now(); + } const auto& cinn_compiled_object = CinnCompiler::GetInstance()->Compile( compilation_key, inputs_name2tensor, target, stream); + if (VLOG_IS_ON(1)) { + end_t = ClockType::now(); + auto time_sec = std::chrono::duration_cast( + end_t - start_t); + VLOG(1) << "Ends to compile at thread " << std::this_thread::get_id() + << " , time cost : " << time_sec.count() << " ms"; + } details::DebugCinnCompiledResult(cinn_compiled_object); auto* launch_context = cinn_compiled_object.launch_context.get(); diff --git a/python/paddle/distributed/passes/cpp_pass.py b/python/paddle/distributed/passes/cpp_pass.py index 4a4e5ecbbb495f7893509cc69cd29e08e710acee..72525255b7eaaf678ae035e27fc7f2dcbc69f26d 100644 --- a/python/paddle/distributed/passes/cpp_pass.py +++ b/python/paddle/distributed/passes/cpp_pass.py @@ -13,6 +13,7 @@ # limitations under the License. from .pass_base import PassType, CPPPassWrapper, register_pass +from paddle.fluid.framework import core, _apply_pass as _apply_cpp_pass @register_pass("fuse_elewise_add_act") @@ -93,3 +94,35 @@ class InplaceAddtoOpPass(CPPPassWrapper): def _type(self): return PassType.CALC_OPT + + +@register_pass("build_cinn") +class BuildCINNPass(CPPPassWrapper): + def __init__(self): + super(BuildCINNPass, self).__init__() + self.set_attr("allow_ops", []) + self.set_attr("deny_ops", []) + + @property + def cpp_name(self): + return "build_cinn_pass" + + def _type(self): + return PassType.CALC_OPT + + def _apply_single_impl(self, main_program, startup_program, context): + allow_ops = ";".join(self.get_attr("allow_ops")) + deny_ops = ";".join(self.get_attr("deny_ops")) + + assert 'FLAGS_allow_cinn_ops' in core.globals( + ), "PaddlePaddle is not compiled with CINN support" + old_allow_ops = core.globals()['FLAGS_allow_cinn_ops'] + old_deny_ops = core.globals()['FLAGS_deny_cinn_ops'] + try: + core.globals()['FLAGS_allow_cinn_ops'] = allow_ops + core.globals()['FLAGS_deny_cinn_ops'] = deny_ops + _apply_cpp_pass(main_program, startup_program, self.cpp_name, {}, + self.cpp_attr_types) + finally: + core.globals()['FLAGS_allow_cinn_ops'] = old_allow_ops + core.globals()['FLAGS_deny_cinn_ops'] = old_deny_ops diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/dist_pass_test_base.py b/python/paddle/fluid/tests/unittests/distributed_passes/dist_pass_test_base.py index f0ed2cdc049500e53bae28b5d0503e50ac89c60d..786ee06487fbca09250da8c9742c10b9b8b34410 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/dist_pass_test_base.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/dist_pass_test_base.py @@ -39,7 +39,7 @@ def prepare_python_path_and_return_module(path): paths.append(dirname) python_path = ":".join(paths) else: - python_path = path + python_path = dirname os.environ[env_name] = python_path print('GLOG_v=', os.environ.get('GLOG_v', None), flush=1) return filename[:-len(py_suffix)] @@ -85,9 +85,9 @@ class DistPassTestBase(unittest.TestCase): raise NotImplementedError() def check_main(self, model=None, gpus=None, **kwargs): - no_pass_rets = self._distributed_launch( - model=model, apply_pass=True, gpus=gpus, **kwargs) pass_rets = self._distributed_launch( + model=model, apply_pass=True, gpus=gpus, **kwargs) + no_pass_rets = self._distributed_launch( model=model, apply_pass=False, gpus=gpus, **kwargs) self.check_results(no_pass_rets, pass_rets) diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/model_zoo.py b/python/paddle/fluid/tests/unittests/distributed_passes/model_zoo.py index 0b522b79c4e9300c4b8b4d46432d71a28762b4f3..7eebee47e59a8c97f5bf03a4ce8ec4cec304d97e 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/model_zoo.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/model_zoo.py @@ -59,3 +59,40 @@ def resnet_model(place, batch_size, image_shape=[3, 224, 224], main_program = paddle.static.default_main_program() startup_program = paddle.static.default_startup_program() return main_program, startup_program, [image, label], [loss], reader + + +def simple_net(place, batch_size, image_shape=[784], num_classes=10): + image = paddle.static.data( + shape=[batch_size] + image_shape, dtype='float32', name='image') + label = paddle.static.data( + shape=[batch_size, 1], dtype='int64', name='label') + linears = [nn.Linear(784, 784) for _ in range(3)] + hidden = image + for linear in linears: + hidden = linear(hidden) + hidden = nn.ReLU()(hidden) + loss_fn = nn.loss.CrossEntropyLoss() + loss = loss_fn(hidden, label) + optimizer = paddle.optimizer.Adam(learning_rate=1e-3) + + dist_strategy = fleet.DistributedStrategy() + dist_strategy.fuse_all_reduce_ops = False + dist_strategy.without_graph_optimization = True + fleet.init(is_collective=True, strategy=dist_strategy) + optimizer = fleet.distributed_optimizer(optimizer) + optimizer.minimize(loss) + + rank = paddle.distributed.get_rank() + + def reader(): + seed = get_seed_from_env() + np.random.seed(seed + rank) + for _ in range(10): + image_np = np.random.random(size=image.shape).astype('float32') + label_np = np.random.randint( + low=0, high=num_classes, size=label.shape).astype('int64') + yield image_np, label_np + + main_program = paddle.static.default_main_program() + startup_program = paddle.static.default_startup_program() + return main_program, startup_program, [image, label], [loss], reader diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_build_cinn_pass_resnet.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_build_cinn_pass_resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..8430eb615a20cd63d1a2ef8ffb34da317a97a3f4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_build_cinn_pass_resnet.py @@ -0,0 +1,41 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +from paddle.distributed.passes import new_pass, PassManager +import unittest +from dist_pass_test_base import DistPassTestBase +from model_zoo import resnet_model + + +class TestBuildCINNPass(DistPassTestBase): + def init(self): + self.atol = 0.5 + self.rtol = 0.0 + + def apply_passes(self, main_prog, startup_prog): + pass_manager = PassManager([ + new_pass("build_cinn"), + new_pass("fuse_elewise_add_act"), + ]) + pass_manager.apply([main_prog], [startup_prog]) + print(pass_manager.names) + + def test_bs_32(self): + if paddle.is_compiled_with_cinn(): + self.check_main(resnet_model, batch_size=32) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_build_cinn_pass_simple_net.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_build_cinn_pass_simple_net.py new file mode 100644 index 0000000000000000000000000000000000000000..e030420d3242021e009a984f06337c1c33564041 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_build_cinn_pass_simple_net.py @@ -0,0 +1,42 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +from paddle.distributed.passes import new_pass, PassManager +import unittest +from dist_pass_test_base import DistPassTestBase +from model_zoo import simple_net + + +class TestBuildCINNPass(DistPassTestBase): + def init(self): + self.atol = 0.0 + self.rtol = 0.0 + + def apply_passes(self, main_prog, startup_prog): + pass_manager = PassManager([ + new_pass("build_cinn"), + new_pass("fuse_elewise_add_act"), + ]) + pass_manager.apply([main_prog], [startup_prog]) + op_types = [op.type for op in main_prog.global_block().ops] + self.assertTrue('cinn_launch' in op_types) + + def test_bs_32(self): + if paddle.is_compiled_with_cinn(): + self.check_main(simple_net, batch_size=32) + + +if __name__ == "__main__": + unittest.main()