From c37af19c963ab1b4c65ac4f7ca83e31f864c76d3 Mon Sep 17 00:00:00 2001 From: Roc <30228238+sljlp@users.noreply.github.com> Date: Fri, 15 Apr 2022 21:03:11 +0800 Subject: [PATCH] Moe ref (#41836) * moe ref * ref commit; test=document_fix * update; test=document_fix * update test=document_fix --- paddle/fluid/operators/assign_pos_op.cu | 11 ++++++++++- paddle/fluid/operators/limit_by_capacity_op.cu | 8 ++++++++ paddle/fluid/operators/number_count_op.cu | 8 ++++++++ paddle/fluid/operators/prune_gate_by_capacity_op.cu | 8 ++++++++ .../incubate/distributed/models/moe/gate/base_gate.py | 7 +++++++ .../distributed/models/moe/gate/gshard_gate.py | 7 +++++++ .../distributed/models/moe/gate/naive_gate.py | 9 ++++++++- .../distributed/models/moe/gate/switch_gate.py | 9 ++++++++- .../incubate/distributed/models/moe/grad_clip.py | 5 +++++ .../incubate/distributed/models/moe/moe_layer.py | 7 +++++++ .../paddle/incubate/distributed/models/moe/utils.py | 10 +++++++++- 11 files changed, 85 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/operators/assign_pos_op.cu b/paddle/fluid/operators/assign_pos_op.cu index 5fa159b94f9..d96d36931b3 100644 --- a/paddle/fluid/operators/assign_pos_op.cu +++ b/paddle/fluid/operators/assign_pos_op.cu @@ -10,7 +10,16 @@ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and -limitations under the License. */ +limitations under the License. 
+ +The file has been adapted from the two files: + https://github.com/laekov/fastmoe/blob/master/cuda/local_exchange.cu + https://github.com/laekov/fastmoe/blob/master/cuda/local_exchange.cuh + Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +We retain the following license from the original files: + Copyright 2021, Jiaao He. All rights reserved. + Licensed under the Apache License, Version 2.0 (the "License"). +*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/assign_pos_op.h" diff --git a/paddle/fluid/operators/limit_by_capacity_op.cu b/paddle/fluid/operators/limit_by_capacity_op.cu index 253ae8162c9..c77adf2200c 100644 --- a/paddle/fluid/operators/limit_by_capacity_op.cu +++ b/paddle/fluid/operators/limit_by_capacity_op.cu @@ -11,6 +11,14 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +// +// The file has been adapted from the two files: +// https://github.com/laekov/fastmoe/blob/master/cuda/balancing.cu +// https://github.com/laekov/fastmoe/blob/master/cuda/balancing.cuh +// Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +// We retain the following license from the original files: +// Copyright 2021, Jiaao He. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"). #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/limit_by_capacity_op.h" diff --git a/paddle/fluid/operators/number_count_op.cu b/paddle/fluid/operators/number_count_op.cu index 0106c70d8eb..923d89c2485 100644 --- a/paddle/fluid/operators/number_count_op.cu +++ b/paddle/fluid/operators/number_count_op.cu @@ -11,6 +11,14 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
+// +// The file has been adapted from the two files: +// https://github.com/laekov/fastmoe/blob/master/cuda/local_exchange.cu +// https://github.com/laekov/fastmoe/blob/master/cuda/local_exchange.cuh +// Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +// We retain the following license from the original files: +// Copyright 2021, Jiaao He. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"). #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/number_count_op.h" diff --git a/paddle/fluid/operators/prune_gate_by_capacity_op.cu b/paddle/fluid/operators/prune_gate_by_capacity_op.cu index 953847512bc..7228bdbf380 100644 --- a/paddle/fluid/operators/prune_gate_by_capacity_op.cu +++ b/paddle/fluid/operators/prune_gate_by_capacity_op.cu @@ -11,6 +11,14 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +// +// The file has been adapted from the two files: +// https://github.com/laekov/fastmoe/blob/master/cuda/balancing.cu +// https://github.com/laekov/fastmoe/blob/master/cuda/balancing.cuh +// Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +// We retain the following license from the original files: +// Copyright 2021, Jiaao He. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"). #include "paddle/fluid/operators/prune_gate_by_capacity_op.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" diff --git a/python/paddle/incubate/distributed/models/moe/gate/base_gate.py b/python/paddle/incubate/distributed/models/moe/gate/base_gate.py index 100d201d4b3..f527e82f043 100644 --- a/python/paddle/incubate/distributed/models/moe/gate/base_gate.py +++ b/python/paddle/incubate/distributed/models/moe/gate/base_gate.py @@ -11,6 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. +# +# The file has been adapted from the file: +# https://github.com/laekov/fastmoe/blob/master/fmoe/gates/base_gate.py +# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +# We retain the following license from the original files: +# Copyright 2021, Jiaao He. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"). import paddle.nn as nn diff --git a/python/paddle/incubate/distributed/models/moe/gate/gshard_gate.py b/python/paddle/incubate/distributed/models/moe/gate/gshard_gate.py index 3ab3cf69014..3618ec56e96 100644 --- a/python/paddle/incubate/distributed/models/moe/gate/gshard_gate.py +++ b/python/paddle/incubate/distributed/models/moe/gate/gshard_gate.py @@ -11,6 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# +# The file has been adapted from the file: +# https://github.com/laekov/fastmoe/blob/master/fmoe/gates/gshard_gate.py +# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +# We retain the following license from the original files: +# Copyright 2021, Jiaao He. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"). import math import paddle diff --git a/python/paddle/incubate/distributed/models/moe/gate/naive_gate.py b/python/paddle/incubate/distributed/models/moe/gate/naive_gate.py index 785d2e971bb..491d1f95e10 100644 --- a/python/paddle/incubate/distributed/models/moe/gate/naive_gate.py +++ b/python/paddle/incubate/distributed/models/moe/gate/naive_gate.py @@ -1,5 +1,5 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -11,6 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# +# The file has been adapted from the file: +# https://github.com/laekov/fastmoe/blob/master/fmoe/gates/naive_gate.py +# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +# We retain the following license from the original files: +# Copyright 2021, Jiaao He. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"). from .base_gate import BaseGate diff --git a/python/paddle/incubate/distributed/models/moe/gate/switch_gate.py b/python/paddle/incubate/distributed/models/moe/gate/switch_gate.py index 54bf3ab148a..776516989e5 100644 --- a/python/paddle/incubate/distributed/models/moe/gate/switch_gate.py +++ b/python/paddle/incubate/distributed/models/moe/gate/switch_gate.py @@ -1,5 +1,5 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,6 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# +# The file has been adapted from the file: +# https://github.com/laekov/fastmoe/blob/master/fmoe/gates/switch_gate.py +# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +# We retain the following license from the original files: +# Copyright 2021, Jiaao He. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"). 
import math import paddle diff --git a/python/paddle/incubate/distributed/models/moe/grad_clip.py b/python/paddle/incubate/distributed/models/moe/grad_clip.py index cde5455d271..b620253b9f2 100644 --- a/python/paddle/incubate/distributed/models/moe/grad_clip.py +++ b/python/paddle/incubate/distributed/models/moe/grad_clip.py @@ -55,6 +55,11 @@ class ClipGradForMOEByGlobalNorm(ClipGradBase): ``need_clip`` of ``ClipGradyGlobalNorm`` HAS BEEN DEPRECATED since 2.0. Please use ``need_clip`` in ``ParamAttr`` to speficiy the clip scope. + Reference: + https://github.com/laekov/fastmoe/blob/master/examples/megatron/clip-grad-v2.2.patch + Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 + + Args: clip_norm (float): The maximum norm value. is_expert_param_func (function): a function to decide whether a param should be put into moe_params_grads diff --git a/python/paddle/incubate/distributed/models/moe/moe_layer.py b/python/paddle/incubate/distributed/models/moe/moe_layer.py index 99cc38d04bd..eebb635e3ea 100644 --- a/python/paddle/incubate/distributed/models/moe/moe_layer.py +++ b/python/paddle/incubate/distributed/models/moe/moe_layer.py @@ -11,6 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# +# The file has been adapted from the file: +# https://github.com/laekov/fastmoe/blob/master/fmoe/layers.py +# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +# We retain the following license from the original files: +# Copyright 2021, Jiaao He. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"). 
import collections import math diff --git a/python/paddle/incubate/distributed/models/moe/utils.py b/python/paddle/incubate/distributed/models/moe/utils.py index 0e87fe3e313..25c76c97530 100644 --- a/python/paddle/incubate/distributed/models/moe/utils.py +++ b/python/paddle/incubate/distributed/models/moe/utils.py @@ -1,5 +1,5 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,6 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# +# The file has been adapted from the file: +# https://github.com/laekov/fastmoe/blob/master/fmoe/functions.py +# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +# We retain the following license from the original files: +# Copyright 2021, Jiaao He. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"). + from paddle.distributed.models.moe.utils import _number_count, _limit_by_capacity, _prune_gate_by_capacity, _assign_pos import paddle -- GitLab