diff --git a/paddle/fluid/operators/assign_pos_op.cu b/paddle/fluid/operators/assign_pos_op.cu index 5fa159b94f9834e43db1cb0a419eefd2f60181b0..d96d36931b3230c99ae13b16c53533074ec348e0 100644 --- a/paddle/fluid/operators/assign_pos_op.cu +++ b/paddle/fluid/operators/assign_pos_op.cu @@ -10,7 +10,16 @@ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and -limitations under the License. */ +limitations under the License. + +The file has been adapted from the two files: + https://github.com/laekov/fastmoe/blob/master/cuda/local_exchange.cu + https://github.com/laekov/fastmoe/blob/master/cuda/local_exchange.cuh + Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +We retain the following license from the original files: + Copyright 2021, Jiaao He + Licensed under the Apache License, Version 2.0 (the "License"). +*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/assign_pos_op.h" diff --git a/paddle/fluid/operators/limit_by_capacity_op.cu b/paddle/fluid/operators/limit_by_capacity_op.cu index 253ae8162c9b4e668b5651e7b73b58ade9136b05..c77adf2200cbe7ce51b71ae0b6fc7338d5d7fd8a 100644 --- a/paddle/fluid/operators/limit_by_capacity_op.cu +++ b/paddle/fluid/operators/limit_by_capacity_op.cu @@ -11,6 +11,14 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +// +// The file has been adapted from the two files: +// https://github.com/laekov/fastmoe/blob/master/cuda/balancing.cu +// https://github.com/laekov/fastmoe/blob/master/cuda/balancing.cuh +// Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +// We retain the following license from the original files: +// Copyright 2021, Jiaao He. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"). #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/limit_by_capacity_op.h" diff --git a/paddle/fluid/operators/number_count_op.cu b/paddle/fluid/operators/number_count_op.cu index 0106c70d8eb53888801e942fc6c7c9ca57644062..923d89c24853f7902e55988cf4b656105e8ff826 100644 --- a/paddle/fluid/operators/number_count_op.cu +++ b/paddle/fluid/operators/number_count_op.cu @@ -11,6 +11,14 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +// +// The file has been adapted from the two files: +// https://github.com/laekov/fastmoe/blob/master/cuda/local_exchange.cu +// https://github.com/laekov/fastmoe/blob/master/cuda/local_exchange.cuh +// Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +// We retain the following license from the original files: +// Copyright 2021, Jiaao He. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"). #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/number_count_op.h" diff --git a/paddle/fluid/operators/prune_gate_by_capacity_op.cu b/paddle/fluid/operators/prune_gate_by_capacity_op.cu index 953847512bc1a775e9475d9419b475ebeaf5e569..7228bdbf3805a30869c9dd9e3ca0e38f5349865c 100644 --- a/paddle/fluid/operators/prune_gate_by_capacity_op.cu +++ b/paddle/fluid/operators/prune_gate_by_capacity_op.cu @@ -11,6 +11,14 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +// +// The file has been adapted from the two files: +// https://github.com/laekov/fastmoe/blob/master/cuda/balancing.cu +// https://github.com/laekov/fastmoe/blob/master/cuda/balancing.cuh +// Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +// We retain the following license from the original files: +// Copyright 2021, Jiaao He. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"). #include "paddle/fluid/operators/prune_gate_by_capacity_op.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" diff --git a/python/paddle/incubate/distributed/models/moe/gate/base_gate.py b/python/paddle/incubate/distributed/models/moe/gate/base_gate.py index 100d201d4b3d1bdd9f100f8817f30ea8c707eda0..f527e82f043c78becea6ec41cbad4347e130ad69 100644 --- a/python/paddle/incubate/distributed/models/moe/gate/base_gate.py +++ b/python/paddle/incubate/distributed/models/moe/gate/base_gate.py @@ -11,6 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# +# The file has been adapted from the file: +# https://github.com/laekov/fastmoe/blob/master/fmoe/gates/base_gate.py +# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +# We retain the following license from the original files: +# Copyright 2021, Jiaao He. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"). import paddle.nn as nn diff --git a/python/paddle/incubate/distributed/models/moe/gate/gshard_gate.py b/python/paddle/incubate/distributed/models/moe/gate/gshard_gate.py index b1c0cd4214dbb1b66cea91224fb5e3eaa094b991..3618ec56e96c914ee941bc8a79fd82bcb838daa6 100644 --- a/python/paddle/incubate/distributed/models/moe/gate/gshard_gate.py +++ b/python/paddle/incubate/distributed/models/moe/gate/gshard_gate.py @@ -11,6 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# +# The file has been adapted from the file: +# https://github.com/laekov/fastmoe/blob/master/fmoe/gates/gshard_gate.py +# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +# We retain the following license from the original files: +# Copyright 2021, Jiaao He. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"). import math import paddle @@ -62,6 +69,6 @@ class GShardGate(NaiveGate): if self.random_routing: rand_routing_prob = paddle.rand( shape=[gate_score.shape[0]], dtype="float32") - topk_idx = paddle.distributed.utils.random_routing( + topk_idx = paddle.distributed.models.moe.utils._random_routing( topk_idx, topk_val, rand_routing_prob) return topk_val, topk_idx diff --git a/python/paddle/incubate/distributed/models/moe/gate/naive_gate.py b/python/paddle/incubate/distributed/models/moe/gate/naive_gate.py index 785d2e971bb3681a50746920d2e04aefdde97242..c3c68685445c80266b4c19f8fb5bac46ecf8b980 100644 --- a/python/paddle/incubate/distributed/models/moe/gate/naive_gate.py +++ b/python/paddle/incubate/distributed/models/moe/gate/naive_gate.py @@ -1,5 +1,5 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,6 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# +# The file has been adapted from the file: +# https://github.com/laekov/fastmoe/blob/master/fmoe/gates/naive_gate.py +# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +# We retain the following license from the original files: +# Copyright 2021, Jiaao He. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"). from .base_gate import BaseGate diff --git a/python/paddle/incubate/distributed/models/moe/gate/switch_gate.py b/python/paddle/incubate/distributed/models/moe/gate/switch_gate.py index 54bf3ab148ab2a7b6ed666de1a67a944f6e108f8..776516989e5a12e6aee4a0402b1e07b08dd77308 100644 --- a/python/paddle/incubate/distributed/models/moe/gate/switch_gate.py +++ b/python/paddle/incubate/distributed/models/moe/gate/switch_gate.py @@ -1,5 +1,5 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,6 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# +# The file has been adapted from the file: +# https://github.com/laekov/fastmoe/blob/master/fmoe/gates/switch_gate.py +# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +# We retain the following license from the original files: +# Copyright 2021, Jiaao He. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"). import math import paddle diff --git a/python/paddle/incubate/distributed/models/moe/grad_clip.py b/python/paddle/incubate/distributed/models/moe/grad_clip.py index cde5455d271683c4a6867e0a4ac4a0472b24b2df..b620253b9f26f4f067c716adf2080f2fc41c4da8 100644 --- a/python/paddle/incubate/distributed/models/moe/grad_clip.py +++ b/python/paddle/incubate/distributed/models/moe/grad_clip.py @@ -55,6 +55,11 @@ class ClipGradForMOEByGlobalNorm(ClipGradBase): ``need_clip`` of ``ClipGradyGlobalNorm`` HAS BEEN DEPRECATED since 2.0. Please use ``need_clip`` in ``ParamAttr`` to speficiy the clip scope. + Reference: + https://github.com/laekov/fastmoe/blob/master/examples/megatron/clip-grad-v2.2.patch + Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 + + Args: clip_norm (float): The maximum norm value. is_expert_param_func (function): a function to decide whether a param should be put into moe_params_grads diff --git a/python/paddle/incubate/distributed/models/moe/moe_layer.py b/python/paddle/incubate/distributed/models/moe/moe_layer.py index 99cc38d04bdda79201c73a6a1fecf453602a2dca..eebb635e3ead76e0ef95e3f6eb558c43b4008c1c 100644 --- a/python/paddle/incubate/distributed/models/moe/moe_layer.py +++ b/python/paddle/incubate/distributed/models/moe/moe_layer.py @@ -11,6 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# +# The file has been adapted from the file: +# https://github.com/laekov/fastmoe/blob/master/fmoe/layers.py +# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +# We retain the following license from the original files: +# Copyright 2021, Jiaao He. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"). import collections import math diff --git a/python/paddle/incubate/distributed/models/moe/utils.py b/python/paddle/incubate/distributed/models/moe/utils.py index 99e31a16273bf7ef939d724c00d35e7fb647aada..25c76c9753035a8491723c85c55e1d03653d23a7 100644 --- a/python/paddle/incubate/distributed/models/moe/utils.py +++ b/python/paddle/incubate/distributed/models/moe/utils.py @@ -1,5 +1,5 @@ # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,7 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from paddle.distributed.models.moe.utils import * +# +# The file has been adapted from the file: +# https://github.com/laekov/fastmoe/blob/master/fmoe/functions.py +# Git commit hash: 295a615aacce7e54a37e7935274ba15e901c78e4 +# We retain the following license from the original files: +# Copyright 2021, Jiaao He. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"). + +from paddle.distributed.models.moe.utils import _number_count, _limit_by_capacity, _prune_gate_by_capacity, _assign_pos +import paddle def _alltoall(in_tensor_list, group=None, use_calc_stream=True):