diff --git a/paddle/fluid/operators/collective/global_gather_op.cu.cc b/paddle/fluid/operators/collective/global_gather_op.cu.cc index 70b5d0244d3852a95a4367e248a4a2ac7c7ca96c..0207f0e1872ba641d51f8917adbaba59e8138bba 100644 --- a/paddle/fluid/operators/collective/global_gather_op.cu.cc +++ b/paddle/fluid/operators/collective/global_gather_op.cu.cc @@ -10,7 +10,7 @@ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and -limitations under the License. */ +limitations under the License.*/ #include "paddle/fluid/operators/collective/global_gather_op.h" @@ -101,7 +101,8 @@ class GlobalGatherOpCUDAKernel : public framework::OpKernel { auto send_ptr = 0; auto send_buf = x->data(); auto recv_buf = out->mutable_data(out_dims, place); - + // Taken and modified for PaddlePaddle from: + // https://github.com/laekov/fastmoe for (auto i = 0; i < n_expert; ++i) { PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclGroupStart()); for (auto j = 0; j < nranks; ++j) { diff --git a/paddle/fluid/operators/collective/global_scatter_op.cu.cc b/paddle/fluid/operators/collective/global_scatter_op.cu.cc index bec984c6b57e19dd890c0a8f3321d69242bd67e5..2bd42e1dadcef0513dc8c71a588d4c46a530dba5 100644 --- a/paddle/fluid/operators/collective/global_scatter_op.cu.cc +++ b/paddle/fluid/operators/collective/global_scatter_op.cu.cc @@ -10,7 +10,7 @@ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and -limitations under the License. */ +limitations under the License.*/ #include "paddle/fluid/operators/collective/global_scatter_op.h" @@ -100,7 +100,8 @@ class GlobalScatterOpCUDAKernel : public framework::OpKernel { auto recv_ptr = 0; auto send_buf = x->data(); auto recv_buf = out->mutable_data(out_dims, place); - + // Taken and modified for PaddlePaddle from: + // https://github.com/laekov/fastmoe for (auto i = 0; i < n_expert; ++i) { PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclGroupStart()); for (auto j = 0; j < nranks; ++j) {