未验证 提交 aef291f4 编写于 作者: 李季 提交者: GitHub

Add reference to global_gather and global_scatter operators (#36985)

* add reference to global_gather and global_scatter operators
上级 251f68e7
...@@ -10,7 +10,7 @@ Unless required by applicable law or agreed to in writing, software ...@@ -10,7 +10,7 @@ Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License.*/
#include "paddle/fluid/operators/collective/global_gather_op.h" #include "paddle/fluid/operators/collective/global_gather_op.h"
...@@ -101,7 +101,8 @@ class GlobalGatherOpCUDAKernel : public framework::OpKernel<T> { ...@@ -101,7 +101,8 @@ class GlobalGatherOpCUDAKernel : public framework::OpKernel<T> {
auto send_ptr = 0; auto send_ptr = 0;
auto send_buf = x->data<T>(); auto send_buf = x->data<T>();
auto recv_buf = out->mutable_data<T>(out_dims, place); auto recv_buf = out->mutable_data<T>(out_dims, place);
// Taken and modified for PaddlePaddle from:
// https://github.com/laekov/fastmoe
for (auto i = 0; i < n_expert; ++i) { for (auto i = 0; i < n_expert; ++i) {
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclGroupStart()); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclGroupStart());
for (auto j = 0; j < nranks; ++j) { for (auto j = 0; j < nranks; ++j) {
......
...@@ -10,7 +10,7 @@ Unless required by applicable law or agreed to in writing, software ...@@ -10,7 +10,7 @@ Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License.*/
#include "paddle/fluid/operators/collective/global_scatter_op.h" #include "paddle/fluid/operators/collective/global_scatter_op.h"
...@@ -100,7 +100,8 @@ class GlobalScatterOpCUDAKernel : public framework::OpKernel<T> { ...@@ -100,7 +100,8 @@ class GlobalScatterOpCUDAKernel : public framework::OpKernel<T> {
auto recv_ptr = 0; auto recv_ptr = 0;
auto send_buf = x->data<T>(); auto send_buf = x->data<T>();
auto recv_buf = out->mutable_data<T>(out_dims, place); auto recv_buf = out->mutable_data<T>(out_dims, place);
// Taken and modified for PaddlePaddle from:
// https://github.com/laekov/fastmoe
for (auto i = 0; i < n_expert; ++i) { for (auto i = 0; i < n_expert; ++i) {
PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclGroupStart()); PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclGroupStart());
for (auto j = 0; j < nranks; ++j) { for (auto j = 0; j < nranks; ++j) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册