Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
846c7e70
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
846c7e70
编写于
9月 20, 2022
作者:
N
Nyakku Shigure
提交者:
GitHub
9月 20, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[CodeStyle] remove crlf for cpp files (#46156)
上级
c6c9c186
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
1472 addition
and
1472 deletion
+1472
-1472
paddle/fluid/inference/analysis/analysis_pass.h
paddle/fluid/inference/analysis/analysis_pass.h
+53
-53
paddle/fluid/operators/collective/c_allgather_op_npu_test.cc
paddle/fluid/operators/collective/c_allgather_op_npu_test.cc
+191
-191
paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc
...fluid/operators/collective/c_allreduce_max_op_npu_test.cc
+187
-187
paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc
...fluid/operators/collective/c_allreduce_sum_op_npu_test.cc
+198
-198
paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc
paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc
+180
-180
paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc
...le/fluid/operators/collective/c_reduce_sum_op_npu_test.cc
+191
-191
paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc
...fluid/operators/collective/c_reducescatter_op_npu_test.cc
+188
-188
paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc
paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc
+140
-140
paddle/fluid/operators/unbind_op.cc
paddle/fluid/operators/unbind_op.cc
+90
-90
paddle/fluid/operators/unbind_op.h
paddle/fluid/operators/unbind_op.h
+54
-54
未找到文件。
paddle/fluid/inference/analysis/analysis_pass.h
浏览文件 @
846c7e70
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <iosfwd>
#include <string>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/inference/analysis/helper.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
/*
 * AnalysisPass is the abstract base of the passes used to control the IR
 * passes. Callers invoke Run(); subclasses supply RunImpl() and repr().
 */
class AnalysisPass {
 public:
  AnalysisPass() = default;
  virtual ~AnalysisPass() = default;

  // Entry point: forwards `argument` unchanged to the subclass's RunImpl().
  void Run(Argument* argument) { RunImpl(argument); }

  // Short human-readable representation; every subclass must provide one.
  virtual std::string repr() const = 0;

  // Long human-readable description. The base implementation returns the
  // placeholder text "No DOC".
  virtual std::string description() const { return std::string("No DOC"); }

 protected:
  // The pass body, implemented by subclasses and reached through Run().
  virtual void RunImpl(Argument* argument) = 0;
};
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <iosfwd>
#include <string>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/inference/analysis/helper.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
/*
 * AnalysisPass is the abstract base of the passes used to control the IR
 * passes. Callers invoke Run(); subclasses supply RunImpl() and repr().
 */
class AnalysisPass {
 public:
  AnalysisPass() = default;
  virtual ~AnalysisPass() = default;

  // Entry point: forwards `argument` unchanged to the subclass's RunImpl().
  void Run(Argument* argument) { RunImpl(argument); }

  // Short human-readable representation; every subclass must provide one.
  virtual std::string repr() const = 0;

  // Long human-readable description. The base implementation returns the
  // placeholder text "No DOC".
  virtual std::string description() const { return std::string("No DOC"); }

 protected:
  // The pass body, implemented by subclasses and reached through Run().
  virtual void RunImpl(Argument* argument) = 0;
};
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/operators/collective/c_allgather_op_npu_test.cc
浏览文件 @
846c7e70
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef _WIN32
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/collective/c_allgather_op.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
#include "paddle/fluid/operators/collective/c_broadcast_op.h"
#include "paddle/fluid/operators/collective/c_reducescatter_op.h"
#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace
f
=
paddle
::
framework
;
namespace
p
=
paddle
::
platform
;
USE_OP
(
c_allgather
);
USE_NO_KERNEL_OP
(
c_gen_hccl_id
);
USE_NO_KERNEL_OP
(
c_comm_init_hccl
);
USE_OP_DEVICE_KERNEL
(
c_allgather
,
NPU
);
DECLARE_string
(
selected_npus
);
// Logs `data` at verbosity level 2: the label `preStr`, a colon and a line
// break, followed by every element rendered with std::to_string and terminated
// by a comma (so the list ends with a trailing comma).
template <typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
  std::string joined;
  for (const T& value : data) {
    joined.append(std::to_string(value));
    joined.append(",");
  }
  VLOG(2) << preStr << ":" << std::endl << joined;
}
// Creates and runs the "c_gen_hccl_id" operator for this process, then copies
// the root info it leaves in scope variable "Out" into *hccl_id.
//
// scope   - scope in which the "Out" variable is created.
// ctx     - device context supplying the place the operator runs on.
// hccl_id - destination for the first 1024 bytes of the generated root info.
//
// Rank and device are read from the RANK_ID and DEVICE_ID environment
// variables.
// NOTE(review): getenv() results go straight into atoi(); an unset variable is
// a null pointer here - confirm the launcher always exports both.
void PrepareUniqueId(f::Scope* scope,
                     const p::DeviceContext& ctx,
                     HcclRootInfo* hccl_id) {
  int rank_id = atoi(getenv("RANK_ID"));
  int device_id = atoi(getenv("DEVICE_ID"));

  // NOTE(review): the value printed after "RANK_TABLE_FILE" is DEVICE_ID again.
  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
          << "; rank_id = " << rank_id
          << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));

  // Declared here but not read anywhere below.
  std::vector<int> rank_ids{0, 1};

  // Two fixed loopback endpoints, indexed by rank.
  // NOTE(review): rank_id is used as an index unchecked - assumes it is 0 or 1.
  std::vector<std::string> endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"};
  const int peer_rank = (rank_id == 0) ? 1 : 0;
  std::vector<std::string> other_endpoints = {endpointList[peer_rank]};

  f::AttributeMap gen_hccl_id;
  gen_hccl_id["rank"] = rank_id;
  gen_hccl_id["endpoint"] = endpointList[rank_id];
  gen_hccl_id["other_endpoints"] = other_endpoints;

  auto* out_var = scope->Var("Out");
  auto* generated_id = out_var->GetMutable<HcclRootInfo>();

  VLOG(3) << "break";

  auto gen_id_op = f::OpRegistry::CreateOp(
      "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id);
  VLOG(3) << "break";
  auto place = ctx.GetPlace();
  gen_id_op->Run(*scope, place);
  ctx.Wait();

  // Only the leading 1024 bytes are handed back to the caller.
  // NOTE(review): presumably HcclRootInfo is at least that large - confirm.
  memcpy(hccl_id, generated_id, 1024);
}
// Stores the root info from *hccl_id in scope variable "X" and runs the
// "c_comm_init_hccl" operator on it.
//
// scope   - scope in which the "X" variable is created.
// ctx     - device context supplying the place the operator runs on.
// hccl_id - root info to copy from; its first 1024 bytes are used.
//
// Rank and device are read from the RANK_ID and DEVICE_ID environment
// variables.
// NOTE(review): getenv() results go straight into atoi(); an unset variable is
// a null pointer here - confirm the launcher always exports both.
void Prepare(f::Scope* scope,
             const p::DeviceContext& ctx,
             HcclRootInfo* hccl_id) {
  auto* x_var = scope->Var("X");
  auto* stored_id = x_var->GetMutable<HcclRootInfo>();

  // NOTE(review): fixed 1024-byte copy; presumably HcclRootInfo is at least
  // that large - confirm.
  memcpy(stored_id, hccl_id, 1024);

  int rank_id = atoi(getenv("RANK_ID"));
  int device_id = atoi(getenv("DEVICE_ID"));

  // NOTE(review): the value printed after "RANK_TABLE_FILE" is DEVICE_ID again.
  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
          << "; rank_id = " << rank_id
          << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));

  // "rank_ids" is set to the integer 2 here; an earlier form that passed the
  // vector {0, 1} instead was left commented out by the original author.
  f::AttributeMap comm_init_attrs;
  comm_init_attrs["ring_id"] = 0;
  comm_init_attrs["rank_ids"] = 2;
  comm_init_attrs["rank"] = rank_id;
  comm_init_attrs["device_id"] = device_id;

  auto init_op = f::OpRegistry::CreateOp(
      "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs);
  auto place = ctx.GetPlace();
  init_op->Run(*scope, place);
  ctx.Wait();
}
// Runs the "c_allgather" operator and checks its output.
//
// The input tensor "Data" is 1 x 4 floats, each equal to 1.0 + RANK_ID. The
// operator is run ten times with nranks = 2, after which "OutData" must hold
// twice as many elements as the input: 1.0 in the first half and 2.0 in the
// second half.
//
// scope - scope holding the "Data" and "OutData" variables.
// ctx   - device context used for copies, synchronisation and the run place.
void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) {
  // NOTE(review): getenv() is not checked for null before atoi().
  int rank_id = atoi(getenv("RANK_ID"));

  int num1 = 1;
  int num2 = 4;

  // Build the host-side input and upload it.
  std::vector<float> init(static_cast<size_t>(num1 * num2),
                          static_cast<float>(1.0 + rank_id));
  PrintDebugInfo("input data", init);

  auto* in_var = scope->Var("Data");
  auto* tensor_x = in_var->GetMutable<f::LoDTensor>();
  f::TensorFromVector(init, ctx, tensor_x);
  tensor_x->Resize({num1, num2});
  ctx.Wait();

  // Allocate the output tensor on the device.
  auto place = ctx.GetPlace();
  auto* out_var = scope->Var("OutData");
  auto* tensor_out = out_var->GetMutable<f::LoDTensor>();
  tensor_out->Resize({num1, num2});
  tensor_out->mutable_data<float>(place);
  ctx.Wait();

  // Configure and run the operator.
  f::AttributeMap attrs;
  attrs["tag"] = std::string("tagx");
  attrs["ring_id"] = 0;
  attrs["nranks"] = 2;

  auto op = f::OpRegistry::CreateOp(
      "c_allgather", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs);

  for (int run = 0; run < 10; ++run) {
    op->Run(*scope, place);
  }
  ctx.Wait();

  // Download the result and verify it.
  std::vector<float> out_vec;
  f::TensorToVector(*tensor_out, ctx, &out_vec);
  ctx.Wait();

  PrintDebugInfo("output data", out_vec);

  EXPECT_EQ(out_vec.size(), init.size() * 2);
  const size_t half = out_vec.size() / 2;
  for (size_t i = 0; i < half; ++i) {
    EXPECT_EQ(out_vec[i], 1.0);
  }
  for (size_t i = half; i < out_vec.size(); ++i) {
    EXPECT_EQ(out_vec[i], 2.0);
  }
}
// Runs PrepareUniqueId, Prepare and TestHCCLAllGatherOp in sequence on one
// scope and one NPU device context.
TEST(c_allgather, NPU) {
  f::Scope scope;
  HcclRootInfo hccl_id;

  // Only one device is supported: FLAGS_selected_npus is parsed with atoi, so
  // when several devices are listed the first one is used.
  const int npu_index = atoi(FLAGS_selected_npus.c_str());
  p::NPUPlace npu_place(npu_index);
  p::NPUDeviceContext ctx(npu_place);

  PrepareUniqueId(&scope, ctx, &hccl_id);
  Prepare(&scope, ctx, &hccl_id);
  TestHCCLAllGatherOp(&scope, ctx);
}
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef _WIN32
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/collective/c_allgather_op.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
#include "paddle/fluid/operators/collective/c_broadcast_op.h"
#include "paddle/fluid/operators/collective/c_reducescatter_op.h"
#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace
f
=
paddle
::
framework
;
namespace
p
=
paddle
::
platform
;
USE_OP
(
c_allgather
);
USE_NO_KERNEL_OP
(
c_gen_hccl_id
);
USE_NO_KERNEL_OP
(
c_comm_init_hccl
);
USE_OP_DEVICE_KERNEL
(
c_allgather
,
NPU
);
DECLARE_string
(
selected_npus
);
// Logs `data` at verbosity level 2: the label `preStr`, a colon and a line
// break, followed by every element rendered with std::to_string and terminated
// by a comma (so the list ends with a trailing comma).
template <typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
  std::string joined;
  for (const T& value : data) {
    joined.append(std::to_string(value));
    joined.append(",");
  }
  VLOG(2) << preStr << ":" << std::endl << joined;
}
// Creates and runs the "c_gen_hccl_id" operator for this process, then copies
// the root info it leaves in scope variable "Out" into *hccl_id.
//
// scope   - scope in which the "Out" variable is created.
// ctx     - device context supplying the place the operator runs on.
// hccl_id - destination for the first 1024 bytes of the generated root info.
//
// Rank and device are read from the RANK_ID and DEVICE_ID environment
// variables.
// NOTE(review): getenv() results go straight into atoi(); an unset variable is
// a null pointer here - confirm the launcher always exports both.
void PrepareUniqueId(f::Scope* scope,
                     const p::DeviceContext& ctx,
                     HcclRootInfo* hccl_id) {
  int rank_id = atoi(getenv("RANK_ID"));
  int device_id = atoi(getenv("DEVICE_ID"));

  // NOTE(review): the value printed after "RANK_TABLE_FILE" is DEVICE_ID again.
  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
          << "; rank_id = " << rank_id
          << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));

  // Declared here but not read anywhere below.
  std::vector<int> rank_ids{0, 1};

  // Two fixed loopback endpoints, indexed by rank.
  // NOTE(review): rank_id is used as an index unchecked - assumes it is 0 or 1.
  std::vector<std::string> endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"};
  const int peer_rank = (rank_id == 0) ? 1 : 0;
  std::vector<std::string> other_endpoints = {endpointList[peer_rank]};

  f::AttributeMap gen_hccl_id;
  gen_hccl_id["rank"] = rank_id;
  gen_hccl_id["endpoint"] = endpointList[rank_id];
  gen_hccl_id["other_endpoints"] = other_endpoints;

  auto* out_var = scope->Var("Out");
  auto* generated_id = out_var->GetMutable<HcclRootInfo>();

  VLOG(3) << "break";

  auto gen_id_op = f::OpRegistry::CreateOp(
      "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id);
  VLOG(3) << "break";
  auto place = ctx.GetPlace();
  gen_id_op->Run(*scope, place);
  ctx.Wait();

  // Only the leading 1024 bytes are handed back to the caller.
  // NOTE(review): presumably HcclRootInfo is at least that large - confirm.
  memcpy(hccl_id, generated_id, 1024);
}
// Stores the root info from *hccl_id in scope variable "X" and runs the
// "c_comm_init_hccl" operator on it.
//
// scope   - scope in which the "X" variable is created.
// ctx     - device context supplying the place the operator runs on.
// hccl_id - root info to copy from; its first 1024 bytes are used.
//
// Rank and device are read from the RANK_ID and DEVICE_ID environment
// variables.
// NOTE(review): getenv() results go straight into atoi(); an unset variable is
// a null pointer here - confirm the launcher always exports both.
void Prepare(f::Scope* scope,
             const p::DeviceContext& ctx,
             HcclRootInfo* hccl_id) {
  auto* x_var = scope->Var("X");
  auto* stored_id = x_var->GetMutable<HcclRootInfo>();

  // NOTE(review): fixed 1024-byte copy; presumably HcclRootInfo is at least
  // that large - confirm.
  memcpy(stored_id, hccl_id, 1024);

  int rank_id = atoi(getenv("RANK_ID"));
  int device_id = atoi(getenv("DEVICE_ID"));

  // NOTE(review): the value printed after "RANK_TABLE_FILE" is DEVICE_ID again.
  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
          << "; rank_id = " << rank_id
          << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));

  // "rank_ids" is set to the integer 2 here; an earlier form that passed the
  // vector {0, 1} instead was left commented out by the original author.
  f::AttributeMap comm_init_attrs;
  comm_init_attrs["ring_id"] = 0;
  comm_init_attrs["rank_ids"] = 2;
  comm_init_attrs["rank"] = rank_id;
  comm_init_attrs["device_id"] = device_id;

  auto init_op = f::OpRegistry::CreateOp(
      "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs);
  auto place = ctx.GetPlace();
  init_op->Run(*scope, place);
  ctx.Wait();
}
// Runs the "c_allgather" operator and checks its output.
//
// The input tensor "Data" is 1 x 4 floats, each equal to 1.0 + RANK_ID. The
// operator is run ten times with nranks = 2, after which "OutData" must hold
// twice as many elements as the input: 1.0 in the first half and 2.0 in the
// second half.
//
// scope - scope holding the "Data" and "OutData" variables.
// ctx   - device context used for copies, synchronisation and the run place.
void TestHCCLAllGatherOp(f::Scope* scope, const p::DeviceContext& ctx) {
  // NOTE(review): getenv() is not checked for null before atoi().
  int rank_id = atoi(getenv("RANK_ID"));

  int num1 = 1;
  int num2 = 4;

  // Build the host-side input and upload it.
  std::vector<float> init(static_cast<size_t>(num1 * num2),
                          static_cast<float>(1.0 + rank_id));
  PrintDebugInfo("input data", init);

  auto* in_var = scope->Var("Data");
  auto* tensor_x = in_var->GetMutable<f::LoDTensor>();
  f::TensorFromVector(init, ctx, tensor_x);
  tensor_x->Resize({num1, num2});
  ctx.Wait();

  // Allocate the output tensor on the device.
  auto place = ctx.GetPlace();
  auto* out_var = scope->Var("OutData");
  auto* tensor_out = out_var->GetMutable<f::LoDTensor>();
  tensor_out->Resize({num1, num2});
  tensor_out->mutable_data<float>(place);
  ctx.Wait();

  // Configure and run the operator.
  f::AttributeMap attrs;
  attrs["tag"] = std::string("tagx");
  attrs["ring_id"] = 0;
  attrs["nranks"] = 2;

  auto op = f::OpRegistry::CreateOp(
      "c_allgather", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs);

  for (int run = 0; run < 10; ++run) {
    op->Run(*scope, place);
  }
  ctx.Wait();

  // Download the result and verify it.
  std::vector<float> out_vec;
  f::TensorToVector(*tensor_out, ctx, &out_vec);
  ctx.Wait();

  PrintDebugInfo("output data", out_vec);

  EXPECT_EQ(out_vec.size(), init.size() * 2);
  const size_t half = out_vec.size() / 2;
  for (size_t i = 0; i < half; ++i) {
    EXPECT_EQ(out_vec[i], 1.0);
  }
  for (size_t i = half; i < out_vec.size(); ++i) {
    EXPECT_EQ(out_vec[i], 2.0);
  }
}
// Runs PrepareUniqueId, Prepare and TestHCCLAllGatherOp in sequence on one
// scope and one NPU device context.
TEST(c_allgather, NPU) {
  f::Scope scope;
  HcclRootInfo hccl_id;

  // Only one device is supported: FLAGS_selected_npus is parsed with atoi, so
  // when several devices are listed the first one is used.
  const int npu_index = atoi(FLAGS_selected_npus.c_str());
  p::NPUPlace npu_place(npu_index);
  p::NPUDeviceContext ctx(npu_place);

  PrepareUniqueId(&scope, ctx, &hccl_id);
  Prepare(&scope, ctx, &hccl_id);
  TestHCCLAllGatherOp(&scope, ctx);
}
paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc
浏览文件 @
846c7e70
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef _WIN32
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/collective/c_allgather_op.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
#include "paddle/fluid/operators/collective/c_broadcast_op.h"
#include "paddle/fluid/operators/collective/c_reducescatter_op.h"
#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace
f
=
paddle
::
framework
;
namespace
p
=
paddle
::
platform
;
USE_OP
(
c_allreduce_max
);
USE_NO_KERNEL_OP
(
c_gen_hccl_id
);
USE_NO_KERNEL_OP
(
c_comm_init_hccl
);
USE_OP_DEVICE_KERNEL
(
c_allreduce_max
,
NPU
);
DECLARE_string
(
selected_npus
);
// Logs `data` at verbosity level 2: the label `preStr`, a colon and a line
// break, followed by every element rendered with std::to_string and terminated
// by a comma (so the list ends with a trailing comma).
template <typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
  std::string joined;
  for (const T& value : data) {
    joined.append(std::to_string(value));
    joined.append(",");
  }
  VLOG(2) << preStr << ":" << std::endl << joined;
}
// Creates and runs the "c_gen_hccl_id" operator for this process, then copies
// the root info it leaves in scope variable "Out" into *hccl_id.
//
// scope   - scope in which the "Out" variable is created.
// ctx     - device context supplying the place the operator runs on.
// hccl_id - destination for the first 1024 bytes of the generated root info.
//
// Rank and device are read from the RANK_ID and DEVICE_ID environment
// variables.
// NOTE(review): getenv() results go straight into atoi(); an unset variable is
// a null pointer here - confirm the launcher always exports both.
void PrepareUniqueId(f::Scope* scope,
                     const p::DeviceContext& ctx,
                     HcclRootInfo* hccl_id) {
  int rank_id = atoi(getenv("RANK_ID"));
  int device_id = atoi(getenv("DEVICE_ID"));

  // NOTE(review): the value printed after "RANK_TABLE_FILE" is DEVICE_ID again.
  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
          << "; rank_id = " << rank_id
          << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));

  // Declared here but not read anywhere below.
  std::vector<int> rank_ids{0, 1};

  // Two fixed loopback endpoints, indexed by rank.
  // NOTE(review): rank_id is used as an index unchecked - assumes it is 0 or 1.
  std::vector<std::string> endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"};
  const int peer_rank = (rank_id == 0) ? 1 : 0;
  std::vector<std::string> other_endpoints = {endpointList[peer_rank]};

  f::AttributeMap gen_hccl_id;
  gen_hccl_id["rank"] = rank_id;
  gen_hccl_id["endpoint"] = endpointList[rank_id];
  gen_hccl_id["other_endpoints"] = other_endpoints;

  auto* out_var = scope->Var("Out");
  auto* generated_id = out_var->GetMutable<HcclRootInfo>();

  VLOG(3) << "break";

  auto gen_id_op = f::OpRegistry::CreateOp(
      "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id);
  VLOG(3) << "break";
  auto place = ctx.GetPlace();
  gen_id_op->Run(*scope, place);
  ctx.Wait();

  // Only the leading 1024 bytes are handed back to the caller.
  // NOTE(review): presumably HcclRootInfo is at least that large - confirm.
  memcpy(hccl_id, generated_id, 1024);
}
// Stores the root info from *hccl_id in scope variable "X" and runs the
// "c_comm_init_hccl" operator on it.
//
// scope   - scope in which the "X" variable is created.
// ctx     - device context supplying the place the operator runs on.
// hccl_id - root info to copy from; its first 1024 bytes are used.
//
// Rank and device are read from the RANK_ID and DEVICE_ID environment
// variables.
// NOTE(review): getenv() results go straight into atoi(); an unset variable is
// a null pointer here - confirm the launcher always exports both.
void Prepare(f::Scope* scope,
             const p::DeviceContext& ctx,
             HcclRootInfo* hccl_id) {
  auto* x_var = scope->Var("X");
  auto* stored_id = x_var->GetMutable<HcclRootInfo>();

  // NOTE(review): fixed 1024-byte copy; presumably HcclRootInfo is at least
  // that large - confirm.
  memcpy(stored_id, hccl_id, 1024);

  int rank_id = atoi(getenv("RANK_ID"));
  int device_id = atoi(getenv("DEVICE_ID"));

  // NOTE(review): the value printed after "RANK_TABLE_FILE" is DEVICE_ID again.
  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
          << "; rank_id = " << rank_id
          << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));

  // "rank_ids" is set to the integer 2 here; an earlier form that passed the
  // vector {0, 1} instead was left commented out by the original author.
  f::AttributeMap comm_init_attrs;
  comm_init_attrs["ring_id"] = 0;
  comm_init_attrs["rank_ids"] = 2;
  comm_init_attrs["rank"] = rank_id;
  comm_init_attrs["device_id"] = device_id;

  auto init_op = f::OpRegistry::CreateOp(
      "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs);
  auto place = ctx.GetPlace();
  init_op->Run(*scope, place);
  ctx.Wait();
}
// Runs the "c_allreduce_max" operator and checks its output.
//
// The input tensor "Data" is 100 x 100 floats, each equal to
// 1.0 + 3 * RANK_ID. The operator is run ten times, after which "OutData" must
// have as many elements as the input and every element must equal 4.0.
//
// scope - scope holding the "Data" and "OutData" variables.
// ctx   - device context used for copies, synchronisation and the run place.
void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx) {
  // NOTE(review): getenv() is not checked for null before atoi().
  int rank_id = atoi(getenv("RANK_ID"));

  int num1 = 100;
  int num2 = 100;

  // Build the host-side input and upload it.
  std::vector<float> init(static_cast<size_t>(num1 * num2),
                          static_cast<float>(1.0 + rank_id * 3));
  PrintDebugInfo("input data", init);

  auto* in_var = scope->Var("Data");
  auto* tensor_x = in_var->GetMutable<f::LoDTensor>();
  f::TensorFromVector(init, ctx, tensor_x);
  tensor_x->Resize({num1, num2});
  ctx.Wait();

  // Allocate the output tensor on the device.
  auto place = ctx.GetPlace();
  auto* out_var = scope->Var("OutData");
  auto* tensor_out = out_var->GetMutable<f::LoDTensor>();
  tensor_out->Resize({num1, num2});
  tensor_out->mutable_data<float>(place);
  ctx.Wait();

  // Configure and run the operator.
  f::AttributeMap attrs;
  attrs["tag"] = std::string("tagx");
  attrs["ring_id"] = 0;

  auto op = f::OpRegistry::CreateOp(
      "c_allreduce_max", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs);

  for (int run = 0; run < 10; ++run) {
    op->Run(*scope, place);
  }
  ctx.Wait();

  // Download the result and verify it.
  std::vector<float> out_vec;
  f::TensorToVector(*tensor_out, ctx, &out_vec);
  ctx.Wait();

  PrintDebugInfo("output data", out_vec);

  EXPECT_EQ(out_vec.size(), init.size());
  for (size_t i = 0; i < out_vec.size(); ++i) {
    EXPECT_EQ(out_vec[i], 4.0);
  }
}
// Runs PrepareUniqueId, Prepare and TestHCCLAllReduceOp in sequence on one
// scope and one NPU device context.
TEST(c_allreduce_max, NPU) {
  f::Scope scope;
  HcclRootInfo hccl_id;

  // Only one device is supported: FLAGS_selected_npus is parsed with atoi, so
  // when several devices are listed the first one is used.
  const int npu_index = atoi(FLAGS_selected_npus.c_str());
  p::NPUPlace npu_place(npu_index);
  p::NPUDeviceContext ctx(npu_place);

  PrepareUniqueId(&scope, ctx, &hccl_id);
  Prepare(&scope, ctx, &hccl_id);
  TestHCCLAllReduceOp(&scope, ctx);
}
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef _WIN32
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/collective/c_allgather_op.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
#include "paddle/fluid/operators/collective/c_broadcast_op.h"
#include "paddle/fluid/operators/collective/c_reducescatter_op.h"
#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace
f
=
paddle
::
framework
;
namespace
p
=
paddle
::
platform
;
USE_OP
(
c_allreduce_max
);
USE_NO_KERNEL_OP
(
c_gen_hccl_id
);
USE_NO_KERNEL_OP
(
c_comm_init_hccl
);
USE_OP_DEVICE_KERNEL
(
c_allreduce_max
,
NPU
);
DECLARE_string
(
selected_npus
);
// Logs `data` at verbosity level 2: the label `preStr`, a colon and a line
// break, followed by every element rendered with std::to_string and terminated
// by a comma (so the list ends with a trailing comma).
template <typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
  std::string joined;
  for (const T& value : data) {
    joined.append(std::to_string(value));
    joined.append(",");
  }
  VLOG(2) << preStr << ":" << std::endl << joined;
}
// Creates and runs the "c_gen_hccl_id" operator for this process, then copies
// the root info it leaves in scope variable "Out" into *hccl_id.
//
// scope   - scope in which the "Out" variable is created.
// ctx     - device context supplying the place the operator runs on.
// hccl_id - destination for the first 1024 bytes of the generated root info.
//
// Rank and device are read from the RANK_ID and DEVICE_ID environment
// variables.
// NOTE(review): getenv() results go straight into atoi(); an unset variable is
// a null pointer here - confirm the launcher always exports both.
void PrepareUniqueId(f::Scope* scope,
                     const p::DeviceContext& ctx,
                     HcclRootInfo* hccl_id) {
  int rank_id = atoi(getenv("RANK_ID"));
  int device_id = atoi(getenv("DEVICE_ID"));

  // NOTE(review): the value printed after "RANK_TABLE_FILE" is DEVICE_ID again.
  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
          << "; rank_id = " << rank_id
          << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));

  // Declared here but not read anywhere below.
  std::vector<int> rank_ids{0, 1};

  // Two fixed loopback endpoints, indexed by rank.
  // NOTE(review): rank_id is used as an index unchecked - assumes it is 0 or 1.
  std::vector<std::string> endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"};
  const int peer_rank = (rank_id == 0) ? 1 : 0;
  std::vector<std::string> other_endpoints = {endpointList[peer_rank]};

  f::AttributeMap gen_hccl_id;
  gen_hccl_id["rank"] = rank_id;
  gen_hccl_id["endpoint"] = endpointList[rank_id];
  gen_hccl_id["other_endpoints"] = other_endpoints;

  auto* out_var = scope->Var("Out");
  auto* generated_id = out_var->GetMutable<HcclRootInfo>();

  VLOG(3) << "break";

  auto gen_id_op = f::OpRegistry::CreateOp(
      "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id);
  VLOG(3) << "break";
  auto place = ctx.GetPlace();
  gen_id_op->Run(*scope, place);
  ctx.Wait();

  // Only the leading 1024 bytes are handed back to the caller.
  // NOTE(review): presumably HcclRootInfo is at least that large - confirm.
  memcpy(hccl_id, generated_id, 1024);
}
// Stores the root info from *hccl_id in scope variable "X" and runs the
// "c_comm_init_hccl" operator on it.
//
// scope   - scope in which the "X" variable is created.
// ctx     - device context supplying the place the operator runs on.
// hccl_id - root info to copy from; its first 1024 bytes are used.
//
// Rank and device are read from the RANK_ID and DEVICE_ID environment
// variables.
// NOTE(review): getenv() results go straight into atoi(); an unset variable is
// a null pointer here - confirm the launcher always exports both.
void Prepare(f::Scope* scope,
             const p::DeviceContext& ctx,
             HcclRootInfo* hccl_id) {
  auto* x_var = scope->Var("X");
  auto* stored_id = x_var->GetMutable<HcclRootInfo>();

  // NOTE(review): fixed 1024-byte copy; presumably HcclRootInfo is at least
  // that large - confirm.
  memcpy(stored_id, hccl_id, 1024);

  int rank_id = atoi(getenv("RANK_ID"));
  int device_id = atoi(getenv("DEVICE_ID"));

  // NOTE(review): the value printed after "RANK_TABLE_FILE" is DEVICE_ID again.
  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
          << "; rank_id = " << rank_id
          << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));

  // "rank_ids" is set to the integer 2 here; an earlier form that passed the
  // vector {0, 1} instead was left commented out by the original author.
  f::AttributeMap comm_init_attrs;
  comm_init_attrs["ring_id"] = 0;
  comm_init_attrs["rank_ids"] = 2;
  comm_init_attrs["rank"] = rank_id;
  comm_init_attrs["device_id"] = device_id;

  auto init_op = f::OpRegistry::CreateOp(
      "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs);
  auto place = ctx.GetPlace();
  init_op->Run(*scope, place);
  ctx.Wait();
}
// Runs the "c_allreduce_max" operator and checks its output.
//
// The input tensor "Data" is 100 x 100 floats, each equal to
// 1.0 + 3 * RANK_ID. The operator is run ten times, after which "OutData" must
// have as many elements as the input and every element must equal 4.0.
//
// scope - scope holding the "Data" and "OutData" variables.
// ctx   - device context used for copies, synchronisation and the run place.
void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx) {
  // NOTE(review): getenv() is not checked for null before atoi().
  int rank_id = atoi(getenv("RANK_ID"));

  int num1 = 100;
  int num2 = 100;

  // Build the host-side input and upload it.
  std::vector<float> init(static_cast<size_t>(num1 * num2),
                          static_cast<float>(1.0 + rank_id * 3));
  PrintDebugInfo("input data", init);

  auto* in_var = scope->Var("Data");
  auto* tensor_x = in_var->GetMutable<f::LoDTensor>();
  f::TensorFromVector(init, ctx, tensor_x);
  tensor_x->Resize({num1, num2});
  ctx.Wait();

  // Allocate the output tensor on the device.
  auto place = ctx.GetPlace();
  auto* out_var = scope->Var("OutData");
  auto* tensor_out = out_var->GetMutable<f::LoDTensor>();
  tensor_out->Resize({num1, num2});
  tensor_out->mutable_data<float>(place);
  ctx.Wait();

  // Configure and run the operator.
  f::AttributeMap attrs;
  attrs["tag"] = std::string("tagx");
  attrs["ring_id"] = 0;

  auto op = f::OpRegistry::CreateOp(
      "c_allreduce_max", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs);

  for (int run = 0; run < 10; ++run) {
    op->Run(*scope, place);
  }
  ctx.Wait();

  // Download the result and verify it.
  std::vector<float> out_vec;
  f::TensorToVector(*tensor_out, ctx, &out_vec);
  ctx.Wait();

  PrintDebugInfo("output data", out_vec);

  EXPECT_EQ(out_vec.size(), init.size());
  for (size_t i = 0; i < out_vec.size(); ++i) {
    EXPECT_EQ(out_vec[i], 4.0);
  }
}
// Runs PrepareUniqueId, Prepare and TestHCCLAllReduceOp in sequence on one
// scope and one NPU device context.
TEST(c_allreduce_max, NPU) {
  f::Scope scope;
  HcclRootInfo hccl_id;

  // Only one device is supported: FLAGS_selected_npus is parsed with atoi, so
  // when several devices are listed the first one is used.
  const int npu_index = atoi(FLAGS_selected_npus.c_str());
  p::NPUPlace npu_place(npu_index);
  p::NPUDeviceContext ctx(npu_place);

  PrepareUniqueId(&scope, ctx, &hccl_id);
  Prepare(&scope, ctx, &hccl_id);
  TestHCCLAllReduceOp(&scope, ctx);
}
paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc
浏览文件 @
846c7e70
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef _WIN32
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
// Node1: HCCL_WHITELIST_DISABLE=1 FLAGS_selected_npus=1 GLOG_v=4 RANK_ID=1
// DEVICE_ID=1 ./paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test
// Node2: HCCL_WHITELIST_DISABLE=1 FLAGS_selected_npus=0 GLOG_v=4 RANK_ID=0
// DEVICE_ID=0 ./paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test
namespace
f
=
paddle
::
framework
;
namespace
p
=
paddle
::
platform
;
USE_OP
(
c_allreduce_sum
);
USE_NO_KERNEL_OP
(
c_gen_hccl_id
);
USE_NO_KERNEL_OP
(
c_comm_init_hccl
);
USE_OP_DEVICE_KERNEL
(
c_allreduce_sum
,
NPU
);
DECLARE_string
(
selected_npus
);
// Prints `data` to std::cout for debugging.
//
// Output format: the label `preStr` followed by ':' and a line break, then
// every element followed by a single space, then a final line break. Both line
// breaks use std::endl, so the stream is flushed.
//
// preStr - label printed before the values.
// data   - values to print; each element type must be streamable to std::cout.
template <typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
  std::cout << preStr << ":" << std::endl;
  // Iterate by reference so elements are not copied just to be printed.
  for (const auto& ele : data) {
    std::cout << ele << " ";
  }
  std::cout << std::endl;
}
// Creates and runs the "c_gen_hccl_id" operator for this process, then copies
// the root info it leaves in scope variable "Out" into *hccl_id.
//
// scope   - scope in which the "Out" variable is created.
// ctx     - device context supplying the place the operator runs on.
// hccl_id - destination for the first 1024 bytes of the generated root info.
//
// Rank and device are read from the RANK_ID and DEVICE_ID environment
// variables.
// NOTE(review): getenv() results go straight into atoi(); an unset variable is
// a null pointer here - confirm the launcher always exports both.
void PrepareUniqueId(f::Scope* scope,
                     const p::DeviceContext& ctx,
                     HcclRootInfo* hccl_id) {
  int rank_id = atoi(getenv("RANK_ID"));
  int device_id = atoi(getenv("DEVICE_ID"));

  // NOTE(review): the value printed after "RANK_TABLE_FILE" is DEVICE_ID again.
  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
          << "; rank_id = " << rank_id
          << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));

  // Declared here but not read anywhere below.
  std::vector<int> rank_ids{0, 1};

  // Two fixed loopback endpoints, indexed by rank.
  // NOTE(review): rank_id is used as an index unchecked - assumes it is 0 or 1.
  std::vector<std::string> endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"};
  const int peer_rank = (rank_id == 0) ? 1 : 0;
  std::vector<std::string> other_endpoints = {endpointList[peer_rank]};

  f::AttributeMap gen_hccl_id;
  gen_hccl_id["rank"] = rank_id;
  gen_hccl_id["endpoint"] = endpointList[rank_id];
  gen_hccl_id["other_endpoints"] = other_endpoints;

  auto* out_var = scope->Var("Out");
  auto* generated_id = out_var->GetMutable<HcclRootInfo>();

  VLOG(3) << "break";

  auto gen_id_op = f::OpRegistry::CreateOp(
      "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id);
  VLOG(3) << "break";
  auto place = ctx.GetPlace();
  gen_id_op->Run(*scope, place);
  ctx.Wait();

  // Only the leading 1024 bytes are handed back to the caller.
  // NOTE(review): presumably HcclRootInfo is at least that large - confirm.
  memcpy(hccl_id, generated_id, 1024);
}
// Stores the root info from *hccl_id in scope variable "X" and runs the
// "c_comm_init_hccl" operator on it.
//
// scope   - scope in which the "X" variable is created.
// ctx     - device context supplying the place the operator runs on.
// hccl_id - root info to copy from; its first 1024 bytes are used.
//
// Rank and device are read from the RANK_ID and DEVICE_ID environment
// variables.
// NOTE(review): getenv() results go straight into atoi(); an unset variable is
// a null pointer here - confirm the launcher always exports both.
void Prepare(f::Scope* scope,
             const p::DeviceContext& ctx,
             HcclRootInfo* hccl_id) {
  auto* x_var = scope->Var("X");
  auto* stored_id = x_var->GetMutable<HcclRootInfo>();

  // NOTE(review): fixed 1024-byte copy; presumably HcclRootInfo is at least
  // that large - confirm.
  memcpy(stored_id, hccl_id, 1024);

  int rank_id = atoi(getenv("RANK_ID"));
  int device_id = atoi(getenv("DEVICE_ID"));

  // NOTE(review): the value printed after "RANK_TABLE_FILE" is DEVICE_ID again.
  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id
          << "; rank_id = " << rank_id
          << "; RANK_TABLE_FILE = " << atoi(getenv("DEVICE_ID"));

  // "rank_ids" is set to the integer 2 here; an earlier form that passed the
  // vector {0, 1} instead was left commented out by the original author.
  f::AttributeMap comm_init_attrs;
  comm_init_attrs["ring_id"] = 0;
  comm_init_attrs["rank_ids"] = 2;
  comm_init_attrs["rank"] = rank_id;
  comm_init_attrs["device_id"] = device_id;

  auto init_op = f::OpRegistry::CreateOp(
      "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs);
  auto place = ctx.GetPlace();
  init_op->Run(*scope, place);
  ctx.Wait();
}
// Runs "c_allreduce_sum" once on a 3x128 tensor of T and checks the result.
//
// Every element of "Data" is 1.0 + RANK_ID except element 0, which is seeded
// with NaN. After the op, "OutData" must have the same element count as the
// input, element 0 must be within 0.1 of 65504 and elements 1..9 must equal
// 3.0.
//
// scope: scope holding the "Data" and "OutData" variables.
// ctx:   device context used for copies and for the op's place.
// iter:  appended to the op's "tag" attribute ("tagx_<iter>").
//
// Throws std::invalid_argument when RANK_ID is unset or not a number.
template <typename T>
void TestHCCLAllReduceOp(f::Scope* scope,
                         const p::DeviceContext& ctx,
                         int iter) {
  // init
  auto x = scope->Var("Data");
  auto tensor_x = x->GetMutable<f::LoDTensor>();

  // The "" fallback makes std::stoi throw for a missing variable instead of
  // handing nullptr to the parser.
  const char* rank_env = getenv("RANK_ID");
  const int rank_id = std::stoi(rank_env != nullptr ? rank_env : "");

  const int num1 = 3;
  const int num2 = 128;

  std::vector<T> init(num1 * num2, static_cast<T>(1.0 + rank_id));
  init[0] = static_cast<T>(std::numeric_limits<float>::quiet_NaN());
  PrintDebugInfo("input data", init);

  auto place = ctx.GetPlace();

  paddle::framework::TensorFromVector(init, ctx, tensor_x);
  tensor_x->Resize({num1, num2});
  ctx.Wait();

  auto out = scope->Var("OutData");
  auto tensor_out = out->GetMutable<f::LoDTensor>();
  tensor_out->Resize({num1, num2});
  tensor_out->mutable_data<T>(place);  // allocate
  ctx.Wait();

  // run
  f::AttributeMap attrs;
  attrs["tag"] = std::string("tagx_" + std::to_string(iter));
  attrs["ring_id"] = 0;
  attrs["use_calc_stream"] = 1;

  auto op = f::OpRegistry::CreateOp(
      "c_allreduce_sum", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs);
  op->Run(*scope, place);
  ctx.Wait();

  std::vector<T> out_vec;
  paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
  ctx.Wait();

  PrintDebugInfo("output data", out_vec);

  // Check the size first: EXPECT_EQ is non-fatal, so stop explicitly before
  // indexing into a vector of unexpected length.
  EXPECT_EQ(out_vec.size(), init.size());
  if (out_vec.size() != init.size()) {
    return;
  }

  // Element 0 was seeded with NaN and is expected to come back as 65504
  // (the largest finite half-precision value).
  const float diff = static_cast<float>(out_vec[0]) - 65504;
  EXPECT_TRUE(diff < 0.1 && diff > -0.1);

  // 3.0 is the sum of 1.0 + rank over ranks 0 and 1. Compare in T rather than
  // a hard-coded float16 so other instantiations compare like with like.
  for (uint32_t i = 1; i < 10; i++) {
    EXPECT_EQ(out_vec[i], static_cast<T>(3.0));
  }
}
// Runs the float16 all-reduce check on the NPU named by FLAGS_selected_npus.
TEST(c_allreduce_sum, NPU) {
  f::Scope scope;
  HcclRootInfo hccl_id;

  // atoi() parses only the leading integer, so when several devices are
  // listed the first one is used.
  const int device_index = atoi(FLAGS_selected_npus.c_str());
  p::NPUDeviceContext ctx((p::NPUPlace(device_index)));

  PrepareUniqueId(&scope, ctx, &hccl_id);
  Prepare(&scope, ctx, &hccl_id);
  TestHCCLAllReduceOp<paddle::platform::float16>(&scope, ctx, 1);
  // TestHCCLAllReduceOp<float>(&scope, ctx, 0);
}
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef _WIN32
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
// Node1: HCCL_WHITELIST_DISABLE=1 FLAGS_selected_npus=1 GLOG_v=4 RANK_ID=1
// DEVICE_ID=1 ./paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test
// Node2: HCCL_WHITELIST_DISABLE=1 FLAGS_selected_npus=0 GLOG_v=4 RANK_ID=0
// DEVICE_ID=0 ./paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test
// Short aliases for the framework and platform namespaces.
namespace f = paddle::framework;
namespace p = paddle::platform;

// Operators and the NPU kernel that the helpers below create by name.
USE_OP(c_allreduce_sum);
USE_NO_KERNEL_OP(c_gen_hccl_id);
USE_NO_KERNEL_OP(c_comm_init_hccl);
USE_OP_DEVICE_KERNEL(c_allreduce_sum, NPU);

// Flag declaration; the TEST body reads FLAGS_selected_npus to pick the device.
DECLARE_string(selected_npus);
// Prints a labelled vector to stdout for debugging.
//
// Output format: the label followed by ":" on its own line, then every element
// followed by one space, then a newline.
//
// preStr: label printed before the data.
// data:   elements to print; T must be streamable to std::ostream.
template <typename T>
void PrintDebugInfo(const std::string& preStr, const std::vector<T>& data) {
  std::cout << preStr << ":" << '\n';
  for (const auto& ele : data) {
    std::cout << ele << " ";
  }
  // Single flush at the end of the dump.
  std::cout << std::endl;
}
// Generates the HCCL root info for this process and copies it to `hccl_id`.
//
// Reads RANK_ID and DEVICE_ID from the environment, runs the "c_gen_hccl_id"
// op with the two hard-coded loopback endpoints, waits on the device context
// and then copies 1024 bytes of the op's "Out" variable into `*hccl_id`.
//
// scope:   scope in which the "Out" variable is created.
// ctx:     device context that supplies the place the op runs on.
// hccl_id: destination for the generated id.
//
// Throws std::invalid_argument when RANK_ID or DEVICE_ID is unset or not a
// number, and std::out_of_range when RANK_ID is not 0 or 1.
void PrepareUniqueId(f::Scope* scope,
                     const p::DeviceContext& ctx,
                     HcclRootInfo* hccl_id) {
  // std::stoi throws instead of silently yielding 0, and the "" fallback keeps
  // a missing variable from reaching it as nullptr.
  const auto env_as_int = [](const char* name) {
    const char* raw = getenv(name);
    return std::stoi(raw != nullptr ? raw : "");
  };
  const int rank_id = env_as_int("RANK_ID");
  const int device_id = env_as_int("DEVICE_ID");

  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id;

  const std::vector<std::string> endpointList = {"127.0.0.1:6175",
                                                 "127.0.0.1:6177"};
  // at() rejects ranks outside the two-entry endpoint table.
  const std::string& own_endpoint = endpointList.at(rank_id);
  const std::vector<std::string> other_endpoints = {
      endpointList[rank_id == 0 ? 1 : 0]};

  f::AttributeMap gen_hccl_id;
  gen_hccl_id["rank"] = rank_id;
  gen_hccl_id["endpoint"] = own_endpoint;
  gen_hccl_id["other_endpoints"] = other_endpoints;

  auto out = scope->Var("Out");
  auto id = out->GetMutable<HcclRootInfo>();

  VLOG(3) << "break";

  auto comm_init_op = f::OpRegistry::CreateOp(
      "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id);

  VLOG(3) << "break";

  auto place = ctx.GetPlace();
  comm_init_op->Run(*scope, place);
  ctx.Wait();

  // NOTE(review): assumes sizeof(HcclRootInfo) >= 1024 - confirm.
  memcpy(hccl_id, id, 1024);
}
// Runs "c_comm_init_hccl" for ring 0 using a previously generated id.
//
// Copies 1024 bytes from `*hccl_id` into the scope variable "X", reads RANK_ID
// and DEVICE_ID from the environment, runs the op on the context's place and
// waits for it.
//
// scope:   scope that receives the "X" variable.
// ctx:     device context that supplies the place the op runs on.
// hccl_id: id produced by PrepareUniqueId.
//
// Throws std::invalid_argument when RANK_ID or DEVICE_ID is unset or not a
// number.
void Prepare(f::Scope* scope,
             const p::DeviceContext& ctx,
             HcclRootInfo* hccl_id) {
  auto x = scope->Var("X");
  auto id = x->GetMutable<HcclRootInfo>();

  // NOTE(review): assumes sizeof(HcclRootInfo) >= 1024 - confirm.
  memcpy(id, hccl_id, 1024);

  // std::stoi throws instead of silently yielding 0, and the "" fallback keeps
  // a missing variable from reaching it as nullptr.
  const auto env_as_int = [](const char* name) {
    const char* raw = getenv(name);
    return std::stoi(raw != nullptr ? raw : "");
  };
  const int rank_id = env_as_int("RANK_ID");
  const int device_id = env_as_int("DEVICE_ID");

  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id;

  f::AttributeMap comm_init_attrs;
  comm_init_attrs["ring_id"] = 0;
  // NOTE(review): "rank_ids" is given the value 2 rather than a list of ids;
  // presumably the op expects the number of ranks - confirm.
  comm_init_attrs["rank_ids"] = 2;
  comm_init_attrs["rank"] = rank_id;
  comm_init_attrs["device_id"] = device_id;

  auto comm_init_op = f::OpRegistry::CreateOp(
      "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs);
  auto place = ctx.GetPlace();
  comm_init_op->Run(*scope, place);
  ctx.Wait();
}
// Runs "c_allreduce_sum" once on a 3x128 tensor of T and checks the result.
//
// Every element of "Data" is 1.0 + RANK_ID except element 0, which is seeded
// with NaN. After the op, "OutData" must have the same element count as the
// input, element 0 must be within 0.1 of 65504 and elements 1..9 must equal
// 3.0.
//
// scope: scope holding the "Data" and "OutData" variables.
// ctx:   device context used for copies and for the op's place.
// iter:  appended to the op's "tag" attribute ("tagx_<iter>").
//
// Throws std::invalid_argument when RANK_ID is unset or not a number.
template <typename T>
void TestHCCLAllReduceOp(f::Scope* scope,
                         const p::DeviceContext& ctx,
                         int iter) {
  // init
  auto x = scope->Var("Data");
  auto tensor_x = x->GetMutable<f::LoDTensor>();

  // The "" fallback makes std::stoi throw for a missing variable instead of
  // handing nullptr to the parser.
  const char* rank_env = getenv("RANK_ID");
  const int rank_id = std::stoi(rank_env != nullptr ? rank_env : "");

  const int num1 = 3;
  const int num2 = 128;

  std::vector<T> init(num1 * num2, static_cast<T>(1.0 + rank_id));
  init[0] = static_cast<T>(std::numeric_limits<float>::quiet_NaN());
  PrintDebugInfo("input data", init);

  auto place = ctx.GetPlace();

  paddle::framework::TensorFromVector(init, ctx, tensor_x);
  tensor_x->Resize({num1, num2});
  ctx.Wait();

  auto out = scope->Var("OutData");
  auto tensor_out = out->GetMutable<f::LoDTensor>();
  tensor_out->Resize({num1, num2});
  tensor_out->mutable_data<T>(place);  // allocate
  ctx.Wait();

  // run
  f::AttributeMap attrs;
  attrs["tag"] = std::string("tagx_" + std::to_string(iter));
  attrs["ring_id"] = 0;
  attrs["use_calc_stream"] = 1;

  auto op = f::OpRegistry::CreateOp(
      "c_allreduce_sum", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs);
  op->Run(*scope, place);
  ctx.Wait();

  std::vector<T> out_vec;
  paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
  ctx.Wait();

  PrintDebugInfo("output data", out_vec);

  // Check the size first: EXPECT_EQ is non-fatal, so stop explicitly before
  // indexing into a vector of unexpected length.
  EXPECT_EQ(out_vec.size(), init.size());
  if (out_vec.size() != init.size()) {
    return;
  }

  // Element 0 was seeded with NaN and is expected to come back as 65504
  // (the largest finite half-precision value).
  const float diff = static_cast<float>(out_vec[0]) - 65504;
  EXPECT_TRUE(diff < 0.1 && diff > -0.1);

  // 3.0 is the sum of 1.0 + rank over ranks 0 and 1. Compare in T rather than
  // a hard-coded float16 so other instantiations compare like with like.
  for (uint32_t i = 1; i < 10; i++) {
    EXPECT_EQ(out_vec[i], static_cast<T>(3.0));
  }
}
// Runs the float16 all-reduce check on the NPU named by FLAGS_selected_npus.
TEST(c_allreduce_sum, NPU) {
  f::Scope scope;
  HcclRootInfo hccl_id;

  // atoi() parses only the leading integer, so when several devices are
  // listed the first one is used.
  const int device_index = atoi(FLAGS_selected_npus.c_str());
  p::NPUDeviceContext ctx((p::NPUPlace(device_index)));

  PrepareUniqueId(&scope, ctx, &hccl_id);
  Prepare(&scope, ctx, &hccl_id);
  TestHCCLAllReduceOp<paddle::platform::float16>(&scope, ctx, 1);
  // TestHCCLAllReduceOp<float>(&scope, ctx, 0);
}
paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc
浏览文件 @
846c7e70
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef _WIN32
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/collective/c_broadcast_op.h"
#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
// Short aliases for the framework and platform namespaces.
namespace f = paddle::framework;
namespace p = paddle::platform;

// Operators and the NPU kernel that the helpers below create by name.
USE_OP(c_broadcast);
USE_NO_KERNEL_OP(c_gen_hccl_id);
USE_NO_KERNEL_OP(c_comm_init_hccl);
USE_OP_DEVICE_KERNEL(c_broadcast, NPU);

// Flag declaration; the TEST body reads FLAGS_selected_npus to pick the device.
DECLARE_string(selected_npus);
// Logs a labelled dump of `data` at VLOG level 2.
//
// Each element is rendered with std::to_string and followed by ","; the label
// and ":" go on the line above the joined values.
template <typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
  std::string joined;
  for (const auto& item : data) {
    joined += std::to_string(item);
    joined += ",";
  }
  VLOG(2) << preStr << ":" << std::endl << joined;
}
// Generates the HCCL root info for this process and copies it to `hccl_id`.
//
// Reads RANK_ID and DEVICE_ID from the environment, runs the "c_gen_hccl_id"
// op with the two hard-coded loopback endpoints, waits on the device context
// and then copies 1024 bytes of the op's "Out" variable into `*hccl_id`.
//
// scope:   scope in which the "Out" variable is created.
// ctx:     device context that supplies the place the op runs on.
// hccl_id: destination for the generated id.
//
// Throws std::invalid_argument when RANK_ID or DEVICE_ID is unset or not a
// number, and std::out_of_range when RANK_ID is not 0 or 1.
void PrepareUniqueId(f::Scope* scope,
                     const p::DeviceContext& ctx,
                     HcclRootInfo* hccl_id) {
  // std::stoi throws instead of silently yielding 0, and the "" fallback keeps
  // a missing variable from reaching it as nullptr.
  const auto env_as_int = [](const char* name) {
    const char* raw = getenv(name);
    return std::stoi(raw != nullptr ? raw : "");
  };
  const int rank_id = env_as_int("RANK_ID");
  const int device_id = env_as_int("DEVICE_ID");

  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id;

  const std::vector<std::string> endpointList = {"127.0.0.1:6175",
                                                 "127.0.0.1:6177"};
  // at() rejects ranks outside the two-entry endpoint table.
  const std::string& own_endpoint = endpointList.at(rank_id);
  const std::vector<std::string> other_endpoints = {
      endpointList[rank_id == 0 ? 1 : 0]};

  f::AttributeMap gen_hccl_id;
  gen_hccl_id["rank"] = rank_id;
  gen_hccl_id["endpoint"] = own_endpoint;
  gen_hccl_id["other_endpoints"] = other_endpoints;

  auto out = scope->Var("Out");
  auto id = out->GetMutable<HcclRootInfo>();

  VLOG(3) << "break";

  auto comm_init_op = f::OpRegistry::CreateOp(
      "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id);

  VLOG(3) << "break";

  auto place = ctx.GetPlace();
  comm_init_op->Run(*scope, place);
  ctx.Wait();

  // NOTE(review): assumes sizeof(HcclRootInfo) >= 1024 - confirm.
  memcpy(hccl_id, id, 1024);
}
// Runs "c_comm_init_hccl" for ring 0 using a previously generated id.
//
// Copies 1024 bytes from `*hccl_id` into the scope variable "X", reads RANK_ID
// and DEVICE_ID from the environment, runs the op on the context's place and
// waits for it.
//
// scope:   scope that receives the "X" variable.
// ctx:     device context that supplies the place the op runs on.
// hccl_id: id produced by PrepareUniqueId.
//
// Throws std::invalid_argument when RANK_ID or DEVICE_ID is unset or not a
// number.
void Prepare(f::Scope* scope,
             const p::DeviceContext& ctx,
             HcclRootInfo* hccl_id) {
  auto x = scope->Var("X");
  auto id = x->GetMutable<HcclRootInfo>();

  // NOTE(review): assumes sizeof(HcclRootInfo) >= 1024 - confirm.
  memcpy(id, hccl_id, 1024);

  // std::stoi throws instead of silently yielding 0, and the "" fallback keeps
  // a missing variable from reaching it as nullptr.
  const auto env_as_int = [](const char* name) {
    const char* raw = getenv(name);
    return std::stoi(raw != nullptr ? raw : "");
  };
  const int rank_id = env_as_int("RANK_ID");
  const int device_id = env_as_int("DEVICE_ID");

  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id;

  f::AttributeMap comm_init_attrs;
  comm_init_attrs["ring_id"] = 0;
  // NOTE(review): "rank_ids" is given the value 2 rather than a list of ids;
  // presumably the op expects the number of ranks - confirm.
  comm_init_attrs["rank_ids"] = 2;
  comm_init_attrs["rank"] = rank_id;
  comm_init_attrs["device_id"] = device_id;

  auto comm_init_op = f::OpRegistry::CreateOp(
      "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs);
  auto place = ctx.GetPlace();
  comm_init_op->Run(*scope, place);
  ctx.Wait();
}
// Runs "c_broadcast" with root 0 on a 2x2 float tensor and checks the result.
//
// Every element of "Data" is 1.0 + RANK_ID. The op is run ten times, after
// which "OutData" must have the same element count as the input and every
// element must equal 1.0.
//
// scope: scope holding the "Data" and "OutData" variables.
// ctx:   device context used for copies and for the op's place.
//
// Throws std::invalid_argument when RANK_ID is unset or not a number.
void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) {
  // init
  auto x = scope->Var("Data");
  auto tensor_x = x->GetMutable<f::LoDTensor>();

  const int num = 2;

  // The "" fallback makes std::stoi throw for a missing variable instead of
  // handing nullptr to the parser.
  const char* rank_env = getenv("RANK_ID");
  const int rank_id = std::stoi(rank_env != nullptr ? rank_env : "");

  std::vector<float> init(num * num, static_cast<float>(1.0 + rank_id));
  PrintDebugInfo("input data", init);

  paddle::framework::TensorFromVector(init, ctx, tensor_x);
  tensor_x->Resize({num, num});
  ctx.Wait();

  auto place = ctx.GetPlace();
  auto out = scope->Var("OutData");
  auto tensor_out = out->GetMutable<f::LoDTensor>();
  tensor_out->Resize({num, num});
  tensor_out->mutable_data<float>(place);  // allocate
  ctx.Wait();

  // run
  f::AttributeMap attrs;
  attrs["tag"] = std::string("tagx");
  attrs["root"] = 0;
  attrs["ring_id"] = 0;

  auto op = f::OpRegistry::CreateOp(
      "c_broadcast", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs);

  for (int i = 0; i < 10; i++) {
    op->Run(*scope, place);
  }
  ctx.Wait();

  std::vector<float> out_vec;
  paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
  ctx.Wait();

  PrintDebugInfo("output data", out_vec);

  EXPECT_EQ(out_vec.size(), init.size());
  // 1.0 is the value rank 0 (the root) fills its input with.
  for (uint32_t i = 0; i < out_vec.size(); i++) {
    EXPECT_EQ(out_vec[i], 1.0);
  }
}
// Runs the broadcast check on the NPU named by FLAGS_selected_npus.
TEST(c_broadcast, NPU) {
  f::Scope scope;
  HcclRootInfo hccl_id;

  // atoi() parses only the leading integer, so when several devices are
  // listed the first one is used.
  const int device_index = atoi(FLAGS_selected_npus.c_str());
  p::NPUDeviceContext ctx((p::NPUPlace(device_index)));

  PrepareUniqueId(&scope, ctx, &hccl_id);
  Prepare(&scope, ctx, &hccl_id);
  TestHCCLBroadcastOp(&scope, ctx);
}
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef _WIN32
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/collective/c_broadcast_op.h"
#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
// Short aliases for the framework and platform namespaces.
namespace f = paddle::framework;
namespace p = paddle::platform;

// Operators and the NPU kernel that the helpers below create by name.
USE_OP(c_broadcast);
USE_NO_KERNEL_OP(c_gen_hccl_id);
USE_NO_KERNEL_OP(c_comm_init_hccl);
USE_OP_DEVICE_KERNEL(c_broadcast, NPU);

// Flag declaration; the TEST body reads FLAGS_selected_npus to pick the device.
DECLARE_string(selected_npus);
// Logs a labelled dump of `data` at VLOG level 2.
//
// Each element is rendered with std::to_string and followed by ","; the label
// and ":" go on the line above the joined values.
template <typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
  std::string joined;
  for (const auto& item : data) {
    joined += std::to_string(item);
    joined += ",";
  }
  VLOG(2) << preStr << ":" << std::endl << joined;
}
// Generates the HCCL root info for this process and copies it to `hccl_id`.
//
// Reads RANK_ID and DEVICE_ID from the environment, runs the "c_gen_hccl_id"
// op with the two hard-coded loopback endpoints, waits on the device context
// and then copies 1024 bytes of the op's "Out" variable into `*hccl_id`.
//
// scope:   scope in which the "Out" variable is created.
// ctx:     device context that supplies the place the op runs on.
// hccl_id: destination for the generated id.
//
// Throws std::invalid_argument when RANK_ID or DEVICE_ID is unset or not a
// number, and std::out_of_range when RANK_ID is not 0 or 1.
void PrepareUniqueId(f::Scope* scope,
                     const p::DeviceContext& ctx,
                     HcclRootInfo* hccl_id) {
  // std::stoi throws instead of silently yielding 0, and the "" fallback keeps
  // a missing variable from reaching it as nullptr.
  const auto env_as_int = [](const char* name) {
    const char* raw = getenv(name);
    return std::stoi(raw != nullptr ? raw : "");
  };
  const int rank_id = env_as_int("RANK_ID");
  const int device_id = env_as_int("DEVICE_ID");

  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id;

  const std::vector<std::string> endpointList = {"127.0.0.1:6175",
                                                 "127.0.0.1:6177"};
  // at() rejects ranks outside the two-entry endpoint table.
  const std::string& own_endpoint = endpointList.at(rank_id);
  const std::vector<std::string> other_endpoints = {
      endpointList[rank_id == 0 ? 1 : 0]};

  f::AttributeMap gen_hccl_id;
  gen_hccl_id["rank"] = rank_id;
  gen_hccl_id["endpoint"] = own_endpoint;
  gen_hccl_id["other_endpoints"] = other_endpoints;

  auto out = scope->Var("Out");
  auto id = out->GetMutable<HcclRootInfo>();

  VLOG(3) << "break";

  auto comm_init_op = f::OpRegistry::CreateOp(
      "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id);

  VLOG(3) << "break";

  auto place = ctx.GetPlace();
  comm_init_op->Run(*scope, place);
  ctx.Wait();

  // NOTE(review): assumes sizeof(HcclRootInfo) >= 1024 - confirm.
  memcpy(hccl_id, id, 1024);
}
// Runs "c_comm_init_hccl" for ring 0 using a previously generated id.
//
// Copies 1024 bytes from `*hccl_id` into the scope variable "X", reads RANK_ID
// and DEVICE_ID from the environment, runs the op on the context's place and
// waits for it.
//
// scope:   scope that receives the "X" variable.
// ctx:     device context that supplies the place the op runs on.
// hccl_id: id produced by PrepareUniqueId.
//
// Throws std::invalid_argument when RANK_ID or DEVICE_ID is unset or not a
// number.
void Prepare(f::Scope* scope,
             const p::DeviceContext& ctx,
             HcclRootInfo* hccl_id) {
  auto x = scope->Var("X");
  auto id = x->GetMutable<HcclRootInfo>();

  // NOTE(review): assumes sizeof(HcclRootInfo) >= 1024 - confirm.
  memcpy(id, hccl_id, 1024);

  // std::stoi throws instead of silently yielding 0, and the "" fallback keeps
  // a missing variable from reaching it as nullptr.
  const auto env_as_int = [](const char* name) {
    const char* raw = getenv(name);
    return std::stoi(raw != nullptr ? raw : "");
  };
  const int rank_id = env_as_int("RANK_ID");
  const int device_id = env_as_int("DEVICE_ID");

  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id;

  f::AttributeMap comm_init_attrs;
  comm_init_attrs["ring_id"] = 0;
  // NOTE(review): "rank_ids" is given the value 2 rather than a list of ids;
  // presumably the op expects the number of ranks - confirm.
  comm_init_attrs["rank_ids"] = 2;
  comm_init_attrs["rank"] = rank_id;
  comm_init_attrs["device_id"] = device_id;

  auto comm_init_op = f::OpRegistry::CreateOp(
      "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs);
  auto place = ctx.GetPlace();
  comm_init_op->Run(*scope, place);
  ctx.Wait();
}
// Runs "c_broadcast" with root 0 on a 2x2 float tensor and checks the result.
//
// Every element of "Data" is 1.0 + RANK_ID. The op is run ten times, after
// which "OutData" must have the same element count as the input and every
// element must equal 1.0.
//
// scope: scope holding the "Data" and "OutData" variables.
// ctx:   device context used for copies and for the op's place.
//
// Throws std::invalid_argument when RANK_ID is unset or not a number.
void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) {
  // init
  auto x = scope->Var("Data");
  auto tensor_x = x->GetMutable<f::LoDTensor>();

  const int num = 2;

  // The "" fallback makes std::stoi throw for a missing variable instead of
  // handing nullptr to the parser.
  const char* rank_env = getenv("RANK_ID");
  const int rank_id = std::stoi(rank_env != nullptr ? rank_env : "");

  std::vector<float> init(num * num, static_cast<float>(1.0 + rank_id));
  PrintDebugInfo("input data", init);

  paddle::framework::TensorFromVector(init, ctx, tensor_x);
  tensor_x->Resize({num, num});
  ctx.Wait();

  auto place = ctx.GetPlace();
  auto out = scope->Var("OutData");
  auto tensor_out = out->GetMutable<f::LoDTensor>();
  tensor_out->Resize({num, num});
  tensor_out->mutable_data<float>(place);  // allocate
  ctx.Wait();

  // run
  f::AttributeMap attrs;
  attrs["tag"] = std::string("tagx");
  attrs["root"] = 0;
  attrs["ring_id"] = 0;

  auto op = f::OpRegistry::CreateOp(
      "c_broadcast", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs);

  for (int i = 0; i < 10; i++) {
    op->Run(*scope, place);
  }
  ctx.Wait();

  std::vector<float> out_vec;
  paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
  ctx.Wait();

  PrintDebugInfo("output data", out_vec);

  EXPECT_EQ(out_vec.size(), init.size());
  // 1.0 is the value rank 0 (the root) fills its input with.
  for (uint32_t i = 0; i < out_vec.size(); i++) {
    EXPECT_EQ(out_vec[i], 1.0);
  }
}
// Runs the broadcast check on the NPU named by FLAGS_selected_npus.
TEST(c_broadcast, NPU) {
  f::Scope scope;
  HcclRootInfo hccl_id;

  // atoi() parses only the leading integer, so when several devices are
  // listed the first one is used.
  const int device_index = atoi(FLAGS_selected_npus.c_str());
  p::NPUDeviceContext ctx((p::NPUPlace(device_index)));

  PrepareUniqueId(&scope, ctx, &hccl_id);
  Prepare(&scope, ctx, &hccl_id);
  TestHCCLBroadcastOp(&scope, ctx);
}
paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc
浏览文件 @
846c7e70
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef _WIN32
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/collective/c_reduce_op.h"
#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
// Short aliases for the framework and platform namespaces.
namespace f = paddle::framework;
namespace p = paddle::platform;

// Operators and the NPU kernel that the helpers below create by name.
USE_OP(c_reduce_sum);
USE_NO_KERNEL_OP(c_gen_hccl_id);
USE_NO_KERNEL_OP(c_comm_init_hccl);
USE_OP_DEVICE_KERNEL(c_reduce_sum, NPU);

// Flag declaration; the TEST body reads FLAGS_selected_npus to pick the device.
DECLARE_string(selected_npus);
// Logs a labelled dump of `data` at VLOG level 3.
//
// Each element is rendered with std::to_string and followed by ","; the label
// and ":" go on the line above the joined values.
template <typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
  std::string joined;
  for (const auto& item : data) {
    joined += std::to_string(item);
    joined += ",";
  }
  VLOG(3) << preStr << ":" << std::endl << joined;
}
// Generates the HCCL root info for this process and copies it to `hccl_id`.
//
// Reads RANK_ID and DEVICE_ID from the environment, runs the "c_gen_hccl_id"
// op with the two hard-coded loopback endpoints, waits on the device context
// and then copies 1024 bytes of the op's "Out" variable into `*hccl_id`.
//
// scope:   scope in which the "Out" variable is created.
// ctx:     device context that supplies the place the op runs on.
// hccl_id: destination for the generated id.
//
// Throws std::invalid_argument when RANK_ID or DEVICE_ID is unset or not a
// number, and std::out_of_range when RANK_ID is not 0 or 1.
void PrepareUniqueId(f::Scope* scope,
                     const p::DeviceContext& ctx,
                     HcclRootInfo* hccl_id) {
  // std::stoi throws instead of silently yielding 0, and the "" fallback keeps
  // a missing variable from reaching it as nullptr.
  const auto env_as_int = [](const char* name) {
    const char* raw = getenv(name);
    return std::stoi(raw != nullptr ? raw : "");
  };
  const int rank_id = env_as_int("RANK_ID");
  const int device_id = env_as_int("DEVICE_ID");

  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id;

  const std::vector<std::string> endpointList = {"127.0.0.1:6175",
                                                 "127.0.0.1:6177"};
  // at() rejects ranks outside the two-entry endpoint table.
  const std::string& own_endpoint = endpointList.at(rank_id);
  const std::vector<std::string> other_endpoints = {
      endpointList[rank_id == 0 ? 1 : 0]};

  f::AttributeMap gen_hccl_id;
  gen_hccl_id["rank"] = rank_id;
  gen_hccl_id["endpoint"] = own_endpoint;
  gen_hccl_id["other_endpoints"] = other_endpoints;

  auto out = scope->Var("Out");
  auto id = out->GetMutable<HcclRootInfo>();

  VLOG(3) << "break";

  auto comm_init_op = f::OpRegistry::CreateOp(
      "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id);

  VLOG(3) << "break";

  auto place = ctx.GetPlace();
  comm_init_op->Run(*scope, place);
  ctx.Wait();

  // NOTE(review): assumes sizeof(HcclRootInfo) >= 1024 - confirm.
  memcpy(hccl_id, id, 1024);
}
// Runs "c_comm_init_hccl" for ring 0 using a previously generated id.
//
// Copies 1024 bytes from `*hccl_id` into the scope variable "X", reads RANK_ID
// and DEVICE_ID from the environment, runs the op on the context's place and
// waits for it.
//
// scope:   scope that receives the "X" variable.
// ctx:     device context that supplies the place the op runs on.
// hccl_id: id produced by PrepareUniqueId.
//
// Throws std::invalid_argument when RANK_ID or DEVICE_ID is unset or not a
// number.
void Prepare(f::Scope* scope,
             const p::DeviceContext& ctx,
             HcclRootInfo* hccl_id) {
  auto x = scope->Var("X");
  auto id = x->GetMutable<HcclRootInfo>();

  // NOTE(review): assumes sizeof(HcclRootInfo) >= 1024 - confirm.
  memcpy(id, hccl_id, 1024);

  // std::stoi throws instead of silently yielding 0, and the "" fallback keeps
  // a missing variable from reaching it as nullptr.
  const auto env_as_int = [](const char* name) {
    const char* raw = getenv(name);
    return std::stoi(raw != nullptr ? raw : "");
  };
  const int rank_id = env_as_int("RANK_ID");
  const int device_id = env_as_int("DEVICE_ID");

  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id;

  f::AttributeMap comm_init_attrs;
  comm_init_attrs["ring_id"] = 0;
  // NOTE(review): "rank_ids" is given the value 2 rather than a list of ids;
  // presumably the op expects the number of ranks - confirm.
  comm_init_attrs["rank_ids"] = 2;
  comm_init_attrs["rank"] = rank_id;
  comm_init_attrs["device_id"] = device_id;

  auto comm_init_op = f::OpRegistry::CreateOp(
      "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs);
  auto place = ctx.GetPlace();
  comm_init_op->Run(*scope, place);
  ctx.Wait();
}
// Runs "c_reduce_sum" with root 0 on a 3x128 float tensor and checks it.
//
// Every element of "Data" is 1.0 + RANK_ID. After the op, "OutData" must have
// the same element count as the input; on the root rank every element must
// equal 3.0, on any other rank it must equal the corresponding input element.
//
// scope: scope holding the "Data" and "OutData" variables.
// ctx:   device context used for copies and for the op's place.
// iter:  appended to the op's "tag" attribute ("tagx_<iter>").
//
// Throws std::invalid_argument when RANK_ID is unset or not a number.
void TestHCCLReduceOp(f::Scope* scope,
                      const p::DeviceContext& ctx,
                      int iter) {
  // init
  auto x = scope->Var("Data");
  auto tensor_x = x->GetMutable<f::LoDTensor>();

  // The "" fallback makes std::stoi throw for a missing variable instead of
  // handing nullptr to the parser.
  const char* rank_env = getenv("RANK_ID");
  const int rank_id = std::stoi(rank_env != nullptr ? rank_env : "");

  const int num1 = 3;
  const int num2 = 128;

  std::vector<float> init(num1 * num2, static_cast<float>(1.0 + rank_id));
  PrintDebugInfo("input data", init);

  auto place = ctx.GetPlace();

  paddle::framework::TensorFromVector(init, ctx, tensor_x);
  tensor_x->Resize({num1, num2});
  ctx.Wait();

  auto out = scope->Var("OutData");
  auto tensor_out = out->GetMutable<f::LoDTensor>();
  tensor_out->Resize({num1, num2});
  tensor_out->mutable_data<float>(place);  // allocate
  ctx.Wait();

  // run
  f::AttributeMap attrs;
  attrs["tag"] = std::string("tagx_" + std::to_string(iter));
  attrs["ring_id"] = 0;
  int root_id = 0;
  attrs["root_id"] = root_id;

  auto op = f::OpRegistry::CreateOp(
      "c_reduce_sum", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs);
  op->Run(*scope, place);
  ctx.Wait();

  std::vector<float> out_vec;
  paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
  ctx.Wait();

  PrintDebugInfo("output data", out_vec);

  // EXPECT_EQ is non-fatal, so stop explicitly on a size mismatch: the loop
  // below indexes `init` with positions taken from `out_vec`.
  EXPECT_EQ(out_vec.size(), init.size());
  if (out_vec.size() != init.size()) {
    return;
  }

  for (uint32_t i = 0; i < out_vec.size(); i++) {
    if (rank_id == root_id) {
      // 3.0 is the sum of 1.0 + rank over ranks 0 and 1.
      EXPECT_EQ(out_vec[i], 3.0);
    } else {
      EXPECT_EQ(out_vec[i], init[i]);
    }
  }
}
// Runs the reduce-sum check twice on the NPU named by FLAGS_selected_npus.
TEST(c_reduce_sum, NPU) {
  f::Scope scope;
  HcclRootInfo hccl_id;

  // atoi() parses only the leading integer, so when several devices are
  // listed the first one is used.
  const int device_index = atoi(FLAGS_selected_npus.c_str());
  p::NPUDeviceContext ctx((p::NPUPlace(device_index)));

  PrepareUniqueId(&scope, ctx, &hccl_id);
  Prepare(&scope, ctx, &hccl_id);

  // Each round passes its index to TestHCCLReduceOp as `iter`.
  for (int round = 0; round < 2; ++round) {
    VLOG(2) << "iter num: " << round;
    TestHCCLReduceOp(&scope, ctx, round);
  }
}
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef _WIN32
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/collective/c_reduce_op.h"
#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
// Short aliases for the framework and platform namespaces.
namespace f = paddle::framework;
namespace p = paddle::platform;

// Operators and the NPU kernel that the helpers below create by name.
USE_OP(c_reduce_sum);
USE_NO_KERNEL_OP(c_gen_hccl_id);
USE_NO_KERNEL_OP(c_comm_init_hccl);
USE_OP_DEVICE_KERNEL(c_reduce_sum, NPU);

// Flag declaration; the TEST body reads FLAGS_selected_npus to pick the device.
DECLARE_string(selected_npus);
// Logs a labelled dump of `data` at VLOG level 3.
//
// Each element is rendered with std::to_string and followed by ","; the label
// and ":" go on the line above the joined values.
template <typename T>
void PrintDebugInfo(const std::string preStr, const std::vector<T>& data) {
  std::string joined;
  for (const auto& item : data) {
    joined += std::to_string(item);
    joined += ",";
  }
  VLOG(3) << preStr << ":" << std::endl << joined;
}
// Generates the HCCL root info for this process and copies it to `hccl_id`.
//
// Reads RANK_ID and DEVICE_ID from the environment, runs the "c_gen_hccl_id"
// op with the two hard-coded loopback endpoints, waits on the device context
// and then copies 1024 bytes of the op's "Out" variable into `*hccl_id`.
//
// scope:   scope in which the "Out" variable is created.
// ctx:     device context that supplies the place the op runs on.
// hccl_id: destination for the generated id.
//
// Throws std::invalid_argument when RANK_ID or DEVICE_ID is unset or not a
// number, and std::out_of_range when RANK_ID is not 0 or 1.
void PrepareUniqueId(f::Scope* scope,
                     const p::DeviceContext& ctx,
                     HcclRootInfo* hccl_id) {
  // std::stoi throws instead of silently yielding 0, and the "" fallback keeps
  // a missing variable from reaching it as nullptr.
  const auto env_as_int = [](const char* name) {
    const char* raw = getenv(name);
    return std::stoi(raw != nullptr ? raw : "");
  };
  const int rank_id = env_as_int("RANK_ID");
  const int device_id = env_as_int("DEVICE_ID");

  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id;

  const std::vector<std::string> endpointList = {"127.0.0.1:6175",
                                                 "127.0.0.1:6177"};
  // at() rejects ranks outside the two-entry endpoint table.
  const std::string& own_endpoint = endpointList.at(rank_id);
  const std::vector<std::string> other_endpoints = {
      endpointList[rank_id == 0 ? 1 : 0]};

  f::AttributeMap gen_hccl_id;
  gen_hccl_id["rank"] = rank_id;
  gen_hccl_id["endpoint"] = own_endpoint;
  gen_hccl_id["other_endpoints"] = other_endpoints;

  auto out = scope->Var("Out");
  auto id = out->GetMutable<HcclRootInfo>();

  VLOG(3) << "break";

  auto comm_init_op = f::OpRegistry::CreateOp(
      "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id);

  VLOG(3) << "break";

  auto place = ctx.GetPlace();
  comm_init_op->Run(*scope, place);
  ctx.Wait();

  // NOTE(review): assumes sizeof(HcclRootInfo) >= 1024 - confirm.
  memcpy(hccl_id, id, 1024);
}
// Runs "c_comm_init_hccl" for ring 0 using a previously generated id.
//
// Copies 1024 bytes from `*hccl_id` into the scope variable "X", reads RANK_ID
// and DEVICE_ID from the environment, runs the op on the context's place and
// waits for it.
//
// scope:   scope that receives the "X" variable.
// ctx:     device context that supplies the place the op runs on.
// hccl_id: id produced by PrepareUniqueId.
//
// Throws std::invalid_argument when RANK_ID or DEVICE_ID is unset or not a
// number.
void Prepare(f::Scope* scope,
             const p::DeviceContext& ctx,
             HcclRootInfo* hccl_id) {
  auto x = scope->Var("X");
  auto id = x->GetMutable<HcclRootInfo>();

  // NOTE(review): assumes sizeof(HcclRootInfo) >= 1024 - confirm.
  memcpy(id, hccl_id, 1024);

  // std::stoi throws instead of silently yielding 0, and the "" fallback keeps
  // a missing variable from reaching it as nullptr.
  const auto env_as_int = [](const char* name) {
    const char* raw = getenv(name);
    return std::stoi(raw != nullptr ? raw : "");
  };
  const int rank_id = env_as_int("RANK_ID");
  const int device_id = env_as_int("DEVICE_ID");

  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id;

  f::AttributeMap comm_init_attrs;
  comm_init_attrs["ring_id"] = 0;
  // NOTE(review): "rank_ids" is given the value 2 rather than a list of ids;
  // presumably the op expects the number of ranks - confirm.
  comm_init_attrs["rank_ids"] = 2;
  comm_init_attrs["rank"] = rank_id;
  comm_init_attrs["device_id"] = device_id;

  auto comm_init_op = f::OpRegistry::CreateOp(
      "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs);
  auto place = ctx.GetPlace();
  comm_init_op->Run(*scope, place);
  ctx.Wait();
}
// Runs "c_reduce_sum" with root 0 on a 3x128 float tensor and checks it.
//
// Every element of "Data" is 1.0 + RANK_ID. After the op, "OutData" must have
// the same element count as the input; on the root rank every element must
// equal 3.0, on any other rank it must equal the corresponding input element.
//
// scope: scope holding the "Data" and "OutData" variables.
// ctx:   device context used for copies and for the op's place.
// iter:  appended to the op's "tag" attribute ("tagx_<iter>").
//
// Throws std::invalid_argument when RANK_ID is unset or not a number.
void TestHCCLReduceOp(f::Scope* scope,
                      const p::DeviceContext& ctx,
                      int iter) {
  // init
  auto x = scope->Var("Data");
  auto tensor_x = x->GetMutable<f::LoDTensor>();

  // The "" fallback makes std::stoi throw for a missing variable instead of
  // handing nullptr to the parser.
  const char* rank_env = getenv("RANK_ID");
  const int rank_id = std::stoi(rank_env != nullptr ? rank_env : "");

  const int num1 = 3;
  const int num2 = 128;

  std::vector<float> init(num1 * num2, static_cast<float>(1.0 + rank_id));
  PrintDebugInfo("input data", init);

  auto place = ctx.GetPlace();

  paddle::framework::TensorFromVector(init, ctx, tensor_x);
  tensor_x->Resize({num1, num2});
  ctx.Wait();

  auto out = scope->Var("OutData");
  auto tensor_out = out->GetMutable<f::LoDTensor>();
  tensor_out->Resize({num1, num2});
  tensor_out->mutable_data<float>(place);  // allocate
  ctx.Wait();

  // run
  f::AttributeMap attrs;
  attrs["tag"] = std::string("tagx_" + std::to_string(iter));
  attrs["ring_id"] = 0;
  int root_id = 0;
  attrs["root_id"] = root_id;

  auto op = f::OpRegistry::CreateOp(
      "c_reduce_sum", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, attrs);
  op->Run(*scope, place);
  ctx.Wait();

  std::vector<float> out_vec;
  paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
  ctx.Wait();

  PrintDebugInfo("output data", out_vec);

  // EXPECT_EQ is non-fatal, so stop explicitly on a size mismatch: the loop
  // below indexes `init` with positions taken from `out_vec`.
  EXPECT_EQ(out_vec.size(), init.size());
  if (out_vec.size() != init.size()) {
    return;
  }

  for (uint32_t i = 0; i < out_vec.size(); i++) {
    if (rank_id == root_id) {
      // 3.0 is the sum of 1.0 + rank over ranks 0 and 1.
      EXPECT_EQ(out_vec[i], 3.0);
    } else {
      EXPECT_EQ(out_vec[i], init[i]);
    }
  }
}
// Runs the reduce-sum check twice on the NPU named by FLAGS_selected_npus.
TEST(c_reduce_sum, NPU) {
  f::Scope scope;
  HcclRootInfo hccl_id;

  // atoi() parses only the leading integer, so when several devices are
  // listed the first one is used.
  const int device_index = atoi(FLAGS_selected_npus.c_str());
  p::NPUDeviceContext ctx((p::NPUPlace(device_index)));

  PrepareUniqueId(&scope, ctx, &hccl_id);
  Prepare(&scope, ctx, &hccl_id);

  // Each round passes its index to TestHCCLReduceOp as `iter`.
  for (int round = 0; round < 2; ++round) {
    VLOG(2) << "iter num: " << round;
    TestHCCLReduceOp(&scope, ctx, round);
  }
}
paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc
浏览文件 @
846c7e70
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef _WIN32
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/collective/c_allgather_op.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
#include "paddle/fluid/operators/collective/c_broadcast_op.h"
#include "paddle/fluid/operators/collective/c_reducescatter_op.h"
#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
// Short aliases for the framework and platform namespaces used by the helpers
// and the test case in this file.
namespace f = paddle::framework;
namespace p = paddle::platform;

// Operators that this file instantiates by name through OpRegistry::CreateOp:
// the operator under test and the two HCCL bootstrap operators.
USE_OP(c_reducescatter);
USE_NO_KERNEL_OP(c_gen_hccl_id);
USE_NO_KERNEL_OP(c_comm_init_hccl);
USE_OP_DEVICE_KERNEL(c_reducescatter, NPU);

// Declares the string flag that the TEST body reads as FLAGS_selected_npus.
DECLARE_string(selected_npus);
// Logs the contents of `data` at VLOG level 2.
//
// preStr: label written first, followed by ":" and a line break.
// data:   values to log; each one is formatted with std::to_string and is
//         followed by a "," (so the logged line ends with a trailing comma).
//
// The label is taken by const reference so that it is not copied on each call.
template <typename T>
void PrintDebugInfo(const std::string& preStr, const std::vector<T>& data) {
  std::string debugstring;
  for (const auto& ele : data) {
    // Append in place instead of building a temporary string per element.
    debugstring += std::to_string(ele);
    debugstring += ',';
  }
  VLOG(2) << preStr << ":" << '\n' << debugstring;
}
// Generates the HCCL root info for this process and copies it into `hccl_id`.
//
// Reads RANK_ID and DEVICE_ID from the environment, runs the `c_gen_hccl_id`
// operator with the two hard-coded local endpoints, and copies the operator's
// "Out" variable into `*hccl_id`.
//
// scope:   scope in which the "Out" variable is created and the operator runs.
// ctx:     device context; supplies the place and is waited on after the run.
// hccl_id: destination for the generated root info.
//
// Records a fatal gtest failure and returns early when RANK_ID or DEVICE_ID is
// unset, or when RANK_ID is neither 0 nor 1 (only two endpoints are listed).
void PrepareUniqueId(f::Scope* scope,
                     const p::DeviceContext& ctx,
                     HcclRootInfo* hccl_id) {
  // getenv yields a null pointer for an unset variable; that must never be
  // handed to atoi.
  const char* rank_env = getenv("RANK_ID");
  const char* device_env = getenv("DEVICE_ID");
  ASSERT_TRUE(rank_env != nullptr) << "environment variable RANK_ID is not set";
  ASSERT_TRUE(device_env != nullptr)
      << "environment variable DEVICE_ID is not set";

  int rank_id = atoi(rank_env);
  int device_id = atoi(device_env);
  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id;

  std::vector<std::string> endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"};
  // rank_id is used as an index into endpointList below.
  ASSERT_TRUE(rank_id == 0 || rank_id == 1)
      << "RANK_ID must be 0 or 1, but got " << rank_id;

  f::AttributeMap gen_hccl_id;
  gen_hccl_id["rank"] = rank_id;
  gen_hccl_id["endpoint"] = endpointList[rank_id];
  // The peer is whichever of the two endpoints this rank does not own.
  std::vector<std::string> other_endpoints = {
      endpointList[rank_id == 0 ? 1 : 0]};
  gen_hccl_id["other_endpoints"] = other_endpoints;

  auto out = scope->Var("Out");
  auto id = out->GetMutable<HcclRootInfo>();

  VLOG(3) << "break";

  auto comm_init_op = f::OpRegistry::CreateOp(
      "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id);
  VLOG(3) << "break";
  auto place = ctx.GetPlace();
  comm_init_op->Run(*scope, place);
  ctx.Wait();

  // NOTE(review): copies a fixed 1024 bytes; confirm this matches the size of
  // HcclRootInfo that the operator fills in.
  memcpy(hccl_id, id, 1024);
}
// Initialises the HCCL communicator for ring 0 from a previously generated
// root info.
//
// Copies `*hccl_id` into the scope variable "X" and runs the
// `c_comm_init_hccl` operator with the rank and device taken from the RANK_ID
// and DEVICE_ID environment variables.
//
// scope:   scope in which the "X" variable is created and the operator runs.
// ctx:     device context; supplies the place and is waited on after the run.
// hccl_id: root info to hand to the operator.
//
// Records a fatal gtest failure and returns early when RANK_ID or DEVICE_ID is
// unset.
void Prepare(f::Scope* scope,
             const p::DeviceContext& ctx,
             HcclRootInfo* hccl_id) {
  // getenv yields a null pointer for an unset variable; that must never be
  // handed to atoi.
  const char* rank_env = getenv("RANK_ID");
  const char* device_env = getenv("DEVICE_ID");
  ASSERT_TRUE(rank_env != nullptr) << "environment variable RANK_ID is not set";
  ASSERT_TRUE(device_env != nullptr)
      << "environment variable DEVICE_ID is not set";

  int rank_id = atoi(rank_env);
  int device_id = atoi(device_env);
  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id;

  auto x = scope->Var("X");
  auto id = x->GetMutable<HcclRootInfo>();
  // NOTE(review): copies a fixed 1024 bytes; confirm this matches the size of
  // HcclRootInfo.
  memcpy(id, hccl_id, 1024);

  f::AttributeMap comm_init_attrs;
  comm_init_attrs["ring_id"] = 0;
  // NOTE(review): despite the attribute name this is a single integer,
  // presumably the number of ranks -- confirm against c_comm_init_hccl.
  comm_init_attrs["rank_ids"] = 2;
  comm_init_attrs["rank"] = rank_id;
  comm_init_attrs["device_id"] = device_id;

  auto comm_init_op = f::OpRegistry::CreateOp(
      "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs);
  auto place = ctx.GetPlace();
  comm_init_op->Run(*scope, place);
  ctx.Wait();
}
// Runs the `c_reducescatter` operator ten times on a 4x1 tensor of ones and
// checks that the output holds half as many elements as the input, each equal
// to 2.0.
//
// scope: scope holding the "Data" input and "OutData" output variables.
// ctx:   device context used for the tensor copies and waited on between steps.
void TestHCCLReduceScatterOp(f::Scope* scope, const p::DeviceContext& ctx) {
  // Input tensor: rows * cols ones, uploaded into the "Data" variable.
  int rows = 4;
  int cols = 1;
  std::vector<float> init(static_cast<size_t>(rows * cols), 1.0f);
  PrintDebugInfo("input data", init);

  auto* data_var = scope->Var("Data");
  auto* input_tensor = data_var->GetMutable<f::LoDTensor>();
  f::TensorFromVector(init, ctx, input_tensor);
  input_tensor->Resize({rows, cols});
  ctx.Wait();

  // Output tensor: sized like the input and allocated on the context's place.
  auto place = ctx.GetPlace();
  auto* out_var = scope->Var("OutData");
  auto* output_tensor = out_var->GetMutable<f::LoDTensor>();
  output_tensor->Resize({rows, cols});
  output_tensor->mutable_data<float>(place);
  ctx.Wait();

  // Operator attributes and construction.
  f::AttributeMap op_attrs;
  op_attrs["tag"] = std::string("tagx");
  op_attrs["ring_id"] = 0;
  op_attrs["nranks"] = 2;

  auto reduce_scatter_op = f::OpRegistry::CreateOp(
      "c_reducescatter", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, op_attrs);

  // Run repeatedly, waiting for the device after every launch.
  int iter_num = 10;
  for (int round = 0; round < iter_num; ++round) {
    reduce_scatter_op->Run(*scope, place);
    ctx.Wait();
  }

  // Copy the result back to the host and verify it.
  std::vector<float> out_vec;
  f::TensorToVector(*output_tensor, ctx, &out_vec);
  ctx.Wait();
  PrintDebugInfo("output data", out_vec);

  EXPECT_EQ(out_vec.size(), init.size() / 2);
  for (uint32_t idx = 0; idx < out_vec.size(); ++idx) {
    EXPECT_EQ(out_vec[idx], 2.0);
  }
}
// End-to-end check of the c_reducescatter operator on a single NPU: generate
// the HCCL root info, initialise the communicator, then run the operator test.
TEST(c_reducescatter, NPU) {
  f::Scope test_scope;
  HcclRootInfo root_info;

  // only support one device, if more than one device, use first default
  const int device_index = atoi(FLAGS_selected_npus.c_str());
  p::NPUPlace npu_place(device_index);
  p::NPUDeviceContext npu_ctx(npu_place);

  PrepareUniqueId(&test_scope, npu_ctx, &root_info);
  Prepare(&test_scope, npu_ctx, &root_info);
  TestHCCLReduceScatterOp(&test_scope, npu_ctx);
}
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef _WIN32
#include <unistd.h>
#endif
#include <stdio.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/collective/c_allgather_op.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"
#include "paddle/fluid/operators/collective/c_broadcast_op.h"
#include "paddle/fluid/operators/collective/c_reducescatter_op.h"
#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
// Short aliases for the framework and platform namespaces used by the helpers
// and the test case in this file.
namespace f = paddle::framework;
namespace p = paddle::platform;

// Operators that this file instantiates by name through OpRegistry::CreateOp:
// the operator under test and the two HCCL bootstrap operators.
USE_OP(c_reducescatter);
USE_NO_KERNEL_OP(c_gen_hccl_id);
USE_NO_KERNEL_OP(c_comm_init_hccl);
USE_OP_DEVICE_KERNEL(c_reducescatter, NPU);

// Declares the string flag that the TEST body reads as FLAGS_selected_npus.
DECLARE_string(selected_npus);
// Logs the contents of `data` at VLOG level 2.
//
// preStr: label written first, followed by ":" and a line break.
// data:   values to log; each one is formatted with std::to_string and is
//         followed by a "," (so the logged line ends with a trailing comma).
//
// The label is taken by const reference so that it is not copied on each call.
template <typename T>
void PrintDebugInfo(const std::string& preStr, const std::vector<T>& data) {
  std::string debugstring;
  for (const auto& ele : data) {
    // Append in place instead of building a temporary string per element.
    debugstring += std::to_string(ele);
    debugstring += ',';
  }
  VLOG(2) << preStr << ":" << '\n' << debugstring;
}
// Generates the HCCL root info for this process and copies it into `hccl_id`.
//
// Reads RANK_ID and DEVICE_ID from the environment, runs the `c_gen_hccl_id`
// operator with the two hard-coded local endpoints, and copies the operator's
// "Out" variable into `*hccl_id`.
//
// scope:   scope in which the "Out" variable is created and the operator runs.
// ctx:     device context; supplies the place and is waited on after the run.
// hccl_id: destination for the generated root info.
//
// Records a fatal gtest failure and returns early when RANK_ID or DEVICE_ID is
// unset, or when RANK_ID is neither 0 nor 1 (only two endpoints are listed).
void PrepareUniqueId(f::Scope* scope,
                     const p::DeviceContext& ctx,
                     HcclRootInfo* hccl_id) {
  // getenv yields a null pointer for an unset variable; that must never be
  // handed to atoi.
  const char* rank_env = getenv("RANK_ID");
  const char* device_env = getenv("DEVICE_ID");
  ASSERT_TRUE(rank_env != nullptr) << "environment variable RANK_ID is not set";
  ASSERT_TRUE(device_env != nullptr)
      << "environment variable DEVICE_ID is not set";

  int rank_id = atoi(rank_env);
  int device_id = atoi(device_env);
  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id;

  std::vector<std::string> endpointList = {"127.0.0.1:6175", "127.0.0.1:6177"};
  // rank_id is used as an index into endpointList below.
  ASSERT_TRUE(rank_id == 0 || rank_id == 1)
      << "RANK_ID must be 0 or 1, but got " << rank_id;

  f::AttributeMap gen_hccl_id;
  gen_hccl_id["rank"] = rank_id;
  gen_hccl_id["endpoint"] = endpointList[rank_id];
  // The peer is whichever of the two endpoints this rank does not own.
  std::vector<std::string> other_endpoints = {
      endpointList[rank_id == 0 ? 1 : 0]};
  gen_hccl_id["other_endpoints"] = other_endpoints;

  auto out = scope->Var("Out");
  auto id = out->GetMutable<HcclRootInfo>();

  VLOG(3) << "break";

  auto comm_init_op = f::OpRegistry::CreateOp(
      "c_gen_hccl_id", {}, {{"Out", {"Out"}}}, gen_hccl_id);
  VLOG(3) << "break";
  auto place = ctx.GetPlace();
  comm_init_op->Run(*scope, place);
  ctx.Wait();

  // NOTE(review): copies a fixed 1024 bytes; confirm this matches the size of
  // HcclRootInfo that the operator fills in.
  memcpy(hccl_id, id, 1024);
}
// Initialises the HCCL communicator for ring 0 from a previously generated
// root info.
//
// Copies `*hccl_id` into the scope variable "X" and runs the
// `c_comm_init_hccl` operator with the rank and device taken from the RANK_ID
// and DEVICE_ID environment variables.
//
// scope:   scope in which the "X" variable is created and the operator runs.
// ctx:     device context; supplies the place and is waited on after the run.
// hccl_id: root info to hand to the operator.
//
// Records a fatal gtest failure and returns early when RANK_ID or DEVICE_ID is
// unset.
void Prepare(f::Scope* scope,
             const p::DeviceContext& ctx,
             HcclRootInfo* hccl_id) {
  // getenv yields a null pointer for an unset variable; that must never be
  // handed to atoi.
  const char* rank_env = getenv("RANK_ID");
  const char* device_env = getenv("DEVICE_ID");
  ASSERT_TRUE(rank_env != nullptr) << "environment variable RANK_ID is not set";
  ASSERT_TRUE(device_env != nullptr)
      << "environment variable DEVICE_ID is not set";

  int rank_id = atoi(rank_env);
  int device_id = atoi(device_env);
  VLOG(2) << "rank_id = " << rank_id << "; device_id = " << device_id;

  auto x = scope->Var("X");
  auto id = x->GetMutable<HcclRootInfo>();
  // NOTE(review): copies a fixed 1024 bytes; confirm this matches the size of
  // HcclRootInfo.
  memcpy(id, hccl_id, 1024);

  f::AttributeMap comm_init_attrs;
  comm_init_attrs["ring_id"] = 0;
  // NOTE(review): despite the attribute name this is a single integer,
  // presumably the number of ranks -- confirm against c_comm_init_hccl.
  comm_init_attrs["rank_ids"] = 2;
  comm_init_attrs["rank"] = rank_id;
  comm_init_attrs["device_id"] = device_id;

  auto comm_init_op = f::OpRegistry::CreateOp(
      "c_comm_init_hccl", {{"X", {"X"}}}, {}, comm_init_attrs);
  auto place = ctx.GetPlace();
  comm_init_op->Run(*scope, place);
  ctx.Wait();
}
// Runs the `c_reducescatter` operator ten times on a 4x1 tensor of ones and
// checks that the output holds half as many elements as the input, each equal
// to 2.0.
//
// scope: scope holding the "Data" input and "OutData" output variables.
// ctx:   device context used for the tensor copies and waited on between steps.
void TestHCCLReduceScatterOp(f::Scope* scope, const p::DeviceContext& ctx) {
  // Input tensor: rows * cols ones, uploaded into the "Data" variable.
  int rows = 4;
  int cols = 1;
  std::vector<float> init(static_cast<size_t>(rows * cols), 1.0f);
  PrintDebugInfo("input data", init);

  auto* data_var = scope->Var("Data");
  auto* input_tensor = data_var->GetMutable<f::LoDTensor>();
  f::TensorFromVector(init, ctx, input_tensor);
  input_tensor->Resize({rows, cols});
  ctx.Wait();

  // Output tensor: sized like the input and allocated on the context's place.
  auto place = ctx.GetPlace();
  auto* out_var = scope->Var("OutData");
  auto* output_tensor = out_var->GetMutable<f::LoDTensor>();
  output_tensor->Resize({rows, cols});
  output_tensor->mutable_data<float>(place);
  ctx.Wait();

  // Operator attributes and construction.
  f::AttributeMap op_attrs;
  op_attrs["tag"] = std::string("tagx");
  op_attrs["ring_id"] = 0;
  op_attrs["nranks"] = 2;

  auto reduce_scatter_op = f::OpRegistry::CreateOp(
      "c_reducescatter", {{"X", {"Data"}}}, {{"Out", {"OutData"}}}, op_attrs);

  // Run repeatedly, waiting for the device after every launch.
  int iter_num = 10;
  for (int round = 0; round < iter_num; ++round) {
    reduce_scatter_op->Run(*scope, place);
    ctx.Wait();
  }

  // Copy the result back to the host and verify it.
  std::vector<float> out_vec;
  f::TensorToVector(*output_tensor, ctx, &out_vec);
  ctx.Wait();
  PrintDebugInfo("output data", out_vec);

  EXPECT_EQ(out_vec.size(), init.size() / 2);
  for (uint32_t idx = 0; idx < out_vec.size(); ++idx) {
    EXPECT_EQ(out_vec[idx], 2.0);
  }
}
// End-to-end check of the c_reducescatter operator on a single NPU: generate
// the HCCL root info, initialise the communicator, then run the operator test.
TEST(c_reducescatter, NPU) {
  f::Scope test_scope;
  HcclRootInfo root_info;

  // only support one device, if more than one device, use first default
  const int device_index = atoi(FLAGS_selected_npus.c_str());
  p::NPUPlace npu_place(device_index);
  p::NPUDeviceContext npu_ctx(npu_place);

  PrepareUniqueId(&test_scope, npu_ctx, &root_info);
  Prepare(&test_scope, npu_ctx, &root_info);
  TestHCCLReduceScatterOp(&test_scope, npu_ctx);
}
paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc
浏览文件 @
846c7e70
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
namespace
paddle
{
namespace
operators
{
using
framework
::
Tensor
;
// Handler that sets up a oneDNN `binary_add` primitive for fill_constant.
//
// Both memory descriptors describe raw bytes (uint8_t data type, `ab` layout):
// the first source / destination is {out->numel(), sizeof(T)} and the second
// source is the single row {1, sizeof(T)} declared as `src1_md`. A scale of
// 0.0f is attached to the first source.
// NOTE(review): presumably the zero scale cancels the existing buffer contents
// so that the add only propagates the src1 row -- confirm against oneDNN docs.
template <typename T>
class FillConstantMKLDNNHandler
    : public platform::MKLDNNHandlerNoCachingT<T, dnnl::binary> {
 public:
  // out:       tensor to be filled; only its element count is read here.
  // engine:    oneDNN engine forwarded to the base handler.
  // cpu_place: place forwarded to the base handler.
  FillConstantMKLDNNHandler(Tensor* out,
                            dnnl::engine engine,
                            platform::Place cpu_place)
      : platform::MKLDNNHandlerNoCachingT<T, dnnl::binary>(engine, cpu_place) {
    // Byte-level view of the output: one row of sizeof(T) bytes per element.
    const dnnl::memory::desc src0_md({out->numel(), sizeof(T)},
                                     platform::MKLDNNGetDataType<uint8_t>(),
                                     dnnl::memory::format_tag::ab);

    dnnl::primitive_attr zero_src0_scale;
    zero_src0_scale.set_scales(DNNL_ARG_SRC_0, /* mask = */ 0, {0.0f});

    // The same descriptor serves as first source and as destination.
    this->AcquireForwardPrimitiveDescriptor(zero_src0_scale,
                                            dnnl::algorithm::binary_add,
                                            src0_md,
                                            src1_md,
                                            src0_md);
  }

  // Descriptor of the second source: a single row of sizeof(T) bytes.
  static const dnnl::memory::desc src1_md;
};

// Out-of-class definition of the static second-source descriptor.
template <typename T>
const dnnl::memory::desc FillConstantMKLDNNHandler<T>::src1_md(
    {1, sizeof(T)},
    platform::MKLDNNGetDataType<uint8_t>(),
    dnnl::memory::format_tag::ab);
// oneDNN kernel for the fill_constant operator.
template <typename T>
class FillConstantMKLDNNKernel : public framework::OpKernel<T> {
 public:
  // Framework entry point; forwards to RunKernel.
  void Compute(const framework::ExecutionContext& ctx) const override {
    this->RunKernel(ctx);
  }

  // Resizes output "Out" to the requested shape, fills it through the binary
  // primitive prepared by FillConstantMKLDNNHandler, and finally attaches a
  // plain memory descriptor of element type T to the output.
  void RunKernel(const framework::ExecutionContext& ctx) const {
    const auto& onednn_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();
    const auto& engine = onednn_ctx.GetEngine();

    auto* output = ctx.Output<Tensor>("Out");
    T fill_value = CalculateFillValue(ctx);

    auto dims = GetShape(ctx);
    output->Resize(dims);

    FillConstantMKLDNNHandler<T> handler(output, engine, ctx.GetPlace());

    // Second source: the bytes of the fill value itself, wrapped in place.
    dnnl::memory fill_value_memory(FillConstantMKLDNNHandler<T>::src1_md,
                                   engine,
                                   reinterpret_cast<uint8_t*>(&fill_value));

    auto out_memory = handler.AcquireDstMemory(output);
    auto binary_prim = handler.AcquireForwardPrimitive();

    auto& stream = platform::MKLDNNDeviceContext::tls().get_stream();
    // The output buffer is passed both as first source and as destination.
    binary_prim->execute(stream,
                         {{DNNL_ARG_SRC_0, *out_memory},
                          {DNNL_ARG_SRC_1, fill_value_memory},
                          {DNNL_ARG_DST, *out_memory}});
    stream.wait();

    // src0_memory_p's md was just to allow the usage of a binary
    // primitive as a memset, and now we need to create a real one
    output->set_mem_desc({phi::vectorize(dims),
                          platform::MKLDNNGetDataType<T>(),
                          platform::GetPlainMKLDNNFormat(dims.size())});
  }

  // Determines the value to fill with.
  //
  // When attribute "str_value" is empty, the float attribute "value" is used.
  // Otherwise "str_value" is interpreted: "inf", "-inf" and "nan" map to the
  // matching float special values, anything else is parsed as a double from a
  // string stream. If input "ValueTensor" is present it must hold exactly one
  // element, and that element overrides the attribute-derived value.
  T CalculateFillValue(const framework::ExecutionContext& ctx) const {
    const auto text_attr = ctx.Attr<std::string>("str_value");
    const auto float_attr = ctx.Attr<float>("value");

    T result;
    if (text_attr.empty()) {
      result = static_cast<T>(float_attr);
    } else if (text_attr == "inf") {
      // handle NaN/Inf first, which cannot be read from stream
      result = static_cast<T>(std::numeric_limits<float>::infinity());
    } else if (text_attr == "-inf") {
      result = static_cast<T>(-std::numeric_limits<float>::infinity());
    } else if (text_attr == "nan") {
      result = static_cast<T>(std::numeric_limits<float>::quiet_NaN());
    } else {
      std::stringstream parser(text_attr);
      double parsed;
      parser >> parsed;
      result = static_cast<T>(parsed);
    }

    if (ctx.HasInput("ValueTensor")) {
      const auto* value_tensor = ctx.Input<Tensor>("ValueTensor");
      PADDLE_ENFORCE_EQ(
          value_tensor->numel(),
          1,
          platform::errors::InvalidArgument(
              "When use Tensor as value to set Tensor value in fill_constant, "
              "value input(ValueTensor) size must be 1, but got %d",
              value_tensor->numel()));
      result = value_tensor->data<T>()[0];
    }

    return result;
  }
};
}
// namespace operators
}
// namespace paddle
namespace ops = paddle::operators;

// Registers the float instantiation of FillConstantMKLDNNKernel as the MKLDNN
// kernel of the fill_constant operator for CPUPlace.
REGISTER_OP_KERNEL(fill_constant,
                   MKLDNN,
                   paddle::platform::CPUPlace,
                   ops::FillConstantMKLDNNKernel<float>);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
namespace
paddle
{
namespace
operators
{
using
framework
::
Tensor
;
// Handler that sets up a oneDNN `binary_add` primitive for fill_constant.
//
// Both memory descriptors describe raw bytes (uint8_t data type, `ab` layout):
// the first source / destination is {out->numel(), sizeof(T)} and the second
// source is the single row {1, sizeof(T)} declared as `src1_md`. A scale of
// 0.0f is attached to the first source.
// NOTE(review): presumably the zero scale cancels the existing buffer contents
// so that the add only propagates the src1 row -- confirm against oneDNN docs.
template <typename T>
class FillConstantMKLDNNHandler
    : public platform::MKLDNNHandlerNoCachingT<T, dnnl::binary> {
 public:
  // out:       tensor to be filled; only its element count is read here.
  // engine:    oneDNN engine forwarded to the base handler.
  // cpu_place: place forwarded to the base handler.
  FillConstantMKLDNNHandler(Tensor* out,
                            dnnl::engine engine,
                            platform::Place cpu_place)
      : platform::MKLDNNHandlerNoCachingT<T, dnnl::binary>(engine, cpu_place) {
    // Byte-level view of the output: one row of sizeof(T) bytes per element.
    const dnnl::memory::desc src0_md({out->numel(), sizeof(T)},
                                     platform::MKLDNNGetDataType<uint8_t>(),
                                     dnnl::memory::format_tag::ab);

    dnnl::primitive_attr zero_src0_scale;
    zero_src0_scale.set_scales(DNNL_ARG_SRC_0, /* mask = */ 0, {0.0f});

    // The same descriptor serves as first source and as destination.
    this->AcquireForwardPrimitiveDescriptor(zero_src0_scale,
                                            dnnl::algorithm::binary_add,
                                            src0_md,
                                            src1_md,
                                            src0_md);
  }

  // Descriptor of the second source: a single row of sizeof(T) bytes.
  static const dnnl::memory::desc src1_md;
};

// Out-of-class definition of the static second-source descriptor.
template <typename T>
const dnnl::memory::desc FillConstantMKLDNNHandler<T>::src1_md(
    {1, sizeof(T)},
    platform::MKLDNNGetDataType<uint8_t>(),
    dnnl::memory::format_tag::ab);
// oneDNN kernel for the fill_constant operator.
template <typename T>
class FillConstantMKLDNNKernel : public framework::OpKernel<T> {
 public:
  // Framework entry point; forwards to RunKernel.
  void Compute(const framework::ExecutionContext& ctx) const override {
    this->RunKernel(ctx);
  }

  // Resizes output "Out" to the requested shape, fills it through the binary
  // primitive prepared by FillConstantMKLDNNHandler, and finally attaches a
  // plain memory descriptor of element type T to the output.
  void RunKernel(const framework::ExecutionContext& ctx) const {
    const auto& onednn_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();
    const auto& engine = onednn_ctx.GetEngine();

    auto* output = ctx.Output<Tensor>("Out");
    T fill_value = CalculateFillValue(ctx);

    auto dims = GetShape(ctx);
    output->Resize(dims);

    FillConstantMKLDNNHandler<T> handler(output, engine, ctx.GetPlace());

    // Second source: the bytes of the fill value itself, wrapped in place.
    dnnl::memory fill_value_memory(FillConstantMKLDNNHandler<T>::src1_md,
                                   engine,
                                   reinterpret_cast<uint8_t*>(&fill_value));

    auto out_memory = handler.AcquireDstMemory(output);
    auto binary_prim = handler.AcquireForwardPrimitive();

    auto& stream = platform::MKLDNNDeviceContext::tls().get_stream();
    // The output buffer is passed both as first source and as destination.
    binary_prim->execute(stream,
                         {{DNNL_ARG_SRC_0, *out_memory},
                          {DNNL_ARG_SRC_1, fill_value_memory},
                          {DNNL_ARG_DST, *out_memory}});
    stream.wait();

    // src0_memory_p's md was just to allow the usage of a binary
    // primitive as a memset, and now we need to create a real one
    output->set_mem_desc({phi::vectorize(dims),
                          platform::MKLDNNGetDataType<T>(),
                          platform::GetPlainMKLDNNFormat(dims.size())});
  }

  // Determines the value to fill with.
  //
  // When attribute "str_value" is empty, the float attribute "value" is used.
  // Otherwise "str_value" is interpreted: "inf", "-inf" and "nan" map to the
  // matching float special values, anything else is parsed as a double from a
  // string stream. If input "ValueTensor" is present it must hold exactly one
  // element, and that element overrides the attribute-derived value.
  T CalculateFillValue(const framework::ExecutionContext& ctx) const {
    const auto text_attr = ctx.Attr<std::string>("str_value");
    const auto float_attr = ctx.Attr<float>("value");

    T result;
    if (text_attr.empty()) {
      result = static_cast<T>(float_attr);
    } else if (text_attr == "inf") {
      // handle NaN/Inf first, which cannot be read from stream
      result = static_cast<T>(std::numeric_limits<float>::infinity());
    } else if (text_attr == "-inf") {
      result = static_cast<T>(-std::numeric_limits<float>::infinity());
    } else if (text_attr == "nan") {
      result = static_cast<T>(std::numeric_limits<float>::quiet_NaN());
    } else {
      std::stringstream parser(text_attr);
      double parsed;
      parser >> parsed;
      result = static_cast<T>(parsed);
    }

    if (ctx.HasInput("ValueTensor")) {
      const auto* value_tensor = ctx.Input<Tensor>("ValueTensor");
      PADDLE_ENFORCE_EQ(
          value_tensor->numel(),
          1,
          platform::errors::InvalidArgument(
              "When use Tensor as value to set Tensor value in fill_constant, "
              "value input(ValueTensor) size must be 1, but got %d",
              value_tensor->numel()));
      result = value_tensor->data<T>()[0];
    }

    return result;
  }
};
}
// namespace operators
}
// namespace paddle
namespace ops = paddle::operators;

// Registers the float instantiation of FillConstantMKLDNNKernel as the MKLDNN
// kernel of the fill_constant operator for CPUPlace.
REGISTER_OP_KERNEL(fill_constant,
                   MKLDNN,
                   paddle::platform::CPUPlace,
                   ops::FillConstantMKLDNNKernel<float>);
paddle/fluid/operators/unbind_op.cc
浏览文件 @
846c7e70
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/unbind_op.h"
#include <string>
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/unary.h"
namespace
paddle
{
namespace
operators
{
using
framework
::
Tensor
;
// Operator definition for `unbind`; provides shape inference only.
class UnbindOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Checks that input "X" and at least one output "Out" exist, gives every
  // output the shape of X with dimension `axis` removed, and shares X's LoD
  // with each output.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE_EQ(
        ctx->HasInput("X"),
        true,
        platform::errors::NotFound("Input(X) of UnbindOp is not found."));
    PADDLE_ENFORCE_GE(
        ctx->Outputs("Out").size(),
        1UL,
        platform::errors::NotFound("Outputs(Out) of UnbindOp is not found."));

    auto input_shape = ctx->GetInputDim("X");
    auto output_names = ctx->Outputs("Out");
    int axis = ctx->Attrs().Get<int>("axis");
    const size_t output_count = output_names.size();

    // Every output has the same shape, so replicate it output_count times.
    auto single_shape = UnbindOutsDims(input_shape, axis);
    std::vector<framework::DDim> all_shapes(output_count, single_shape);
    ctx->SetOutputsDim("Out", all_shapes);

    for (size_t out_idx = 0; out_idx < output_count; ++out_idx) {
      ctx->ShareLoD("X", "Out", 0, out_idx);
    }
  }
};
// Declares the inputs, outputs, attributes and documentation of the `unbind`
// operator.
class UnbindOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    // The description used to say "split operator" (copy-paste leftover);
    // this is the unbind operator.
    AddInput("X", "(Tensor) Input tensor of the unbind operator.");
    AddOutput("Out", "(Tensor) Output tensors of the unbind operator.")
        .AsDuplicable();
    AddComment(R"DOC(
Unbind operator
Remove a tensor dimension.
Example:
Input = [[1,2],
[3,4],
[5,6]]
axis = 0
Output[0] = [1,2]
Output[1] = [3,4]
Output[2] = [5,6]
)DOC");
    AddAttr<int>("axis",
                 "(int, default 0) "
                 "dimension to remove.")
        .SetDefault(0);
  }
};
}
// namespace operators
}
// namespace paddle
namespace ops = paddle::operators;

// Registers the unbind operator with its proto maker and with the gradient
// maker instantiated for both OpDesc and imperative OpBase.
REGISTER_OPERATOR(unbind,
                  ops::UnbindOp,
                  ops::UnbindOpMaker,
                  ops::UnbindGradMaker<paddle::framework::OpDesc>,
                  ops::UnbindGradMaker<paddle::imperative::OpBase>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/unbind_op.h"
#include <string>
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/unary.h"
namespace
paddle
{
namespace
operators
{
using
framework
::
Tensor
;
// Operator definition for `unbind`; provides shape inference only.
class UnbindOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Checks that input "X" and at least one output "Out" exist, gives every
  // output the shape of X with dimension `axis` removed, and shares X's LoD
  // with each output.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE_EQ(
        ctx->HasInput("X"),
        true,
        platform::errors::NotFound("Input(X) of UnbindOp is not found."));
    PADDLE_ENFORCE_GE(
        ctx->Outputs("Out").size(),
        1UL,
        platform::errors::NotFound("Outputs(Out) of UnbindOp is not found."));

    auto input_shape = ctx->GetInputDim("X");
    auto output_names = ctx->Outputs("Out");
    int axis = ctx->Attrs().Get<int>("axis");
    const size_t output_count = output_names.size();

    // Every output has the same shape, so replicate it output_count times.
    auto single_shape = UnbindOutsDims(input_shape, axis);
    std::vector<framework::DDim> all_shapes(output_count, single_shape);
    ctx->SetOutputsDim("Out", all_shapes);

    for (size_t out_idx = 0; out_idx < output_count; ++out_idx) {
      ctx->ShareLoD("X", "Out", 0, out_idx);
    }
  }
};
// Declares the inputs, outputs, attributes and documentation of the `unbind`
// operator.
class UnbindOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    // The description used to say "split operator" (copy-paste leftover);
    // this is the unbind operator.
    AddInput("X", "(Tensor) Input tensor of the unbind operator.");
    AddOutput("Out", "(Tensor) Output tensors of the unbind operator.")
        .AsDuplicable();
    AddComment(R"DOC(
Unbind operator
Remove a tensor dimension.
Example:
Input = [[1,2],
[3,4],
[5,6]]
axis = 0
Output[0] = [1,2]
Output[1] = [3,4]
Output[2] = [5,6]
)DOC");
    AddAttr<int>("axis",
                 "(int, default 0) "
                 "dimension to remove.")
        .SetDefault(0);
  }
};
}
// namespace operators
}
// namespace paddle
namespace ops = paddle::operators;

// Registers the unbind operator with its proto maker and with the gradient
// maker instantiated for both OpDesc and imperative OpBase.
REGISTER_OPERATOR(unbind,
                  ops::UnbindOp,
                  ops::UnbindOpMaker,
                  ops::UnbindGradMaker<paddle::framework::OpDesc>,
                  ops::UnbindGradMaker<paddle::imperative::OpBase>);
paddle/fluid/operators/unbind_op.h
浏览文件 @
846c7e70
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <chrono> // NOLINT
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/fluid/operators/utils.h"
namespace
paddle
{
namespace
operators
{
// Returns the shape of one unbind output: `in_dims` with dimension `axis`
// removed.
//
// in_dims: shape of the input tensor.
// axis:    dimension to drop; a negative value counts from the end
//          (in_dims.size() is added to it).
//
// Extents are collected as int64_t so that they are not narrowed to int on
// the way into phi::make_ddim.
// NOTE(review): `axis` is not range-checked here; an out-of-range value
// removes nothing -- confirm callers validate it.
static inline framework::DDim UnbindOutsDims(const framework::DDim& in_dims,
                                             int axis) {
  const int rank = in_dims.size();
  const int removed_axis = axis < 0 ? rank + axis : axis;

  std::vector<int64_t> out_dims;
  out_dims.reserve(rank > 0 ? rank - 1 : 0);
  for (int i = 0; i < rank; ++i) {
    if (i != removed_axis) {
      out_dims.push_back(in_dims[i]);
    }
  }
  return phi::make_ddim(out_dims);
}
// Gradient-op maker for unbind: emits a single `stack` op.
template <typename T>
class UnbindGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

 protected:
  // Configures `op` as a "stack" op whose input "X" is the gradient of
  // unbind's "Out", whose output "Y" is the gradient of unbind's "X", and
  // whose attribute map is the one returned by this->Attrs().
  void Apply(GradOpPtr<T> op) const override {
    op->SetType("stack");

    const auto& out_grads = this->OutputGrad("Out");
    op->SetInput("X", out_grads);

    const auto& x_grad = this->InputGrad("X");
    op->SetOutput("Y", x_grad);

    op->SetAttrMap(this->Attrs());
  }
};
}
// namespace operators
}
// namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <chrono> // NOLINT
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/fluid/operators/utils.h"
namespace
paddle
{
namespace
operators
{
// Returns the shape of one unbind output: `in_dims` with dimension `axis`
// removed.
//
// in_dims: shape of the input tensor.
// axis:    dimension to drop; a negative value counts from the end
//          (in_dims.size() is added to it).
//
// Extents are collected as int64_t so that they are not narrowed to int on
// the way into phi::make_ddim.
// NOTE(review): `axis` is not range-checked here; an out-of-range value
// removes nothing -- confirm callers validate it.
static inline framework::DDim UnbindOutsDims(const framework::DDim& in_dims,
                                             int axis) {
  const int rank = in_dims.size();
  const int removed_axis = axis < 0 ? rank + axis : axis;

  std::vector<int64_t> out_dims;
  out_dims.reserve(rank > 0 ? rank - 1 : 0);
  for (int i = 0; i < rank; ++i) {
    if (i != removed_axis) {
      out_dims.push_back(in_dims[i]);
    }
  }
  return phi::make_ddim(out_dims);
}
// Gradient-op maker for unbind: emits a single `stack` op.
template <typename T>
class UnbindGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

 protected:
  // Configures `op` as a "stack" op whose input "X" is the gradient of
  // unbind's "Out", whose output "Y" is the gradient of unbind's "X", and
  // whose attribute map is the one returned by this->Attrs().
  void Apply(GradOpPtr<T> op) const override {
    op->SetType("stack");

    const auto& out_grads = this->OutputGrad("Out");
    op->SetInput("X", out_grads);

    const auto& x_grad = this->InputGrad("X");
    op->SetOutput("Y", x_grad);

    op->SetAttrMap(this->Attrs());
  }
};
}
// namespace operators
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录