Commit 4059c9ca authored by Yang Yu

Polish GetPlacesOp

Parent: fe341bac
@@ -39,17 +39,19 @@ class GetPlacesOp : public framework::OperatorBase {
       : OperatorBase(type, inputs, outputs, attrs) {}
   void Run(const framework::Scope &scope,
            const platform::Place &place) const override {
-    std::string device_type = Attr<std::string>("device_type");
+    bool is_gpu;
+    if (Attr<std::string>("device_type") == "AUTO") {
+      is_gpu = platform::is_gpu_place(place);
+    } else {
+      is_gpu = Attr<std::string>("device_type") == "CUDA";
+    }
     auto device_count = static_cast<size_t>(Attr<int>("device_count"));
     if (device_count == 0) {
-      if (device_type == "CUDA") {
-        device_count = CUDADevCount();
-      } else if (device_type == "CPU") {
-        device_count = std::thread::hardware_concurrency();
-      }
+      device_count =
+          is_gpu ? CUDADevCount() : std::thread::hardware_concurrency();
     }
     PADDLE_ENFORCE_NE(device_count, 0, "Cannot indicate %s device count",
-                      device_type);
+                      is_gpu ? "GPU" : "CPU");
     auto out_var_name = Output("Out");
     auto &places =
@@ -57,14 +59,14 @@ class GetPlacesOp : public framework::OperatorBase {
                            "Output variable %s cannot be found", out_var_name)
             .GetMutable<platform::PlaceList>());
     places.reserve(device_count);
-    if (device_type == "CUDA") {
+    if (is_gpu) {
       PADDLE_ENFORCE_LE(device_count, CUDADevCount(),
                         "Only %d CUDA devices found, cannot set to %d",
                         CUDADevCount(), device_count);
       for (size_t i = 0; i < device_count; ++i) {
-        places.emplace_back(platform::CUDAPlace(i));
+        places.emplace_back(platform::CUDAPlace(static_cast<int>(i)));
       }
-    } else if (device_type == "CPU") {
+    } else {
       for (size_t i = 0; i < device_count; ++i) {
         places.emplace_back(platform::CPUPlace());
       }
@@ -77,10 +79,10 @@ class GetPlacesOpProtoMaker : public framework::OpProtoAndCheckerMaker {
   GetPlacesOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddOutput("Out", "vector of Place");
-    AddAttr<int>("device_count", "device count").SetDefault(1);
-    AddAttr<std::string>("device_type",
-                         R"(device type must be in ["CPU", "CUDA"])")
-        .InEnum({"CPU", "CUDA"});
+    AddAttr<int>("device_count", "device count").SetDefault(0);
+    AddAttr<std::string>("device_type", "device type")
+        .InEnum({"CUDA", "CPU", "AUTO"})
+        .SetDefault("AUTO");
     AddComment(R"DOC(
 Returns a list of places based on flags. The list will be used for parallel
 execution.
@@ -111,4 +113,5 @@ class GetPlacesInferShape : public framework::InferShapeBase {
 namespace ops = paddle::operators;
 REGISTER_OPERATOR(get_places, ops::GetPlacesOp, ops::GetPlacesOpProtoMaker,
-                  ops::GetPlacesInferVarType, ops::GetPlacesInferShape);
+                  ops::GetPlacesInferVarType, ops::GetPlacesInferShape,
+                  paddle::framework::EmptyGradOpMaker);
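
For reference, the updated selection rule can be read as: device_type now defaults to "AUTO", which follows the place the operator is actually run on, and device_count == 0 means "use every device that can be detected". Below is a minimal Python sketch of that behaviour, illustrative only; run_place_is_gpu and cuda_device_count are stand-ins for the C++ platform::is_gpu_place(place) and CUDADevCount() calls, not real Paddle APIs.

import os


def resolve_places(device_type="AUTO", device_count=0,
                   run_place_is_gpu=False, cuda_device_count=0):
    # "AUTO" follows the place the op is run on; an explicit type wins otherwise.
    if device_type == "AUTO":
        is_gpu = run_place_is_gpu
    else:
        is_gpu = device_type == "CUDA"
    # device_count == 0 means "use every device that can be detected".
    if device_count == 0:
        device_count = cuda_device_count if is_gpu else (os.cpu_count() or 0)
    assert device_count != 0, "Cannot indicate %s device count" % (
        "GPU" if is_gpu else "CPU")
    if is_gpu:
        assert device_count <= cuda_device_count, \
            "Only %d CUDA devices found, cannot set to %d" % (
                cuda_device_count, device_count)
        return ["CUDAPlace(%d)" % i for i in range(device_count)]
    return ["CPUPlace()"] * device_count

For example, resolve_places() on a CPU-only machine with 8 cores would return eight CPUPlace entries, while the same call on a GPU place would enumerate the visible CUDA devices.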
@@ -4,19 +4,22 @@ All util layers.
 from ..layer_helper import LayerHelper
 from ..framework import unique_name
+from ..registry import autodoc

 __all__ = ['get_places']


-def get_places(device_count=0, device_type="CPU"):
+@autodoc
+def get_places(device_count=None, device_type=None):
     helper = LayerHelper('get_places', **locals())
     out_places = helper.create_variable(name=unique_name(helper.name + ".out"))
+    attrs = dict()
+    if device_count is not None:
+        attrs['device_count'] = int(device_count)
+    if device_type is not None:
+        attrs['device_type'] = str(device_type)
     helper.append_op(
-        type='get_places',
-        outputs={"Out": [out_places]},
-        attrs={
-            "device_type": device_type,
-            'device_count': device_count,
-        })
+        type='get_places', outputs={"Out": [out_places]}, attrs=attrs)

     return out_places
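
With the Python layer now forwarding only the attributes the caller actually supplies, the op-side defaults take effect. A hedged usage sketch, assuming the layer is exposed as fluid.layers.get_places in this version of the API:

import paddle.v2.fluid as fluid

# With the new defaults (device_type="AUTO", device_count=0 on the op side),
# a bare call adapts to wherever the program is executed.
places = fluid.layers.get_places()

# Explicit values are still forwarded as op attributes when provided.
cpu_places = fluid.layers.get_places(device_count=4, device_type='CPU')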