/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/pybind/protobuf.h"

#include <algorithm>
#include <array>
#include <atomic>
#include <iterator>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "paddle/framework/backward.h"
#include "paddle/framework/executor.h"
#include "paddle/framework/feed_fetch_method.h"
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/init.h"
#include "paddle/framework/lod_rank_table.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/lod_tensor_array.h"
#include "paddle/framework/prune.h"
#include "paddle/framework/selected_rows.h"
#include "paddle/operators/cond_op.h"
#include "paddle/operators/net_op.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"
#include "paddle/pybind/const_value.h"
#include "paddle/pybind/exception.h"
#include "paddle/pybind/pybind.h"
#include "paddle/pybind/tensor_py.h"
#include "paddle/string/to_string.h"

#ifdef PADDLE_WITH_CUDA
#include "paddle/operators/nccl/nccl_gpu_common.h"
#include "paddle/platform/cuda_profiler.h"
#include "paddle/platform/gpu_info.h"
#endif

// Mark LoDTensorArray as an opaque type so that pybind11 does not
// automatically convert it to a Python list. The type is instead bound
// explicitly as the Python class "LoDTensorArray" in the module definition
// in this file.
PYBIND11_MAKE_OPAQUE(paddle::framework::LoDTensorArray);

namespace paddle {
namespace pybind {
// Returns the next integer of the counter associated with `prefix`.
//
// Every distinct prefix has its own counter: the first call for a prefix
// returns 0, the next call returns 1, and so on. Counters live for the
// lifetime of the process.
//
// The mutex guards both the map lookup/insertion and the increment, because
// operator[] may insert a new entry into the shared map.
static size_t UniqueIntegerGenerator(const std::string &prefix) {
  static std::mutex generators_mutex;
  static std::unordered_map<std::string, size_t> generators;

  std::lock_guard<std::mutex> guard(generators_mutex);
  // operator[] value-initialises a missing counter to 0; the post-increment
  // hands back the value before it is bumped.
  return generators[prefix]++;
}

// Reports whether this binary was compiled with PADDLE_WITH_CUDA defined.
// The answer is fixed at compile time.
bool IsCompileGPU() {
#ifdef PADDLE_WITH_CUDA
  return true;
#else
  return false;
#endif
}

// Entry point of the Python extension module `core`.
//
// Creates the module object, registers the exception bindings, the framework /
// platform / operator classes and the module-level helper functions on it, and
// returns the underlying module pointer.
PYBIND11_PLUGIN(core) {
  py::module m("core", "C++ core of PaddlePaddle");

  // using framework in this function. Since it is inside a function, it will
  // not cause namespace pollution.
  using namespace paddle::framework;  // NOLINT

  BindException(m);

  // ---- Tensor -------------------------------------------------------------
  // Exposes the buffer protocol, shape getters/setters, float/int allocation
  // on CPUPlace and GPUPlace, bulk "set" from arrays (CPU always, CUDA only
  // when compiled with PADDLE_WITH_CUDA) and per-element float/double access.
  py::class_<Tensor>(m, "Tensor", py::buffer_protocol())
      .def_buffer(
          [](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); })
      .def("get_dims",
           [](const Tensor &self) { return vectorize(self.dims()); })
      .def("set_dims",
           [](Tensor &self, const std::vector<int64_t> &dim) {
             self.Resize(make_ddim(dim));
           })
      .def("alloc_float",
           [](Tensor &self, paddle::platform::GPUPlace &place) {
             self.mutable_data<float>(place);
           })
      .def("alloc_float",
           [](Tensor &self, paddle::platform::CPUPlace &place) {
             self.mutable_data<float>(place);
           })
      .def("alloc_int",
           [](Tensor &self, paddle::platform::CPUPlace &place) {
             self.mutable_data<int>(place);
           })
      .def("alloc_int",
           [](Tensor &self, paddle::platform::GPUPlace &place) {
             self.mutable_data<int>(place);
           })
      .def("set", PyCPUTensorSetFromArray<float>)
      .def("set", PyCPUTensorSetFromArray<int>)
      .def("set", PyCPUTensorSetFromArray<double>)
      .def("set", PyCPUTensorSetFromArray<int64_t>)
      .def("set", PyCPUTensorSetFromArray<bool>)
#ifdef PADDLE_WITH_CUDA
      .def("set", PyCUDATensorSetFromArray<float>)
      .def("set", PyCUDATensorSetFromArray<int>)
      .def("set", PyCUDATensorSetFromArray<double>)
      .def("set", PyCUDATensorSetFromArray<int64_t>)
      .def("set", PyCUDATensorSetFromArray<bool>)
#endif
      .def("shape", [](Tensor &self) { return vectorize(self.dims()); })
      .def("set_float_element", TensorSetElement<float>)
      .def("get_float_element", TensorGetElement<float>)
      .def("set_double_element", TensorSetElement<double>)
      .def("get_double_element", TensorGetElement<double>)
      .def("dtype", [](Tensor &self) { return ToDataType(self.type()); });

  // ---- LoDTensor ----------------------------------------------------------
  // Python always passes / receives the LoD as nested std::vector<size_t>.
  // In non-CUDA builds that value is handed to LoDTensor directly; in CUDA
  // builds each level is copied into / out of the framework's LoD container
  // (presumably because LoD is not a plain nested std::vector there --
  // confirm against lod_tensor.h).
  py::class_<LoDTensor, Tensor>(m, "LoDTensor")
      .def_buffer(
          [](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); })
      .def(
          "__init__",
          [](LoDTensor &instance, const std::vector<std::vector<size_t>> &lod) {
#ifndef PADDLE_WITH_CUDA
            new (&instance) LoDTensor(lod);
#else
            LoD new_lod;
            new_lod.reserve(lod.size());
            std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
            new (&instance) LoDTensor(new_lod);
#endif
          })
      .def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); })
      .def("set_lod",
           [](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) {
#ifndef PADDLE_WITH_CUDA
             self.set_lod(lod);
#else
             LoD new_lod;
             new_lod.reserve(lod.size());
             std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
             self.set_lod(new_lod);
#endif
           })
      .def("lod", [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
#ifndef PADDLE_WITH_CUDA
        return self.lod();
#else
        auto lod = self.lod();
        std::vector<std::vector<size_t>> new_lod;
        new_lod.reserve(lod.size());
        std::transform(lod.begin(), lod.end(), std::back_inserter(new_lod),
                       [](Vector<size_t> item) -> std::vector<size_t> {
                         std::vector<size_t> v;
                         v.reserve(item.size());
                         std::copy(item.begin(), item.end(),
                                   std::back_inserter(v));
                         return v;
                       });
        return new_lod;
#endif
      });

  // ---- SelectedRows -------------------------------------------------------
  // Rows are exchanged with Python as std::vector<int64_t>; CUDA builds
  // convert to / from Vector<int64_t>.
  py::class_<SelectedRows>(m, "SelectedRows")
      .def("__init__",
           [](SelectedRows &instance) { new (&instance) SelectedRows(); })
      .def("__init__",
           // `rows` is only read, so it is taken by const reference to avoid
           // an extra copy of the vector.
           [](SelectedRows &instance, const std::vector<int64_t> &rows,
              const int64_t &height) {
             new (&instance) SelectedRows(rows, height);
           })
      .def("get_tensor",
           [](SelectedRows &self) { return self.mutable_value(); },
           py::return_value_policy::reference)
      .def("set_height", &SelectedRows::set_height)
      .def("height", &SelectedRows::height)
      .def("set_rows",
           [](SelectedRows &self, std::vector<int64_t> rows) {
#ifndef PADDLE_WITH_CUDA
             self.set_rows(rows);
#else
             Vector<int64_t> new_rows(rows);
             self.set_rows(new_rows);
#endif
           })
      .def("rows", [](SelectedRows &self) {
#ifndef PADDLE_WITH_CUDA
        return self.rows();
#else
        auto rows = self.rows();
        std::vector<int64_t> new_rows;
        new_rows.reserve(rows.size());
        std::copy(rows.begin(), rows.end(), std::back_inserter(new_rows));
        return new_rows;
#endif
      });

  // ---- Variable -----------------------------------------------------------
  // Typed accessors for the value held by a Variable. The get_* accessors that
  // hand out pointers use return_value_policy::reference, i.e. Python refers
  // to the existing C++ object and does not take ownership of it.
  py::class_<Variable>(m, "Variable", R"DOC(Variable Class.

All parameter, weight, gradient are variables in Paddle.
)DOC")
      .def("is_int", [](const Variable &var) { return var.IsType<int>(); })
      .def("set_int",
           [](Variable &var, int val) -> void { *var.GetMutable<int>() = val; })
      .def("get_int", [](const Variable &var) -> int { return var.Get<int>(); })
      .def("is_float", [](const Variable &var) { return var.IsType<float>(); })
      .def("set_float",
           [](Variable &var, float val) -> void {
             *var.GetMutable<float>() = val;
           })
      .def("get_float",
           [](const Variable &var) -> float { return var.Get<float>(); })
      .def("get_tensor",
           [](Variable &self) -> LoDTensor * {
             return self.GetMutable<LoDTensor>();
           },
           py::return_value_policy::reference)
      .def("get_lod_rank_table",
           [](Variable &self) { return self.GetMutable<LoDRankTable>(); },
           py::return_value_policy::reference)
      .def("get_selected_rows",
           [](Variable &self) -> SelectedRows * {
             return self.GetMutable<SelectedRows>();
           },
           py::return_value_policy::reference)
      .def("get_lod_tensor_array",
           [](Variable &self) { return self.GetMutable<LoDTensorArray>(); },
           py::return_value_policy::reference)
#ifdef PADDLE_WITH_CUDA
      .def("get_communicator",
           [](Variable &self) -> platform::Communicator * {
             return self.GetMutable<platform::Communicator>();
           },
           py::return_value_policy::reference)
#endif
      .def("get_net",
           [](Variable &self) -> operators::NetOp * {
             return self.GetMutable<operators::NetOp>();
           },
           py::return_value_policy::reference);

  // ---- Scope --------------------------------------------------------------
  // Variables and child scopes are returned by reference (not owned by
  // Python).
  py::class_<Scope>(m, "Scope", "")
      .def("var",
           [](Scope &self, const std::string &name) -> Variable * {
             return self.Var(name);
           },
           py::return_value_policy::reference)
      .def("find_var", &Scope::FindVar, py::return_value_policy::reference)
      .def(py::init<>())
      .def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); },
           py::return_value_policy::reference)
      .def("drop_kids", &Scope::DropKids);

  //! @note: Be careful! PyBind will return std::string as an unicode, not
  //! Python str. If you want a str object, you should cast them in Python.
  // Returns the serialized OpProto of every registered operator that has a
  // proto and checker, as a list of bytes objects.
  m.def("get_all_op_protos", []() -> std::vector<py::bytes> {
    std::vector<py::bytes> ret_values;
    for (auto &iter : OpInfoMap::Instance().map()) {
      auto &info = iter.second;
      if (info.HasOpProtoAndChecker()) {
        std::string str;
        PADDLE_ENFORCE(
            info.Proto().SerializeToString(&str),
            "Serialize OpProto Error. This could be a bug of Paddle.");
        ret_values.emplace_back(str);
      }
    }
    return ret_values;
  });

  // Runs the gradient-op maker registered for `op_desc`'s type and returns the
  // resulting op descriptions as raw pointers released from their
  // unique_ptrs.
  // NOTE(review): ownership of the released pointers is left to pybind11's
  // default return policy -- confirm Python takes ownership.
  m.def("get_grad_op_descs",
        [](const OpDescBind &op_desc,
           const std::unordered_set<std::string> &no_grad_set,
           std::unordered_map<std::string, std::string> &grad_to_var,
           const std::vector<BlockDescBind *> &grad_sub_block) {
          std::vector<std::unique_ptr<OpDescBind>> grad_op_descs =
              framework::OpInfoMap::Instance()
                  .Get(op_desc.Type())
                  .GradOpMaker()(op_desc, no_grad_set, &grad_to_var,
                                 grad_sub_block);
          std::vector<OpDescBind *> grad_op_desc_ptrs(grad_op_descs.size());
          std::transform(
              grad_op_descs.begin(), grad_op_descs.end(),
              grad_op_desc_ptrs.begin(),
              [](std::unique_ptr<OpDescBind> &p) { return p.release(); });
          return grad_op_desc_ptrs;
        });

  // Copies `origin`, marks each (block index, op index) pair in `targets` as a
  // target, prunes the copy and returns a newly allocated ProgramDescBind
  // built from the pruned description.
  m.def("prune", [](const ProgramDescBind &origin,
                    const std::vector<std::array<size_t, 2>> &targets) {
    ProgramDescBind prog_with_targets(origin);
    for (const auto &t : targets) {
      prog_with_targets.MutableBlock(t[0])->Op(t[1])->MarkAsTarget();
    }
    proto::ProgramDesc pruned_desc;
    Prune(*prog_with_targets.Proto(), &pruned_desc);
    return new ProgramDescBind(pruned_desc);
  });

  // Applies InferenceOptimize to `origin`'s proto and returns a newly
  // allocated ProgramDescBind built from the result.
  m.def("inference_optimize", [](ProgramDescBind &origin) {
    proto::ProgramDesc pruned_desc;
    InferenceOptimize(*(origin.Proto()), &pruned_desc);
    return new ProgramDescBind(pruned_desc);
  });

  m.def_submodule(
       "var_names",
       "The module will return special predefined variable name in Paddle")
      .def("empty", []() { return kEmptyVarName; })
      .def("temp", []() { return kTempVarName; });

  // ---- DeviceContext ------------------------------------------------------
  // "create" is overloaded on the place type. The GPUPlace overload throws in
  // builds without PADDLE_WITH_CUDA.
  // clang-format off
  py::class_<paddle::platform::DeviceContext>(m, "DeviceContext")
      .def_static("create",
                  [](paddle::platform::CPUPlace& place)
                      -> paddle::platform::DeviceContext* {
                    return new paddle::platform::CPUDeviceContext();
                  })
      .def_static("create",
                  [](paddle::platform::GPUPlace& place)
                      -> paddle::platform::DeviceContext* {
#ifndef PADDLE_WITH_CUDA
                    PADDLE_THROW("GPUPlace is not supported in CPU device.");
#else
                    return new paddle::platform::CUDADeviceContext(place);
#endif
                  });
// clang-format on

#ifdef PADDLE_WITH_CUDA
  py::class_<platform::Communicator>(m, "Communicator").def(py::init<>());
#endif

  // ---- Places -------------------------------------------------------------
  py::class_<platform::GPUPlace>(m, "GPUPlace")
      .def(py::init<int>())
      .def("__str__", string::to_string<const platform::GPUPlace &>);

  py::class_<paddle::platform::CPUPlace>(m, "CPUPlace")
      .def(py::init<>())
      .def("__str__", string::to_string<const platform::CPUPlace &>);

  // Place can be assigned from either a CPUPlace or a GPUPlace.
  py::class_<platform::Place>(m, "Place")
      .def(py::init<>())
      .def("set_place",
           [](platform::Place &self, const platform::CPUPlace &cpu_place) {
             self = cpu_place;
           })
      .def("set_place",
           [](platform::Place &self, const platform::GPUPlace &gpu_place) {
             self = gpu_place;
           });

  // ---- Operator -----------------------------------------------------------
  // "create" parses a serialized proto::OpDesc from bytes, rejects input that
  // fails to parse or is not fully initialized, and builds the operator via
  // OpRegistry.
  py::class_<OperatorBase>(m, "Operator")
      .def_static("create",
                  [](py::bytes protobin) {
                    proto::OpDesc desc;
                    PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
                                   "Cannot parse user input to OpDesc");
                    PADDLE_ENFORCE(desc.IsInitialized(),
                                   "User OpDesc is not initialized, reason %s",
                                   desc.InitializationErrorString());
                    return OpRegistry::CreateOp(desc);
                  })
      .def("backward",
           [](const OperatorBase &forwardOp,
              const std::unordered_set<std::string> &no_grad_vars) {
             return Backward(forwardOp, no_grad_vars).release();
           })
      // Runs the operator, then waits on the device context before returning
      // to Python.
      .def("run",
           [](OperatorBase &self, const Scope &scope,
              const platform::DeviceContext &dev_ctx) {
             self.Run(scope, dev_ctx);
             dev_ctx.Wait();
           })
      .def("type",
           [](const OperatorBase &op) -> std::string { return op.Type(); })
      .def("outputs",
           [](const OperatorBase &op)
               -> std::map<std::string, std::vector<std::string>> {
                 return op.Outputs();
               })
      .def("output_vars",
           [](const OperatorBase &op) { return op.OutputVars(true); })
      .def("inputs", [](const OperatorBase &op) { return op.Inputs(); })
      .def("input_vars", [](const OperatorBase &op) { return op.InputVars(); })
      .def("__str__", &OperatorBase::DebugString)
      .def("no_intermediate_outputs",
           [](const OperatorBase &op) { return op.OutputVars(false); })
      .def("support_gpu", &OperatorBase::SupportGPU);

  // ---- Net ----------------------------------------------------------------
  // "create" returns a new NetOp whose type is set to "plain_net".
  py::class_<operators::NetOp, OperatorBase>(m, "Net")
      .def_static("create",
                  []() -> operators::NetOp * {
                    auto *retv = new operators::NetOp;
                    retv->SetType("plain_net");
                    return retv;
                  })
      .def("append_op", [](operators::NetOp &self,
                           const OperatorBase &op) { self.AppendOp(op); })
      .def("complete_add_op", &operators::NetOp::CompleteAddOp)
      .def("complete_add_op", [](std::shared_ptr<operators::NetOp> &self) {
        self->CompleteAddOp();
      });

  // cond_op
  // "create" validates the serialized OpDesc the same way Operator.create
  // does, then downcasts the created operator to CondOp.
  // NOTE(review): the static_cast is unchecked -- it assumes the OpDesc
  // describes a CondOp; confirm callers guarantee that.
  py::class_<operators::CondOp, OperatorBase>(m, "CondOp")
      .def_static("create",
                  [](py::bytes protobin) -> operators::CondOp * {
                    proto::OpDesc desc;
                    PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
                                   "Cannot parse user input to OpDesc");
                    PADDLE_ENFORCE(desc.IsInitialized(),
                                   "User OpDesc is not initialized, reason %s",
                                   desc.InitializationErrorString());
                    auto cond_op = OpRegistry::CreateOp(desc);
                    return static_cast<operators::CondOp *>(cond_op.release());
                  })
      .def("set_truenet",
           [](operators::CondOp &self, const operators::NetOp &net) -> void {
             self.set_truenet(net.Clone());
           })
      .def("set_falsenet",
           [](operators::CondOp &self, const operators::NetOp &net) -> void {
             self.set_falsenet(net.Clone());
           });

  // ---- Executor -----------------------------------------------------------
  py::class_<framework::Executor>(m, "Executor")
      .def(py::init<std::vector<platform::Place> &>())
      .def("run", &Executor::Run);

  // ---- Module-level helper functions --------------------------------------
  m.def("unique_integer", UniqueIntegerGenerator);
  m.def("init_gflags", framework::InitGflags);
  m.def("init_devices", &framework::InitDevices);

  m.def("is_compile_gpu", IsCompileGPU);
  m.def("set_feed_variable", framework::SetFeedVariable);
  m.def("get_fetch_variable", framework::GetFetchVariable);

  // Bindings for the program / block / variable / operator descriptions and
  // for constant values are registered by these helpers.
  BindProgramDesc(m);
  BindBlockDesc(m);
  BindVarDsec(m);
  BindOpDesc(m);
  BindConstValue(m);

  // ---- LoDRankTable -------------------------------------------------------
  // "items" returns the table as a list of (index, length) pairs.
  py::class_<framework::LoDRankTable>(m, "LodRankTable")
      .def("items", [](framework::LoDRankTable &table) {
        std::vector<std::pair<size_t, size_t>> res;
        for (auto &item : table.items()) {
          res.push_back({item.index, item.length});
        }
        return res;
      });

  // ---- LoDTensorArray -----------------------------------------------------
  // __setitem__ and append do not copy tensor data: the destination element
  // shares data with `t` (ShareDataWith) and takes a copy of its LoD.
  py::class_<LoDTensorArray>(m, "LoDTensorArray")
      .def("__getitem__",
           [](LoDTensorArray &self, size_t i) { return &self.at(i); },
           py::return_value_policy::reference)
      .def("__len__", [](LoDTensorArray &self) { return self.size(); })
      .def("__setitem__",
           [](LoDTensorArray &self, size_t i, const LoDTensor &t) {
             PADDLE_ENFORCE_LT(i, self.size());
             self[i].ShareDataWith(t);
             self[i].set_lod(t.lod());
           })
      .def("append", [](LoDTensorArray &self, const LoDTensor &t) {
        self.emplace_back();
        self.back().ShareDataWith(t);
        self.back().set_lod(t.lod());
      });

  m.def("op_support_gpu", OpSupportGPU);
#ifdef PADDLE_WITH_CUDA
  m.def("get_cuda_device_count", platform::GetCUDADeviceCount);

  m.def("nvprof_init", platform::CudaProfilerInit);
  m.def("nvprof_start", platform::CudaProfilerStart);
  m.def("nvprof_stop", platform::CudaProfilerStop);
#endif

  return m.ptr();
}
}  // namespace pybind
}  // namespace paddle