// Copyright (c) 2021 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <cstring>
#include <memory>
#include <numeric>
#include <string>
#include <utility>
#include <vector>

#include "paddle/cinn/common/cinn_value.h"
#include "paddle/cinn/frontend/interpreter.h"
#include "paddle/cinn/hlir/framework/graph_compiler.h"
#include "paddle/cinn/hlir/framework/node.h"
#include "paddle/cinn/hlir/framework/op.h"
#include "paddle/cinn/hlir/framework/op_strategy.h"
#include "paddle/cinn/hlir/framework/scope.h"
#include "paddle/cinn/hlir/op/use_ops.h"
#include "paddle/cinn/pybind/bind.h"
#include "paddle/cinn/runtime/flags.h"

DECLARE_bool(cinn_ir_schedule);

namespace cinn::pybind {

namespace py = pybind11;
using namespace cinn::hlir::framework;  // NOLINT

// Bind the HLIR framework types (Operator, OpValueType, NodeAttr, Scope,
// Tensor) into the given Python module.
void BindFramework(pybind11::module *m) {
  py::class_<Operator>(*m, "Operator")
      .def("get_op_attrs",
           [](const std::string &key) {
             return Operator::GetAttrs<StrategyFunction>(key);
           })
      .def("get_op_shape_attrs", [](const std::string &key) {
        return Operator::GetAttrs<InferShapeFunction>(key);
      });

  py::class_<OpValueType<StrategyFunction>>(*m, "OpValueType")
      .def("apply_strategy",
           [](OpValueType<StrategyFunction> &self,
              const std::string &key,
              const NodeAttr &attrs,
              const std::vector<ir::Tensor> &inputs,
              const std::vector<Type> &out_types,
              const std::vector<std::vector<int>> &output_shapes,
              const common::Target &target) {
             // Look up the op and select the best strategy implementation
             // for the given inputs and target.
             const Operator *op_ptr = Operator::Get(key);
             auto impl = OpStrategy::SelectImpl(self[op_ptr](
                 attrs, inputs, out_types, output_shapes, target));
             std::vector<common::CINNValue> temp_inputs;
             std::vector<ir::Tensor> res;
             for (auto &tensor : inputs) {
               res.push_back(tensor);
               temp_inputs.push_back(common::CINNValue(tensor));
             }

             ir::LoweredFunc func;
             if (FLAGS_cinn_ir_schedule) {
               std::string output_name = "out";
               temp_inputs.emplace_back(output_name);
               std::vector<std::string> input_output_names;
               for (const auto &input : inputs) {
                 input_output_names.push_back(input->name);
               }
               input_output_names.push_back(output_name);
               std::vector<ir::LoweredFunc> funcs =
                   hlir::framework::GetFuncFromImpl(
                       impl,
                       common::CINNValuePack{temp_inputs},
                       res,
                       input_output_names,
                       key,
                       target);
               CHECK_EQ(funcs.size(), 1U);
               func = funcs[0];
             } else {
               common::CINNValuePack C =
                   impl->fcompute(common::CINNValuePack{temp_inputs});
               poly::StageMap stages = C.back();
               // Make sure all the tensors are registered in the stages
               // before the schedule runs.
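               // fcompute returns the output tensor expressions followed by
               // a trailing StageMap (retrieved via C.back() above), so the
               // loops below iterate only up to C->size() - 1 to skip the
               // StageMap entry.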
               for (int i = 0; i < C->size() - 1; i++) {
                 ir::Expr temp = C[i];
                 stages->InsertLazily(temp.as_tensor_ref());
               }
               C = impl->fschedule(C);
               for (int i = 0; i < C->size() - 1; i++) {
                 ir::Expr temp = C[i];
                 res.push_back(temp.as_tensor_ref());
               }
               func = Lower(key, stages, res);
             }
             return func;
           });

  py::class_<OpValueType<InferShapeFunction>>(*m, "OpValueType1")
      .def("infer_shape",
           [](OpValueType<InferShapeFunction> &self,
              const std::string &key,
              const std::vector<std::vector<int>> &input_shapes,
              const AttrMapType &attrs) {
             const Operator *op_ptr = Operator::Get(key);
             auto shapes = self[op_ptr](input_shapes, attrs);
             return shapes;
           });

  py::class_<NodeAttr>(*m, "NodeAttr")
      .def(py::init<>())
      .def_readwrite("attr_store", &NodeAttr::attr_store)
      .def("set_attr",
           [](NodeAttr &self, const std::string &key, NodeAttr::attr_t value) {
             self.attr_store[key] = value;
           })
      .def("get_attr",
           [](NodeAttr &self, const std::string &key) {
             CHECK_EQ(self.attr_store.count(key), 1)
                 << "Didn't find value with key [" << key << "].";
             return self.attr_store[key];
           })
      .def("__str__", [](NodeAttr &self) { return utils::GetStreamCnt(self); });

  py::class_<Scope, std::shared_ptr<Scope>>(*m, "Scope")
      .def(py::init<>())
      .def("get_tensor",
           [](Scope &self, const std::string &name, const Target &target) {
             auto t = self.GetTensor(name);
             py::dtype dt(common::Type2Str(t->type()));
             py::array::ShapeContainer shape(t->shape().data().begin(),
                                             t->shape().data().end());
             py::array array(std::move(dt), std::move(shape));
             auto *mutable_data = array.mutable_data();
             if (target.arch == Target::Arch::X86) {
               std::memcpy(mutable_data,
                           t->data<void>(),
                           t->shape().numel() * t->type().bytes());
             } else if (target.arch == Target::Arch::NVGPU) {
#ifdef CINN_WITH_CUDA
               CUDA_CALL(cudaMemcpy(mutable_data,
                                    reinterpret_cast<void *>(
                                        t->mutable_data(target, t->type())),
                                    t->shape().numel() * t->type().bytes(),
                                    cudaMemcpyDeviceToHost));
#else
               LOG(FATAL)
                   << "To use CUDA backends, you need to set WITH_CUDA ON!";
#endif
             } else {
               CINN_NOT_IMPLEMENTED
             }
             return array;
           })
      .def("var_names", &Scope::var_names);

  py::class_<hlir::framework::_Tensor_,
             std::shared_ptr<hlir::framework::_Tensor_>>(*m, "SharedTensor");
  py::class_<hlir::framework::Tensor,
             std::shared_ptr<hlir::framework::Tensor>>(*m, "Tensor")
      .def(py::init<>())
      .def("shape",
           [](hlir::framework::Tensor &self) { return self->shape().data(); })
      .def("set_type",
           [](hlir::framework::Tensor &self, Type type) {
             self->set_type(type);
           })
      .def("numpy",
           [](hlir::framework::Tensor &self, const common::Target &target) {
             std::string type_str = common::Type2Str(self->type());
             // NumPy has no native bfloat16, so expose it as uint16.
             if (type_str == "bfloat16") {
               type_str = "uint16";
             }
             py::dtype dt(type_str);
             py::array::ShapeContainer shape(self->shape().data().begin(),
                                             self->shape().data().end());
             py::array array(std::move(dt), std::move(shape));
             void *array_data = array.mutable_data();
             if (target.arch == Target::Arch::X86) {
               std::memcpy(array_data,
                           self->data<void>(),
                           self->shape().numel() * self->type().bytes());
             } else if (target.arch == Target::Arch::NVGPU) {
#ifdef CINN_WITH_CUDA
               CUDA_CALL(cudaMemcpy(array_data,
                                    self->data<void>(),
                                    self->shape().numel() * self->type().bytes(),
                                    cudaMemcpyDeviceToHost));
#else
               LOG(FATAL)
                   << "To use CUDA backends, you need to set WITH_CUDA ON!";
#endif
             } else {
               CINN_NOT_IMPLEMENTED
             }
             return array;
           })
      .def("from_numpy",
           [](hlir::framework::Tensor &self,
              py::array array,
              const common::Target &target) {
             CHECK(array.dtype().is(py::dtype(common::Type2Str(self->type()))))
                 << "the input array's dtype must match the tensor's dtype ("
                 << common::Type2Str(self->type()) << ")";
             hlir::framework::shape_t shape;
             std::copy_n(array.shape(), array.ndim(), std::back_inserter(shape));
             CHECK_EQ(
                 std::accumulate(shape.begin(),
                                 shape.end(),
                                 1,
                                 [](int32_t a, int32_t b) { return a * b; }),
                 self->shape().numel());
             auto *data = self->mutable_data(target, self->type());
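             // The NumPy buffer always lives in host memory, so X86 can take
             // a plain memcpy while NVGPU needs a host-to-device cudaMemcpy.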
             if (target.arch == Target::Arch::X86) {
               std::memcpy(data,
                           array.data(),
                           self->shape().numel() * self->type().bytes());
             } else if (target.arch == Target::Arch::NVGPU) {
#ifdef CINN_WITH_CUDA
               CUDA_CALL(
                   cudaMemcpy(reinterpret_cast<void *>(data),
                              reinterpret_cast<const void *>(array.data()),
                              self->shape().numel() * self->type().bytes(),
                              cudaMemcpyHostToDevice));
#else
               LOG(FATAL)
                   << "To use CUDA backends, you need to set WITH_CUDA ON!";
#endif
             } else {
               CINN_NOT_IMPLEMENTED
             }
           });
}

}  // namespace cinn::pybind