// Copyright (c) 2021 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/cinn/ir/tensor.h"

#include <cstring>

#include "paddle/cinn/cinn.h"
#include "paddle/cinn/common/arithmatic.h"
#include "paddle/cinn/common/axis.h"
#include "paddle/cinn/common/cas.h"
#include "paddle/cinn/common/common.h"
#include "paddle/cinn/common/ir_util.h"
#include "paddle/cinn/ir/buffer.h"
#include "paddle/cinn/ir/op/ir_operators.h"
#include "paddle/cinn/ir/operation.h"
#include "paddle/cinn/ir/utils/ir_printer.h"
#include "paddle/cinn/ir/utils/ir_visitor.h"
#include "paddle/cinn/lang/compute.h"
#include "paddle/cinn/poly/isl_utils.h"
#include "paddle/cinn/poly/stage.h"

namespace cinn {
namespace ir {

Tensor _Tensor_::Make(const std::string &name,
                      Type dtype,
                      const std::vector<Expr> &shape,
                      const std::vector<Expr> &domain,
                      FunctionRef fn,
                      const std::vector<Var> &reduce_axis) {
  CHECK(!name.empty()) << "Tensor name is set empty";
  auto n = make_shared<_Tensor_>();
  n->name = name;
  n->shape = shape;
  n->domain = domain;
  n->reduce_axis = reduce_axis;
  n->set_type(dtype);
  n->operation = fn;
  n->InitAxis();
  return Tensor(n);
}

size_t Tensor::ndims() const { return operator->()->shape.size(); }

std::set<std::string> _Tensor_::GetDependTensorNames() const {
  std::set<std::string> names;

  auto add_depend_tensors_from_expr = [&](Expr expr) {
    auto tensors = CollectIRNodes(expr, [&](const Expr *x) {
      return x->as_tensor() && x->as_tensor()->name != this->name;
    });
    for (auto &e : tensors) {
      names.insert(e.as_tensor()->name);
    }
  };

  if (is_compute_node()) {
    add_depend_tensors_from_expr(body());
  } else if (is_call_node()) {
    add_depend_tensors_from_expr(body());
  } else if (is_extern_call_node()) {
    add_depend_tensors_from_expr(body());
  } else if (is_placeholder_node()) {
    return names;
  } else {
    CINN_NOT_IMPLEMENTED
  }

  return names;
}

Expr Tensor::operator()(const std::vector<Expr> &indices) const {
  CHECK(!self()->is_tuple()) << "should extract a specific value from the "
                                "tuple and operate on that instead";
  auto *node = operator->();

  CHECK_EQ(indices.size(), ndims())
      << "number of indices not match the dimension";
  return Load::Make(*this, indices);
}

Expr _Tensor_::inline_expanded(const std::vector<Expr> &indices) {
  CHECK(is_compute_node());
  return get_compute_op()->producer_fn(indices);
}

const char *_Tensor_::operation_type() const {
  if (!operation.defined()) return "";
  return operation->as<ir::_Operation_>()->func_type();
}

bool _Tensor_::is_compute_node() const {
  return std::strcmp(operation_type(), ir::ComputeOp::__func_type__) == 0;
}
bool _Tensor_::is_placeholder_node() const {
  return std::strcmp(operation_type(), ir::PlaceholderOp::__func_type__) == 0;
}
bool _Tensor_::is_call_node() const {
  return std::strcmp(operation_type(), ir::CallOp::__func_type__) == 0;
}
bool _Tensor_::is_extern_call_node() const {
  if (std::strcmp(operation_type(), ir::CallOp::__func_type__) == 0) {
    auto *op = operation->as<ir::CallOp>();
    auto *call = op->call_expr.As<ir::Call>();
    if (call) {
      return call->is_extern_call();
    }
  }
  return false;
}

bool _Tensor_::is_buffer_shared_node() const {
  return std::strcmp(operation_type(), ir::BufferShareOp::__func_type__) == 0;
}

bool _Tensor_::is_preceding_view_node() const {
  return std::strcmp(operation_type(), ir::PrecedingViewOp::__func_type__) ==
         0;
}

ComputeOp *_Tensor_::get_compute_op() const {
  if (!is_compute_node()) return nullptr;
  return operation->as<ComputeOp>();
}

PlaceholderOp *_Tensor_::get_placeholder_op() const {
  if (!is_placeholder_node()) return nullptr;
  return operation->as<PlaceholderOp>();
}

void _Tensor_::InitAxis() const {
  // CHECK(!domain_without_reduce_axis().empty());
  axis_ = common::GenDefaultAxis(domain_without_reduce_axis().size());
}

bool _Tensor_::has_expression() const {
  return (!is_placeholder_node()) && (!is_tuple_get()) &&
         (!is_buffer_shared_node());
}

isl::set _Tensor_::GenerateIslDomain() const {
  // include the reduce axis.
  std::vector<poly::Dim> dims;
  if (has_expression()) {
    if (axis_.empty()) InitAxis();
    auto domain = domain_with_reduce_axis();
    CHECK_EQ(axis_with_reduce().size(), domain.size());
    auto _axis_with_reduce = axis_with_reduce();
    for (int i = 0; i < domain.size(); i++) {
      auto dim = domain[i];
      if (dim.is_constant()) {
        dims.emplace_back(_axis_with_reduce[i]->name, 0, dim.as_int32() - 1);
      } else {
        dims.emplace_back(_axis_with_reduce[i]->name,
                          Expr(0),
                          Sub::Make(dim, common::make_const(1)));
      }
    }
  }

  poly::Domain isl_domain(Context::isl_ctx(), name, dims);
  VLOG(1) << "name:" << this->name << ", domain: " << isl_domain.__str__();
  return isl_domain.to_isl();
}

std::vector<Expr *> _Tensor_::expr_fields() {
  std::vector<Expr *> res;
  const char *func_type = operation->as<ir::_Operation_>()->func_type();
  if (operation.defined()) {
    if (is_compute_node()) {
      auto *op = operation->as<ir::ComputeOp>();
      for (auto &expr : op->body) res.push_back(&expr);
    } else if (is_placeholder_node()) {
      auto *op = operation->as<ir::PlaceholderOp>();
    } else if (is_call_node()) {
      auto *op = operation->as<ir::CallOp>();
      for (auto &expr : op->read_args()) res.push_back(&expr);
    } else if (is_buffer_shared_node()) {
    } else {
      CINN_NOT_IMPLEMENTED
    }
  }
  for (auto &e : shape) {
    res.push_back(&e);
  }
  for (auto &e : domain) {
    res.push_back(&e);
  }
  return res;
}

std::vector<const Expr *> _Tensor_::expr_fields() const {
  std::vector<const Expr *> res;
  const char *func_type = operation->as<ir::_Operation_>()->func_type();
  if (operation.defined()) {
    if (is_compute_node()) {
      auto *op = operation->as<ir::ComputeOp>();
      for (auto &expr : op->body) res.push_back(&expr);
    } else if (is_placeholder_node()) {
      auto *op = operation->as<ir::PlaceholderOp>();
    } else if (is_call_node()) {
      auto *op = operation->as<ir::CallOp>();
      for (auto &expr : op->read_args()) res.push_back(&expr);
    } else if (is_buffer_shared_node()) {
    } else {
      LOG(ERROR) << "func_type: " << func_type;
      CINN_NOT_IMPLEMENTED
    }
  }
  for (auto &e : shape) {
    res.push_back(&e);
  }
  for (auto &e : domain) {
    res.push_back(&e);
  }
  return res;
}

_Tensor_::~_Tensor_() {}

Expr _Tensor_::body() const {
  if (is_placeholder_node()) return Expr();
  if (is_buffer_shared_node()) return Expr();
  if (is_compute_node()) return operation->as<ir::ComputeOp>()->body.front();
  if (is_call_node()) return operation->as<ir::CallOp>()->call_expr;
  CINN_NOT_IMPLEMENTED;
}

Expr *_Tensor_::mutable_body() {
  if (is_placeholder_node()) return nullptr;
  if (is_buffer_shared_node()) return nullptr;
  if (is_compute_node()) return &operation->as<ir::ComputeOp>()->body.front();
  if (is_call_node()) return &operation->as<ir::CallOp>()->call_expr;
  CINN_NOT_IMPLEMENTED
}

ir::Tensor _Tensor_::InitReduction(poly::StageMap stages,
                                   const Target &target) const {
  CHECK(contains_reduce_axis())
      << "InitReduction only works on a reduce tensor";
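  // Overall flow: if an init tensor for this reduction already exists in the
  // stage map, reuse it; otherwise create one that stores the reduce init
  // value, share this tensor's buffer with it, and either ComputeAt it under
  // the leading non-reduce axes or copy this stage's transform and forloop
  // info onto it.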
  // return if it already exists.
  std::string init_reduce_tensor_name = GenReduceInitTensorNameOf(name);
  if (stages->Lookup(init_reduce_tensor_name))
    return stages[this]->LookupCtrlDepend(init_reduce_tensor_name);

  // create a new init tensor.
  auto init_tensor = lang::Compute(
      domain,
      [=](const std::vector<Expr> &axis) { return GetReduceInitVal(); },
      init_reduce_tensor_name);
  stages->InsertLazily(init_tensor);
  std::string this_transform = isl_map_to_str(stages[this]->transform().get());
  isl::ctx this_ctx = stages[this]->transform().ctx();
  isl::map temp_transform(this_ctx, this_transform);
  int reduce_axis_num = this->reduce_axis.size();
  auto dim_out_names =
      poly::isl_get_dim_names(stages[this]->transform(), isl_dim_out);
  auto dim_in_size = isl_map_dim(stages[this]->transform().get(), isl_dim_in);
  auto dim_in_names =
      poly::isl_get_dim_names(stages[this]->transform(), isl_dim_in);
  std::vector<std::string> reduce_axis_input =
      stages[this]->origin_reduce_axis_names();
  auto origin_domain = stages[this]->domain();
  auto reduce_axis_output = poly::GetRelatedOutputAxies(
      temp_transform, origin_domain, reduce_axis_input);
  std::set<std::string> reduce_axis_output_set;
  for (auto &i : reduce_axis_output) {
    reduce_axis_output_set.insert(i);
  }
  int compute_at_axis = -1;
  for (auto &i : dim_out_names) {
    if (reduce_axis_output_set.count(i) == 0) {
      compute_at_axis++;
    } else {
      break;
    }
  }
  temp_transform = poly::RemoveAxiesByOutputNames(
      temp_transform, origin_domain, reduce_axis_output);

  //! When the first axis is not a reduce axis, do ComputeAt.
  if (compute_at_axis >= 0) {
    stages[init_tensor]->ComputeAt2(stages[this], compute_at_axis);
    init_tensor->new_indices = this->new_indices;
    stages[this]->CtrlDepend(init_tensor);
    stages[init_tensor]->ShareBufferWith(stages[this]);
    init_tensor->shape = shape;
    return init_tensor;
  }

  //! When reduce axes are reordered to the front, ComputeAt is illegal,
  //! so we just copy the transform and forloop info.
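  // Retarget the copied transform to the init tensor (rename its input and
  // output tuples), install it on the init stage, and replay this stage's
  // forloop annotations on the matching output axes.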
  isl_map_set_tuple_name(
      temp_transform.get(), isl_dim_in, init_reduce_tensor_name.c_str());
  isl_map_set_tuple_name(
      temp_transform.get(), isl_dim_out, init_reduce_tensor_name.c_str());
  stages[init_tensor]->SetTransform(temp_transform);
  auto init_dim_out_names =
      poly::isl_get_dim_names(temp_transform, isl_dim_out);
  std::map<int, poly::StageForloopInfo> temp_forloop_info =
      stages[this]->forloop_infos();
  std::map<int, poly::StageForloopInfo> init_forloop_info;
  for (auto &i : temp_forloop_info) {
    for (int j = 0; j < init_dim_out_names.size(); j++) {
      if (i.first < 0) continue;
      int new_i = poly::isl_get_original_axes_from_optimized_level(
          stages[this]->transformed_domain().get(), i.first);
      if (dim_out_names[new_i] == init_dim_out_names[j]) {
        stages[init_tensor]->AddForloopInfo(j, i.second);
      }
    }
  }
  init_tensor->new_indices = this->new_indices;
  stages[this]->CtrlDepend(init_tensor);
  stages[init_tensor]->ShareBufferWith(stages[this]);
  init_tensor->shape = shape;
  return init_tensor;
}

ir::Tensor _Tensor_::GetInitTensor(poly::StageMap stages,
                                   const Target &target) const {
  return InitReduction(stages, target);
}

Expr _Tensor_::tensor_store_expanded_body() {
  CHECK(!is_placeholder_node()) << "placeholder should not expand store";

  Expr final_body = body();
  if (shape.empty()) return final_body;

  std::vector<Expr> g_axis = common::GenDefaultAxisAsExpr(shape.size());
  if (!new_indices.empty()) {
    g_axis = new_indices;
  }

  auto *reduce_node = body().As<ir::Reduce>();
  if (reduce_node) {
    final_body = reduce_node->body;
    switch (reduce_node->reduce_type) {
      case ir::Reduce::kSum:
        final_body = Tensor(this)(g_axis) + final_body;
        break;
      case ir::Reduce::kMul:
        final_body = Tensor(this)(g_axis) * final_body;
        break;
      case ir::Reduce::kMax:
        final_body = Max::Make(Tensor(this)(g_axis), final_body);
        break;
      case ir::Reduce::kMin:
        final_body = Min::Make(Tensor(this)(g_axis), final_body);
        break;
      case ir::Reduce::kAll:
        final_body = Tensor(this)(g_axis) && final_body;
        break;
      case ir::Reduce::kAny:
        final_body = Tensor(this)(g_axis) || final_body;
        break;
      default:
        CINN_NOT_IMPLEMENTED
    }
  }

  if (is_tuple()) return final_body;

  return ir::Store::Make(Expr(Buffer(this)), final_body, g_axis);
}

void _Tensor_::Bind(lang::Buffer &buffer) {
  // CHECK(!inlined()) << "Inlined tensor should not bind a buffer";
  CHECK(!buffer->type().is_void());
  if (this->buffer.defined()) {
    // remove the old buffer.
    if (this->buffer == buffer.buffer()) return;
    this->buffer->Unbind(this);
  }
  // Record the tensors that are already bound to this buffer.
  buffer_depended_tensor_names_ = buffer.buffer()->binded_tensor_names();

  buffer.buffer()->BindTo(this);
  CHECK(!buffer->binded_tensor_names().empty());
  this->buffer = buffer.buffer();
  CHECK(this->buffer.defined());
}

void _Tensor_::Bind(const Buffer &buffer) {
  lang::Buffer buf(buffer);
  Bind(buf);
}

void _Tensor_::WithBuffer(const Type &type) {
  Type buf_type = type.is_void() ? type_ : type;
  lang::Buffer buf(buf_type);
  buf->target = common::DefaultHostTarget();
  Bind(buf);
}

void _Tensor_::WithBuffer(const std::string &memory_type,
                          const std::string &buffer_name,
                          const Type &type) {
  Type buf_type = type.is_void() ? type_ : type;
  if (this->buffer.defined()) {
    this->buffer->dtype = buf_type;
    this->buffer->name = buffer_name;
    if (memory_type == "shared") {
      this->buffer->memory_type = MemoryType::GPUShared;
    } else if (memory_type == "local") {
      this->buffer->memory_type = MemoryType::GPULocal;
    } else if (memory_type == "global") {
      this->buffer->memory_type = MemoryType::Heap;
    } else {
      LOG(FATAL) << "Not supported memory type " << memory_type;
    }
  } else {
    lang::Buffer buf(buf_type, buffer_name);
    buf->target = common::DefaultHostTarget();
    Bind(buf);

    if (memory_type == "shared") {
      buf->memory_type = MemoryType::GPUShared;
    } else if (memory_type == "local") {
      buf->memory_type = MemoryType::GPULocal;
    } else if (memory_type == "global") {
      buf->memory_type = MemoryType::Heap;
    } else {
      LOG(FATAL) << "Not supported memory type " << memory_type;
    }
  }
}

bool _Tensor_::HasSameShapeWith(const Tensor &other) const {
  if (shape.size() != other->shape.size()) return false;

  for (int i = 0; i < shape.size(); i++) {
    Expr dim0 = common::AutoSimplify(shape[i]);
    Expr dim1 = common::AutoSimplify(other->shape[i]);

    if (dim0 != dim1) return false;
  }
  return true;
}

Tensor _Tensor_::TupleGet(int offset) const {
  CHECK(is_tuple());
  auto *call = body().As<ir::Call>();
  CHECK_LT(offset, call->write_args.size());
  auto tensor = call->write_args[offset].as_tensor_ref();
  tensor->WithBuffer();
  return tensor;
}

bool _Tensor_::is_tuple() const {
  if (!has_expression()) return false;
  auto *call = body().As<ir::Call>();
  if (call && call->is_extern_call() && !call->write_args.empty()) return true;
  return false;
}

std::vector<Expr> _Tensor_::domain_with_reduce_axis() const {
  if (reduce_axis.empty()) return domain;
  auto res = domain;
  for (const Var &axis : reduce_axis) {
    CHECK(axis->upper_bound.type().is_int(32)) << axis->upper_bound;
    res.push_back(axis->upper_bound);
  }
  return res;
}

bool operator<(const Tensor &a, const Tensor &b) { return a->name < b->name; }

Tensor::Tensor(const std::string &name,
               Type dtype,
               const std::vector<Expr> &shape,
               const std::vector<Expr> &domain,
               FunctionRef fn,
               const std::vector<Var> &reduce_axis)
    : IrNodeRef(
          _Tensor_::Make(name, dtype, shape, domain, fn, reduce_axis).self()) {
}

bool _Tensor_::is_tuple_get() const {
  return is_call_node() && operation.defined() &&
         operation->as<ir::CallOp>()->func_type() ==
             ir::CallOp::__func_type__ &&
         operation->as<ir::CallOp>()->is_tuple_get;
}

bool _Tensor_::IsDependOnStatement(absl::string_view statement) {
  if (!is_compute_node()) {
    return false;
  }

  auto depend_tensors = DependingTensorNames();
  for (const auto &x : depend_tensors) {
    if (x == statement) return true;
  }
  return false;
}

std::set<std::string> _Tensor_::DependingTensorNames() {
  std::set<std::string> res;
  if (body().defined()) {
    auto depend_tensors = ir::CollectIRNodes(
        body(), [](const Expr *x) -> bool { return x->as_tensor(); });
    for (const auto &x : depend_tensors) {
      if (x.get() != this) {
        res.insert(x.as_tensor()->name);
      }
    }
  }
  return res;
}

const std::vector<Var> &_Tensor_::axis() const {
  CHECK_EQ(axis_.size(), domain_without_reduce_axis().size());
  return axis_;
}

std::vector<Var> _Tensor_::axis_with_reduce() const {
  auto axis = axis_;
  axis.insert(axis.end(), reduce_axis.begin(), reduce_axis.end());
  return axis;
}

bool _Tensor_::Uses(const Tensor &other) const {
  auto loads = ir::CollectIRNodes(body(), [&](const Expr *x) {
    auto *loadn = x->As<ir::Load>();
    if (!loadn) return false;
    return loadn->tensor.as_tensor()->name == other->name;
  });
  return !loads.empty();
}

ir::Tensor _Tensor_::Reshape(const std::vector<Expr> &shape,
                             poly::StageMap stages) const {
  CHECK(!stages[this]->inlined());
  auto op = BufferShareOp::Make();
  auto n = make_shared<_Tensor_>();
  auto selft = Tensor(const_cast<ir::_Tensor_ *>(this));

  {
    int32_t this_num_elements = 1;
    for (auto &e : this->shape) {
      this_num_elements = this_num_elements * e.as_int32();
    }

    int32_t num_elements = 1;
    for (auto &e : shape) {
      num_elements = num_elements * e.as_int32();
    }

    CHECK_EQ(this_num_elements, num_elements) << "number of elements mismatch.";
  }

  n->name = Context::Global().NewName(name + "_reshape");
  n->shape = shape;
  n->domain = shape;
  n->set_type(type());
  n->operation = op;
  n->InitAxis();

  auto t = Tensor(n);
  stages->InsertLazily(t);

  stages[n]->ShareBufferWith(stages[this]);
  stages[n]->CtrlDepend(selft);
  return t;
}

ir::Tensor _Tensor_::ReshapeCopied(const std::vector<Expr> &shape,
                                   poly::StageMap stages) const {
  auto t = ir::Tensor(const_cast<ir::_Tensor_ *>(this));
  auto copied = Compute(
      domain,
      [=](const std::vector<Expr> &axis) { return t(axis); },
      Context::Global().NewName(this->name + "_copied"));
  stages->InsertLazily(copied);
  auto res = copied->Reshape(shape, stages);
  stages->InsertLazily(res);
  return res;
}

Shared<poly::Stage> CreateStage(Tensor tensor) {
  auto isl_domain = tensor->GenerateIslDomain();
  return poly::Stage::New(isl_domain, tensor->body(), tensor.self());
}

static constexpr char kReduceInitSuffix[] = "__reduce_init";

std::string GenReduceInitTensorNameOf(const std::string &tensor_name) {
  return tensor_name + kReduceInitSuffix;
}

bool IsReduceInitTensorName(const std::string &tensor_name) {
  std::string reduce_init_suffix(kReduceInitSuffix);
  return tensor_name.length() > reduce_init_suffix.size() &&
         tensor_name.substr(tensor_name.length() - reduce_init_suffix.size(),
                            reduce_init_suffix.size()) == reduce_init_suffix;
}

std::string GetOriginalReduceTensorName(const std::string &tensor_name) {
  std::string reduce_init_suffix(kReduceInitSuffix);
  if (IsReduceInitTensorName(tensor_name)) {
    return tensor_name.substr(
        0, tensor_name.length() - reduce_init_suffix.size());
  }
  return tensor_name;
}

bool _Tensor_::is_reduce_sum() const {
  if (!contains_reduce_axis()) return false;
  return body().As<ir::Reduce>() &&
         body().As<ir::Reduce>()->reduce_type == ir::Reduce::ReduceType::kSum;
}
bool _Tensor_::is_reduce_mul() const {
  if (!contains_reduce_axis()) return false;
  return body().As<ir::Reduce>() &&
         body().As<ir::Reduce>()->reduce_type == ir::Reduce::ReduceType::kMul;
}

Expr _Tensor_::GetReduceInitVal() const {
  CHECK(is_reduce_tensor());
  return body().As<ir::Reduce>()->init;
}

bool _Tensor_::IsReduceInited(poly::StageMap stages) const {
  return stages->Lookup(GenReduceInitTensorNameOf(name));
}

void _Tensor_::Verify() const {
  CHECK(!shape.empty());
  CHECK(!domain.empty());
  CHECK(!name.empty()) << "Name of tensor should be set";
}

}  // namespace ir
}  // namespace cinn