Unverified commit fcde3991, authored by H hong, committed via GitHub

[NewIR]New ir support c concat (#56243)

* support new ir load combine

* update

* polish code

* remove print

* support c concat

* update

* polish code

* fix bug

* polish code

* fix compile bug

* polish code

* remove useless code
上级 9dfd5d60
......@@ -20,6 +20,8 @@ namespace dialect {
// Ops that must still be executed through the legacy fluid operator path
// (no native new-IR kernel yet). Looked up by full "pd.<op>" name.
// NOTE: keep entries unique — std::unordered_set silently collapses
// duplicates, which hides copy/paste mistakes in this list.
const std::unordered_set<std::string> LegacyOpList = {
    "pd.fused_softmax_mask_upper_triangle",
    "pd.fused_softmax_mask_upper_triangle_grad",
    "pd.load_combine",
    "pd.c_concat"};
enum class AttrType {
......
......@@ -605,9 +605,7 @@ void BuildRuntimeContext(
auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
if (type.isa<paddle::dialect::AllocatedDenseTensorType>() ||
type.isa<paddle::dialect::AllocatedSelectedRowsType>()) {
std::vector<paddle::framework::Variable*> vec_tmp = {var};
runtime_ctx->outputs[legacy_arg_name] = vec_tmp;
runtime_ctx->outputs[legacy_arg_name] = {var};
} else if (type.isa<ir::VectorType>()) {
auto var_ref = var->Get<paddle::framework::VariableRefArray>();
std::vector<paddle::framework::Variable*> vec_tmp;
......
......@@ -123,6 +123,15 @@
backward : batch_norm_grad
optional : reserve_space
# c_concat: model-parallel collective that produces an output whose last
# dimension is x's last dimension scaled by nranks (see CConcatInferMeta).
# Only x and nranks participate in shape inference; the remaining args
# (rank, ring_id, stream flags) are runtime/communication attributes.
- op : c_concat
  args : (Tensor x, int rank, int nranks, int ring_id, bool use_calc_stream, bool use_model_parallel)
  output : Tensor(out)
  infer_meta :
    func : CConcatInferMeta
    param : [x, nranks]
  kernel :
    func : c_concat
- op : cast
args : (Tensor x, DataType dtype)
output : Tensor
......
......@@ -429,6 +429,12 @@
out : Out
drop_empty_grad : [input_grad]
# Name mapping between the new-IR op's argument names (lowercase) and the
# legacy fluid operator's Input/Output slot names (capitalized).
- op : c_concat
  inputs :
    x : X
  outputs :
    out : Out
- op : cast
inputs :
x : X
......
......@@ -389,6 +389,15 @@ void CastInferMeta(const MetaTensor& x, DataType out_dtype, MetaTensor* out) {
out->share_lod(x);
}
void CConcatInferMeta(const MetaTensor& x, int nranks, MetaTensor* out) {
phi::DDim dim = x.dims();
dim[dim.size() - 1] = dim[dim.size() - 1] * nranks;
if (dim[dim.size() - 1] < 0) dim[dim.size() - 1] = -1;
out->set_dims(dim);
out->set_layout(x.layout());
out->set_dtype(x.dtype());
}
void CholeskyInferMeta(const MetaTensor& x, bool upper, MetaTensor* out) {
auto dims = x.dims();
auto rank = dims.size();
......
......@@ -71,6 +71,8 @@ void BatchSizeLikeInferMeta(const MetaTensor& x,
void CastInferMeta(const MetaTensor& x, DataType out_dtype, MetaTensor* out);
// Shape inference for c_concat: out keeps x's dims/layout/dtype except the
// last dimension, which is multiplied by nranks (dynamic dims stay -1).
void CConcatInferMeta(const MetaTensor& x, int nranks, MetaTensor* out);
void ChannelShuffleInferMeta(const MetaTensor& x,
int groups,
const std::string& data_format,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册