fix cache key in concat oneDNN kernel (#31820)

* fix cache key in concat oneDNN kernel
* key simplified

fix cache key in concat oneDNN kernel (#31820)
* fix cache key in concat oneDNN kernel
* key simplified
e5f7a834 · Wojciech Uss · GitHub · f2cfc0f4 · e5f7a834
Hide whitespace changes
Inline / Side-by-side

Showing 1 changed file with 14 additions and 6 deletions

paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +14 -6

No files found.
--- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
@@ -71,6 +71,15 @@ static const std::vector<const Tensor*> ReduceMultiInput(
  return reduced;
 }
+static const std::vector<int> GetDimsForKey(
+    const std::vector<const Tensor*>& inputs) {
+  auto dims_key = paddle::framework::vectorize<int>(inputs[0]->dims());
+  for (auto it = std::next(inputs.begin()); it != inputs.end(); ++it) {
+    dims_key.push_back((*it)->dims()[0]);
+  }
+  return dims_key;
+}
 template <typename T>
 class ConcatPrimitiveFactory {
 public:
@@ -134,6 +143,8 @@ template <typename T>
 class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 public:
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
+    // If any of the multiple inputs of concat has an input size of 0, the
+    // actual size of the multi_input will change
    auto multi_input = ReduceMultiInput(ctx.MultiInput<Tensor>("X"));
    EnforceLayouts(multi_input);
    Tensor* output = ctx.Output<Tensor>("Out");
@@ -156,12 +167,9 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
        paddle::framework::ToMKLDNNDataType(multi_input[0]->type());
    ConcatPrimitiveFactory<T> prim_creator;
-    // If one of the multiple inputs of concat has an input size of 0, the
+    std::string key =
-    // actual size of the multi_input will change
+        platform::CreateKey(dev_ctx, GetDimsForKey(multi_input),
-    std::string key = platform::CreateKey(
+                            multi_input.size(), ctx.OutputName("Out"), dt);
-        dev_ctx, paddle::framework::vectorize<int>(multi_input[0]->dims()),
-        multi_input.size(), ctx.OutputName("Out"), dt,
-        platform::ThreadIDasStr());
    key = platform::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key);
    const std::string key_prim = key + "@concat_p";