Commit c06b4483 authored by yuyang18

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/exec_strategy

...@@ -53,25 +53,60 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
         "Softmax input and output dimensions should match");
     // Same memory descriptor to be used for input and output
     memory::dims softmax_tz = {src_tz[0], src_tz[1]};
-    // Currently only supports NC data format
-    // TODO(jczaja-intel): support more formats
-    auto softmax_md =
-        MKLDNNMemDesc({softmax_tz}, memory::f32, memory::format::nc);
-    // Normalization is made after innermost dimension eg. C out of NC
-    auto softmax_desc = softmax_forward::desc(prop_kind::forward_scoring,
-                                              softmax_md, 1 /*dim: C*/);
-    // create memory primitives
-    auto softmax_src_memory =
-        memory({softmax_md, mkldnn_engine},
-               static_cast<void*>(const_cast<T*>(input_data)));
-    auto softmax_dst_memory =
-        memory({softmax_md, mkldnn_engine},
-               static_cast<void*>(const_cast<T*>(output_data)));
-    auto softmax_prim_desc =
-        softmax_forward::primitive_desc(softmax_desc, mkldnn_engine);
-    auto softmax = softmax_forward(softmax_prim_desc, softmax_src_memory,
-                                   softmax_dst_memory);
-    std::vector<primitive> pipeline{softmax};
+    // Generate keys for storing/retriving primitives for this operator
+    // TODO(jczaja): Each MKLDNN operator may have diffrent hashing function
+    auto gethash = [](memory::dims& operand_dims) {
+      return std::string(std::to_string(operand_dims[0]) + "-" +
+                         std::to_string(operand_dims[1]));
+    };
+    const std::string key = gethash(softmax_tz);
+    const std::string key_softmax_p = key + "@softmax_p";
+    const std::string key_softmax_src_mem_p = key + "@softmax_src_mem_p";
+    const std::string key_softmax_dst_mem_p = key + "@softmax_dst_mem_p";
+
+    std::shared_ptr<void> softmax_p = dev_ctx.GetBlob(key_softmax_p);
+    if (softmax_p == nullptr) {
+      // Currently only NC data format is supported
+      auto softmax_md =
+          MKLDNNMemDesc({softmax_tz}, memory::f32, memory::format::nc);
+      // Normalization is made after innermost dimension eg. C out of NC
+      auto softmax_desc = softmax_forward::desc(prop_kind::forward_scoring,
+                                                softmax_md, 1 /*dim: C*/);
+      // create memory primitives
+      auto softmax_src_memory_p = std::make_shared<memory>(
+          memory::primitive_desc{softmax_md, mkldnn_engine},
+          static_cast<void*>(const_cast<T*>(input_data)));
+      dev_ctx.SetBlob(key_softmax_src_mem_p, softmax_src_memory_p);
+      auto softmax_dst_memory_p = std::make_shared<memory>(
+          memory::primitive_desc{softmax_md, mkldnn_engine},
+          static_cast<void*>(output_data));
+      dev_ctx.SetBlob(key_softmax_dst_mem_p, softmax_dst_memory_p);
+      auto softmax_forward_pd =
+          std::make_shared<softmax_forward::primitive_desc>(softmax_desc,
+                                                            mkldnn_engine);
+      softmax_p = std::make_shared<softmax_forward>(
+          *(softmax_forward_pd.get()),
+          *(static_cast<memory*>(softmax_src_memory_p.get())),
+          *(static_cast<memory*>(softmax_dst_memory_p.get())));
+      dev_ctx.SetBlob(key_softmax_p, softmax_p);
+    } else {
+      // Primitives already exist
+      auto src_memory_p = std::static_pointer_cast<memory>(
+          dev_ctx.GetBlob(key_softmax_src_mem_p));
+      PADDLE_ENFORCE(src_memory_p != nullptr,
+                     "Fail to find softmax src mem_p in device context");
+      auto dst_memory_p = std::static_pointer_cast<memory>(
+          dev_ctx.GetBlob(key_softmax_dst_mem_p));
+      PADDLE_ENFORCE(dst_memory_p != nullptr,
+                     "Fail to find softmax dst mem_p in device context");
+      src_memory_p->set_data_handle(
+          reinterpret_cast<void*>(const_cast<T*>(input_data)));
+      dst_memory_p->set_data_handle(output_data);
+    }
+
+    std::vector<primitive> pipeline{
+        *(static_cast<softmax_forward::primitive*>(softmax_p.get()))};
     stream(stream::kind::eager).submit(pipeline).wait();
 
     const bool is_test = ctx.Attr<bool>("is_test");
......
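The new kernel body introduces a simple primitive cache: it hashes the input dimensions into a string key, stores the MKLDNN memory objects and the softmax primitive in the device context under that key on the first run, and on later runs only rebinds the input/output data handles instead of re-creating the primitives. Below is a minimal, self-contained sketch of that caching idea; it uses a plain std::unordered_map in place of Paddle's MKLDNNDeviceContext blob storage, and the names PrimitiveCache and make_key are illustrative, not Paddle APIs.

```cpp
// Minimal sketch of the key-based primitive cache, with a plain
// std::unordered_map standing in for the device context's blob storage.
// PrimitiveCache and make_key are hypothetical illustration names.
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

struct PrimitiveCache {
  // Blobs are stored type-erased, like dev_ctx.SetBlob/GetBlob in the diff.
  std::unordered_map<std::string, std::shared_ptr<void>> blobs;

  void SetBlob(const std::string& key, std::shared_ptr<void> blob) {
    blobs[key] = std::move(blob);
  }
  std::shared_ptr<void> GetBlob(const std::string& key) const {
    auto it = blobs.find(key);
    return it == blobs.end() ? nullptr : it->second;
  }
};

// Key derived from the operand shape, mirroring gethash() in the kernel:
// two softmax runs with the same N and C reuse the same cached objects.
std::string make_key(const std::vector<int>& dims) {
  return std::to_string(dims[0]) + "-" + std::to_string(dims[1]);
}

int main() {
  PrimitiveCache cache;
  std::vector<int> softmax_tz = {128, 10};  // e.g. batch 128, 10 classes

  const std::string key = make_key(softmax_tz) + "@softmax_p";
  auto prim = cache.GetBlob(key);
  if (prim == nullptr) {
    // First run: build the (expensive) primitive and cache it.
    prim = std::make_shared<int>(42);  // placeholder for softmax_forward
    cache.SetBlob(key, prim);
  }
  // Later runs with the same shape skip construction and only rebind the
  // input/output data handles, as the kernel does in its else-branch.
  return 0;
}
```

The type-erased std::shared_ptr<void> lets one blob map hold memories, primitive descriptors, and primitives alike, at the cost of a static cast at the point of use, which is the trade-off visible in the kernel above.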
...@@ -28,7 +28,7 @@ namespace paddle {
  */
 class PriorBoxLayer : public Layer {
-public:
+public:  // NOLINT
   explicit PriorBoxLayer(const LayerConfig& config) : Layer(config) {}
   bool init(const LayerMap& layerMap,
             const ParameterMap& parameterMap) override;
...@@ -36,7 +36,7 @@ public:
   void forward(PassType passType) override;
   void backward(const UpdateCallback& callback) override {}
 
-protected:
+protected:  // NOLINT
   int numPriors_;
   std::vector<int> minSize_;
   std::vector<int> maxSize_;
...@@ -109,11 +109,18 @@ void PriorBoxLayer::forward(PassType passType) {
         real boxWidth = minSize;
         real boxHeight = minSize;
-        // priors with different aspect ratios
-        for (size_t r = 0; r < aspectRatio_.size(); r++) {
-          real ar = aspectRatio_[r];
-          boxWidth = minSize * sqrt(ar);
-          boxHeight = minSize / sqrt(ar);
+        // first prior: aspect_ratio == 1.0, compatible to old logic
+        tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
+        tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight;
+        tmpPtr[idx++] = (centerX + boxWidth / 2.) / imageWidth;
+        tmpPtr[idx++] = (centerY + boxHeight / 2.) / imageHeight;
+        // set the variance.
+        for (int t = 0; t < 4; t++) tmpPtr[idx++] = variance_[t];
+        if (maxSize_.size() > 0) {
+          // square prior with size sqrt(minSize * maxSize)
+          real maxSize = maxSize_[s];
+          boxWidth = boxHeight = sqrt(minSize * maxSize);
           tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
           tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight;
           tmpPtr[idx++] = (centerX + boxWidth / 2.) / imageWidth;
...@@ -122,10 +129,14 @@ void PriorBoxLayer::forward(PassType passType) {
           for (int t = 0; t < 4; t++) tmpPtr[idx++] = variance_[t];
         }
-        if (maxSize_.size() > 0) {
-          // square prior with size sqrt(minSize * maxSize)
-          real maxSize = maxSize_[s];
-          boxWidth = boxHeight = sqrt(minSize * maxSize);
+        // priors with different aspect ratios
+        for (size_t r = 0; r < aspectRatio_.size(); r++) {
+          real ar = aspectRatio_[r];
+          if (fabs(ar - 1.0) < 1e-6) {
+            continue;
+          }
+          boxWidth = minSize * sqrt(ar);
+          boxHeight = minSize / sqrt(ar);
           tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
           tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight;
           tmpPtr[idx++] = (centerX + boxWidth / 2.) / imageWidth;
......
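The forward() change reorders prior generation so that each feature-map cell first emits the square prior of size minSize (aspect ratio 1.0), then, if a maxSize is configured, the square prior of size sqrt(minSize * maxSize), and only then the priors for the remaining aspect ratios, with ratio 1.0 skipped inside the loop since it has already been emitted. The sketch below reproduces just that ordering for the box widths and heights of a single cell; prior_sizes is a hypothetical helper for illustration, not part of the layer.

```cpp
// Sketch of the new prior ordering for one cell:
// (1) square prior of size minSize (aspect ratio 1.0),
// (2) square prior of size sqrt(minSize * maxSize) if maxSize is set,
// (3) priors for the remaining aspect ratios, skipping ratio 1.0.
#include <cmath>
#include <cstdio>
#include <utility>
#include <vector>

std::vector<std::pair<double, double>> prior_sizes(
    double minSize, double maxSize, const std::vector<double>& aspectRatios) {
  std::vector<std::pair<double, double>> wh;
  wh.emplace_back(minSize, minSize);            // first prior: ratio == 1.0
  if (maxSize > 0) {
    double s = std::sqrt(minSize * maxSize);    // square prior
    wh.emplace_back(s, s);
  }
  for (double ar : aspectRatios) {
    if (std::fabs(ar - 1.0) < 1e-6) continue;   // already emitted above
    wh.emplace_back(minSize * std::sqrt(ar), minSize / std::sqrt(ar));
  }
  return wh;
}

int main() {
  // minSize 30, maxSize 60, aspect ratios {1, 2, 0.5} -> 4 priors per cell.
  for (const auto& p : prior_sizes(30.0, 60.0, {1.0, 2.0, 0.5})) {
    std::printf("w=%.2f h=%.2f\n", p.first, p.second);
  }
  return 0;
}
```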