diff --git a/paddle/fluid/operators/softmax_mkldnn_op.cc b/paddle/fluid/operators/softmax_mkldnn_op.cc index 71b541d98f6e0d3e12601c9988ca6ffb8bb7554d..14b57b11fefb2b726531cb164dbf479f8df26b24 100644 --- a/paddle/fluid/operators/softmax_mkldnn_op.cc +++ b/paddle/fluid/operators/softmax_mkldnn_op.cc @@ -53,25 +53,60 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel { "Softmax input and output dimensions should match"); // Same memory descriptor to be used for input and output memory::dims softmax_tz = {src_tz[0], src_tz[1]}; - // Currently only supports NC data format - // TODO(jczaja-intel): support more formats - auto softmax_md = - MKLDNNMemDesc({softmax_tz}, memory::f32, memory::format::nc); - // Normalization is made after innermost dimension eg. C out of NC - auto softmax_desc = softmax_forward::desc(prop_kind::forward_scoring, - softmax_md, 1 /*dim: C*/); - // create memory primitives - auto softmax_src_memory = - memory({softmax_md, mkldnn_engine}, - static_cast(const_cast(input_data))); - auto softmax_dst_memory = - memory({softmax_md, mkldnn_engine}, - static_cast(const_cast(output_data))); - auto softmax_prim_desc = - softmax_forward::primitive_desc(softmax_desc, mkldnn_engine); - auto softmax = softmax_forward(softmax_prim_desc, softmax_src_memory, - softmax_dst_memory); - std::vector pipeline{softmax}; + // Generate keys for storing/retrieving primitives for this operator + // TODO(jczaja): Each MKLDNN operator may have different hashing function + auto gethash = [](memory::dims& operand_dims) { + return std::string(std::to_string(operand_dims[0]) + "-" + + std::to_string(operand_dims[1])); + }; + const std::string key = gethash(softmax_tz); + const std::string key_softmax_p = key + "@softmax_p"; + const std::string key_softmax_src_mem_p = key + "@softmax_src_mem_p"; + const std::string key_softmax_dst_mem_p = key + "@softmax_dst_mem_p"; + + std::shared_ptr softmax_p = dev_ctx.GetBlob(key_softmax_p); + if (softmax_p == nullptr) { + // 
Currently only NC data format is supported + auto softmax_md = + MKLDNNMemDesc({softmax_tz}, memory::f32, memory::format::nc); + // Normalization is made after innermost dimension eg. C out of NC + auto softmax_desc = softmax_forward::desc(prop_kind::forward_scoring, + softmax_md, 1 /*dim: C*/); + // create memory primitives + auto softmax_src_memory_p = std::make_shared( + memory::primitive_desc{softmax_md, mkldnn_engine}, + static_cast(const_cast(input_data))); + dev_ctx.SetBlob(key_softmax_src_mem_p, softmax_src_memory_p); + auto softmax_dst_memory_p = std::make_shared( + memory::primitive_desc{softmax_md, mkldnn_engine}, + static_cast(output_data)); + dev_ctx.SetBlob(key_softmax_dst_mem_p, softmax_dst_memory_p); + + auto softmax_forward_pd = + std::make_shared(softmax_desc, + mkldnn_engine); + softmax_p = std::make_shared( + *(softmax_forward_pd.get()), + *(static_cast(softmax_src_memory_p.get())), + *(static_cast(softmax_dst_memory_p.get()))); + dev_ctx.SetBlob(key_softmax_p, softmax_p); + } else { + // Primitives already exist + auto src_memory_p = std::static_pointer_cast( + dev_ctx.GetBlob(key_softmax_src_mem_p)); + PADDLE_ENFORCE(src_memory_p != nullptr, + "Fail to find softmax src mem_p in device context"); + auto dst_memory_p = std::static_pointer_cast( + dev_ctx.GetBlob(key_softmax_dst_mem_p)); + PADDLE_ENFORCE(dst_memory_p != nullptr, + "Fail to find softmax dst mem_p in device context"); + src_memory_p->set_data_handle( + reinterpret_cast(const_cast(input_data))); + dst_memory_p->set_data_handle(output_data); + } + + std::vector pipeline{ + *(static_cast(softmax_p.get()))}; stream(stream::kind::eager).submit(pipeline).wait(); const bool is_test = ctx.Attr("is_test"); diff --git a/paddle/gserver/layers/PriorBox.cpp b/paddle/gserver/layers/PriorBox.cpp index af2cc05a954b3a6857c1015104a57339282840b8..56a4d942f0fdcb981f52f6ce0f644ec57a0e3c9a 100644 --- a/paddle/gserver/layers/PriorBox.cpp +++ b/paddle/gserver/layers/PriorBox.cpp @@ -28,7 +28,7 @@ 
namespace paddle { */ class PriorBoxLayer : public Layer { -public: +public: // NOLINT explicit PriorBoxLayer(const LayerConfig& config) : Layer(config) {} bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; @@ -36,7 +36,7 @@ public: void forward(PassType passType) override; void backward(const UpdateCallback& callback) override {} -protected: +protected: // NOLINT int numPriors_; std::vector minSize_; std::vector maxSize_; @@ -109,11 +109,18 @@ void PriorBoxLayer::forward(PassType passType) { real boxWidth = minSize; real boxHeight = minSize; - // priors with different aspect ratios - for (size_t r = 0; r < aspectRatio_.size(); r++) { - real ar = aspectRatio_[r]; - boxWidth = minSize * sqrt(ar); - boxHeight = minSize / sqrt(ar); + // first prior: aspect_ratio == 1.0, compatible with old logic + tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth; + tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight; + tmpPtr[idx++] = (centerX + boxWidth / 2.) / imageWidth; + tmpPtr[idx++] = (centerY + boxHeight / 2.) / imageHeight; + // set the variance. + for (int t = 0; t < 4; t++) tmpPtr[idx++] = variance_[t]; + + if (maxSize_.size() > 0) { + // square prior with size sqrt(minSize * maxSize) + real maxSize = maxSize_[s]; + boxWidth = boxHeight = sqrt(minSize * maxSize); tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth; tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight; tmpPtr[idx++] = (centerX + boxWidth / 2.) 
/ imageWidth; @@ -122,10 +129,14 @@ void PriorBoxLayer::forward(PassType passType) { for (int t = 0; t < 4; t++) tmpPtr[idx++] = variance_[t]; } - if (maxSize_.size() > 0) { - // square prior with size sqrt(minSize * maxSize) - real maxSize = maxSize_[s]; - boxWidth = boxHeight = sqrt(minSize * maxSize); + // priors with different aspect ratios + for (size_t r = 0; r < aspectRatio_.size(); r++) { + real ar = aspectRatio_[r]; + if (fabs(ar - 1.0) < 1e-6) { + continue; + } + boxWidth = minSize * sqrt(ar); + boxHeight = minSize / sqrt(ar); tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth; tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight; tmpPtr[idx++] = (centerX + boxWidth / 2.) / imageWidth;