diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h index ebfe0573cfdbfb2ef54a29b038e8b85356cc6c27..4ab54a5022a3d30215c7557bca2e69c4d011fc5f 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.h +++ b/paddle/gserver/gradientmachines/GradientMachine.h @@ -233,6 +233,13 @@ public: (void)numProcessed; } + /** + * @brief Release the middle layer's output memory. + * + * @note This function is used for memory optimization in inference. + */ + virtual void releaseOutput() {} + protected: virtual void onLoadParameter() {} diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp index 68bf37d59db65ddc8096e2db3391be25c37b57e6..3b6234a6e5e07c506e0c86d0e32a77b5e134b744 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp @@ -187,6 +187,31 @@ void NeuralNetwork::init(const ModelConfig& config, CHECK(it != layerMap_.end()); outputLayers_.push_back(it->second); } + + for (const auto& layer : layers_) { + const auto name& = layer->getName(); + bool isMiddleLayer = true; + + // if data layer + for (const auto& dataLayer : dataLayers_) { + if (name == dataLayer->getName()) { + isMiddleLayer = false; + break; + } + } + + // if output layer + for (const auto& dataLayer : outputLayers_) { + if (name == dataLayer->getName()) { + isMiddleLayer = false; + break; + } + } + + if (isMiddleLayer) { + middleLayers_.push_back(layer); + } + } } void NeuralNetwork::connect(LayerPtr agentLayer, @@ -327,6 +352,13 @@ void NeuralNetwork::onPassEnd() { } } +void NeuralNetwork::releaseOutput() { + for (auto& layer : middleLayers_) { + Argument& arg = layer->getOutput(); + arg.value.reset(); + } +} + #ifndef PADDLE_MOBILE_INFERENCE class CombinedEvaluator : public Evaluator { diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h index 6888380290074318fe7f94d168b2931e776dda47..968e198cf6608c47089d40fc98c6691f9e5bda5c 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.h +++ b/paddle/gserver/gradientmachines/NeuralNetwork.h @@ -137,6 +137,13 @@ public: /// some finish work, like convert the weight format of MKLDNNLayers void finish(); + /** + * @brief Release the middle layer's output memory. + * + * @note This function is used for memory optimization in inference. + */ + void releaseOutput(); + protected: /** * The constructor of NeuralNetwork. @@ -158,6 +165,7 @@ protected: std::vector dataLayers_; std::vector outputLayers_; + std::vector middleLayers_; static std::map dllInitMap;