diff --git a/paddle/capi/gradient_machine.cpp b/paddle/capi/gradient_machine.cpp
index 482b51e8a8430863c3e13df2298f6979d3959461..1f0e033c5b7ab713f1747d58f4fcdaad001f0715 100644
--- a/paddle/capi/gradient_machine.cpp
+++ b/paddle/capi/gradient_machine.cpp
@@ -168,3 +168,13 @@ paddle_error paddle_gradient_machine_get_layer_output(
   out->args.push_back(layerOutput);
   return kPD_NO_ERROR;
 }
+
+paddle_error paddle_gradient_machine_release_layer_output(
+    paddle_gradient_machine machine) {
+  auto m = cast(machine);
+  if (m == nullptr || m->machine == nullptr) {
+    return kPD_NULLPTR;
+  }
+  m->machine->releaseOutput();
+  return kPD_NO_ERROR;
+}
diff --git a/paddle/capi/gradient_machine.h b/paddle/capi/gradient_machine.h
index 28eeb23e3bbdd4cc22a25c14170bf56c294f8cd7..7e37dea00b27b6cb955486b4210c1373decbcfa5 100644
--- a/paddle/capi/gradient_machine.h
+++ b/paddle/capi/gradient_machine.h
@@ -113,6 +113,14 @@ paddle_gradient_machine_get_layer_output(paddle_gradient_machine machine,
                                          const char* layerName,
                                          paddle_arguments args);
 
+/**
+ * @brief Release the middle layers' output memory of the gradient machine.
+ * @param [in] machine The gradient machine that has run an inference.
+ * @return kPD_NULLPTR if the machine is null; kPD_NO_ERROR otherwise.
+ */
+PD_API paddle_error
+paddle_gradient_machine_release_layer_output(paddle_gradient_machine machine);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h
index ebfe0573cfdbfb2ef54a29b038e8b85356cc6c27..4ab54a5022a3d30215c7557bca2e69c4d011fc5f 100644
--- a/paddle/gserver/gradientmachines/GradientMachine.h
+++ b/paddle/gserver/gradientmachines/GradientMachine.h
@@ -233,6 +233,13 @@ public:
     (void)numProcessed;
   }
 
+  /**
+   * @brief Release the middle layers' output memory.
+   *
+   * @note Used for memory optimization in inference; a no-op by default.
+   */
+  virtual void releaseOutput() {}
+
 protected:
   virtual void onLoadParameter() {}
 
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
index 68bf37d59db65ddc8096e2db3391be25c37b57e6..1f2aa61b6f86eebf7b002f1e48aa56d1d14d4820 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -187,6 +187,31 @@ void NeuralNetwork::init(const ModelConfig& config,
     CHECK(it != layerMap_.end());
     outputLayers_.push_back(it->second);
   }
+
+  for (const auto& layer : layers_) {
+    const auto& name = layer->getName();
+    bool isMiddleLayer = true;
+
+    // a layer registered as a data (input) layer is not a middle layer
+    for (const auto& dataLayer : dataLayers_) {
+      if (name == dataLayer->getName()) {
+        isMiddleLayer = false;
+        break;
+      }
+    }
+
+    // a layer registered as an output layer is not a middle layer either
+    for (const auto& outputLayer : outputLayers_) {
+      if (name == outputLayer->getName()) {
+        isMiddleLayer = false;
+        break;
+      }
+    }
+
+    if (isMiddleLayer) {
+      middleLayers_.push_back(layer);
+    }
+  }
 }
 
 void NeuralNetwork::connect(LayerPtr agentLayer,
@@ -327,6 +352,13 @@ void NeuralNetwork::onPassEnd() {
   }
 }
 
+void NeuralNetwork::releaseOutput() {
+  for (auto& layer : middleLayers_) {
+    Argument& arg = layer->getOutput();
+    arg.value.reset();
+  }
+}
+
 #ifndef PADDLE_MOBILE_INFERENCE
 
 class CombinedEvaluator : public Evaluator {
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h
index 6888380290074318fe7f94d168b2931e776dda47..968e198cf6608c47089d40fc98c6691f9e5bda5c 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.h
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.h
@@ -137,6 +137,13 @@ public:
   /// some finish work, like convert the weight format of MKLDNNLayers
   void finish();
 
+  /**
+   * @brief Release the output value of every non-data, non-output layer.
+   *
+   * @note This function is used for memory optimization in inference.
+   */
+  void releaseOutput();
+
 protected:
   /**
    * The constructor of NeuralNetwork.
@@ -158,6 +165,7 @@ protected:
 
   std::vector dataLayers_;
   std::vector outputLayers_;
+  std::vector<LayerPtr> middleLayers_;
 
   static std::map dllInitMap;
 