Unverified commit e8a96a8e, authored by hedaoyuan and committed by GitHub

Merge pull request #7159 from hedaoyuan/inference

Release the middle layer's output memory
@@ -168,3 +168,13 @@ paddle_error paddle_gradient_machine_get_layer_output(
  out->args.push_back(layerOutput);
  return kPD_NO_ERROR;
}

paddle_error paddle_gradient_machine_release_layer_output(
    paddle_gradient_machine machine) {
  auto m = cast(machine);
  if (m == nullptr || m->machine == nullptr) {
    return kPD_NULLPTR;
  }
  m->machine->releaseOutput();
  return kPD_NO_ERROR;
}
@@ -113,6 +113,14 @@ paddle_gradient_machine_get_layer_output(paddle_gradient_machine machine,
                                         const char* layerName,
                                         paddle_arguments args);

/**
 * @brief Release the middle layers' output memory of the gradient machine.
 * @param [in] machine the gradient machine that has run an inference pass.
 * @return paddle_error
 */
PD_API paddle_error
paddle_gradient_machine_release_layer_output(paddle_gradient_machine machine);
#ifdef __cplusplus
}
#endif
...
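For context, a hedged sketch of where the new call slots into a capi inference loop. Only paddle_gradient_machine_release_layer_output is introduced by this PR; the forward call and error codes follow the existing capi declarations, but treat the exact signatures and the surrounding setup (machine and argument creation are not shown) as assumptions.

// Sketch: one inference request followed by the new cleanup call.
// `machine`, `in_args`, and `out_args` are assumed to have been created
// through the usual capi setup.
paddle_error run_once(paddle_gradient_machine machine,
                      paddle_arguments in_args,
                      paddle_arguments out_args) {
  // Forward pass in inference mode (isTrain = false).
  paddle_error err =
      paddle_gradient_machine_forward(machine, in_args, out_args, false);
  if (err != kPD_NO_ERROR) return err;

  // ... consume the results held in out_args ...

  // New in this PR: free the output matrices cached by middle layers,
  // lowering resident memory between requests.
  return paddle_gradient_machine_release_layer_output(machine);
}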
@@ -233,6 +233,13 @@ public:
    (void)numProcessed;
  }

  /**
   * @brief Release the middle layer's output memory.
   *
   * @note This function is used for memory optimization in inference.
   */
  virtual void releaseOutput() {}

protected:
  virtual void onLoadParameter() {}
...
@@ -187,6 +187,31 @@ void NeuralNetwork::init(const ModelConfig& config,
    CHECK(it != layerMap_.end());
    outputLayers_.push_back(it->second);
  }

  // Collect the middle layers: every layer that is neither an input
  // (data) layer nor an output layer, so its output buffer can be
  // released after inference.
  for (const auto& layer : layers_) {
    const auto& name = layer->getName();
    bool isMiddleLayer = true;

    // Skip input (data) layers.
    for (const auto& dataLayer : dataLayers_) {
      if (name == dataLayer->getName()) {
        isMiddleLayer = false;
        break;
      }
    }

    // Skip output layers.
    for (const auto& outputLayer : outputLayers_) {
      if (name == outputLayer->getName()) {
        isMiddleLayer = false;
        break;
      }
    }

    if (isMiddleLayer) {
      middleLayers_.push_back(layer);
    }
  }
}
void NeuralNetwork::connect(LayerPtr agentLayer,
@@ -327,6 +352,13 @@ void NeuralNetwork::onPassEnd() {
  }
}
void NeuralNetwork::releaseOutput() {
  for (auto& layer : middleLayers_) {
    Argument& arg = layer->getOutput();
    // value is a shared pointer to the output matrix; reset() drops this
    // layer's reference so the buffer can be freed.
    arg.value.reset();
  }
}
#ifndef PADDLE_MOBILE_INFERENCE
class CombinedEvaluator : public Evaluator {
...
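Why reset() is enough to free the memory: Argument::value is a shared pointer to the output matrix (Paddle's MatrixPtr aliases a std::shared_ptr), so releaseOutput() only drops the middle layer's reference; the buffer is deallocated once the last owner lets go. A minimal standalone C++ sketch of that ownership behavior, with nothing Paddle-specific assumed:

#include <cassert>
#include <memory>

int main() {
  // Two owners of one buffer, mimicking a layer output that another
  // Argument still references.
  std::shared_ptr<int> value = std::make_shared<int>(42);
  std::shared_ptr<int> other = value;

  value.reset();                   // the layer drops its reference...
  assert(other.use_count() == 1);  // ...but the buffer survives

  other.reset();                   // last owner gone: memory freed here
  return 0;
}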
@@ -137,6 +137,13 @@ public:
  /// some finish work, like convert the weight format of MKLDNNLayers
  void finish();

  /**
   * @brief Release the middle layer's output memory.
   *
   * @note This function is used for memory optimization in inference.
   */
  void releaseOutput();

protected:
  /**
   * The constructor of NeuralNetwork.
@@ -158,6 +165,7 @@ protected:
  std::vector<DataLayerPtr> dataLayers_;
  std::vector<LayerPtr> outputLayers_;
  std::vector<LayerPtr> middleLayers_;

  static std::map<std::string, bool> dllInitMap;
...