提交 8ab543ff 编写于 作者: Z zp7 提交者: Yanzhan Yang

fix gpu load memory leak,test=develop (#2027)

上级 e0b4b5c9
...@@ -25,6 +25,35 @@ bool DensityPriorBoxKernel<GPU_CL, float>::Init( ...@@ -25,6 +25,35 @@ bool DensityPriorBoxKernel<GPU_CL, float>::Init(
*param) { *param) {
this->cl_helper_.AddKernel("density_prior_box", this->cl_helper_.AddKernel("density_prior_box",
"density_prior_box_kernel.cl"); "density_prior_box_kernel.cl");
vector<float> fixed_sizes = param->FixedSizes();
vector<float> fixed_ratios = param->FixedRatios();
vector<int> densities = param->Densities();
vector<float> variances = param->Variances();
int fix_ratio_size = fixed_ratios.size();
int total_size = densities.size() + fixed_sizes.size() + fix_ratio_size;
float *densities_data = new float[total_size];
for (int i = 0; i < densities.size(); ++i) {
float density = densities[i];
densities_data[i] = density;
}
for (int k = 0; k < fixed_sizes.size(); ++k) {
densities_data[k + densities.size()] = fixed_sizes[k];
}
for (int j = 0; j < fixed_ratios.size(); ++j) {
float sqrt_ratios = sqrt(fixed_ratios[j]);
densities_data[j + densities.size() + fixed_sizes.size()] = sqrt_ratios;
}
framework::CLImage *new_density = new framework::CLImage();
new_density->SetTensorData(densities_data, {1, 1, 1, total_size});
new_density->InitCLImage(this->cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue());
param->setNewDensity(new_density);
delete[](densities_data);
return true; return true;
} }
...@@ -39,6 +68,7 @@ void DensityPriorBoxKernel<GPU_CL, float>::Compute( ...@@ -39,6 +68,7 @@ void DensityPriorBoxKernel<GPU_CL, float>::Compute(
auto output_boxes = param.OutputBoxes()->GetCLImage(); auto output_boxes = param.OutputBoxes()->GetCLImage();
auto output_var = param.OutputVariances()->GetCLImage(); auto output_var = param.OutputVariances()->GetCLImage();
auto new_deensity = param.getNewDensity()->GetCLImage();
float step_w = param.StepW(); float step_w = param.StepW();
float step_h = param.StepH(); float step_h = param.StepH();
...@@ -73,43 +103,17 @@ void DensityPriorBoxKernel<GPU_CL, float>::Compute( ...@@ -73,43 +103,17 @@ void DensityPriorBoxKernel<GPU_CL, float>::Compute(
auto default_work = this->cl_helper_.DefaultWorkSize(*param.OutputBoxes()); auto default_work = this->cl_helper_.DefaultWorkSize(*param.OutputBoxes());
float *densities_data[densities.size() + fixed_sizes.size() + fix_ratio_size];
int status;
for (int i = 0; i < densities.size(); ++i) {
float density = densities[i];
densities_data[i] = &density;
}
for (int k = 0; k < fixed_sizes.size(); ++k) {
densities_data[k + densities.size()] = &fixed_sizes[k];
}
for (int j = 0; j < fixed_ratios.size(); ++j) {
float sqrt_ratios = sqrt(fixed_ratios[j]);
densities_data[j + densities.size() + fixed_sizes.size()] = &sqrt_ratios;
}
cl_mem densities_memobj = clCreateBuffer(
this->cl_helper_.CLContext(), CL_MEM_READ_WRITE,
sizeof(float) * (densities.size() * 2 + fix_ratio_size), NULL, &status);
status = clEnqueueWriteBuffer(
this->cl_helper_.CLCommandQueue(), densities_memobj, CL_FALSE, 0,
(densities.size() * 2 + fix_ratio_size) * sizeof(float), densities_data,
0, NULL, NULL);
CL_CHECK_ERRORS(status);
float variances0 = variances[0]; float variances0 = variances[0];
float variances1 = variances[1]; float variances1 = variances[1];
float variances2 = variances[2]; float variances2 = variances[2];
float variances3 = variances[3]; float variances3 = variances[3];
cl_int status;
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &output_boxes); status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &output_boxes);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output_var); status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output_var);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &densities_memobj); status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &new_deensity);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 3, sizeof(float), &step_h); status = clSetKernelArg(kernel, 3, sizeof(float), &step_h);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
......
...@@ -184,6 +184,8 @@ void Transpose2Compute(const Transpose2Param<GPU_CL> &param, cl_context context, ...@@ -184,6 +184,8 @@ void Transpose2Compute(const Transpose2Param<GPU_CL> &param, cl_context context,
output->InitEmptyImage(context, commandQueue, output_tensor->dims()); output->InitEmptyImage(context, commandQueue, output_tensor->dims());
framework::TensorToCLImage(output_tensor, output, context, commandQueue, framework::TensorToCLImage(output_tensor, output, context, commandQueue,
kernel1); kernel1);
delete (input_tensor);
delete (output_tensor);
} }
template <> template <>
......
...@@ -77,6 +77,12 @@ class DensityPriorBoxParam : public OpParam { ...@@ -77,6 +77,12 @@ class DensityPriorBoxParam : public OpParam {
densities_ = GetAttr<vector<int>>("densities", attrs); densities_ = GetAttr<vector<int>>("densities", attrs);
} }
// Destroys the image stored in new_density (handed over through
// setNewDensity()). `delete` on a null pointer does nothing, so no explicit
// null test is needed.
~DensityPriorBoxParam() { delete new_density; }
const GType *Input() const { return input_; } const GType *Input() const { return input_; }
const GType *InputImage() const { return input_image_; } const GType *InputImage() const { return input_image_; }
GType *OutputBoxes() const { return output_boxes_; } GType *OutputBoxes() const { return output_boxes_; }
...@@ -90,6 +96,8 @@ class DensityPriorBoxParam : public OpParam { ...@@ -90,6 +96,8 @@ class DensityPriorBoxParam : public OpParam {
const vector<float> &FixedRatios() const { return fixed_ratios_; } const vector<float> &FixedRatios() const { return fixed_ratios_; }
const vector<int> &Densities() const { return densities_; } const vector<int> &Densities() const { return densities_; }
const vector<float> &Variances() const { return variances_; } const vector<float> &Variances() const { return variances_; }
// Returns the pointer most recently stored by setNewDensity().
GType *getNewDensity() const {
  return new_density;
}
// Stores `newDensity` in new_density; the destructor deletes whatever pointer
// is stored here. A previously stored pointer is overwritten, not released.
void setNewDensity(GType *newDensity) {
  new_density = newDensity;
}
public: public:
GType *input_; GType *input_;
...@@ -105,6 +113,7 @@ class DensityPriorBoxParam : public OpParam { ...@@ -105,6 +113,7 @@ class DensityPriorBoxParam : public OpParam {
vector<float> fixed_ratios_; vector<float> fixed_ratios_;
vector<int> densities_; vector<int> densities_;
vector<float> variances_; vector<float> variances_;
// Extra image supplied through setNewDensity() and deleted by the destructor.
// Starts out null so that destroying a param which never received an image
// does not delete an indeterminate pointer.
GType *new_density = nullptr;
}; };
DECLARE_KERNEL(DensityPriorBox, DensityPriorBoxParam); DECLARE_KERNEL(DensityPriorBox, DensityPriorBoxParam);
......
...@@ -850,6 +850,15 @@ class BatchNormParam : public OpParam { ...@@ -850,6 +850,15 @@ class BatchNormParam : public OpParam {
// is_test_ = GetAttr<bool>("is_test", attrs); // is_test_ = GetAttr<bool>("is_test", attrs);
} }
// Deletes the objects pointed to by new_bias_ and new_scale_, in that order.
// `delete` on a null pointer does nothing, so no explicit null test is needed.
// NOTE(review): where these two are allocated is not visible here — confirm
// this class is their sole owner.
~BatchNormParam() {
  delete new_bias_;
  delete new_scale_;
}
const GType *InputX() const { return input_x_; } const GType *InputX() const { return input_x_; }
GType *OutputY() const { return output_y_; } GType *OutputY() const { return output_y_; }
...@@ -2076,6 +2085,16 @@ class FusionConvAddBNReluParam : public ConvParam<Dtype> { ...@@ -2076,6 +2085,16 @@ class FusionConvAddBNReluParam : public ConvParam<Dtype> {
momentum_ = OpParam::GetAttr<float>("momentum", attrs); momentum_ = OpParam::GetAttr<float>("momentum", attrs);
this->output_ = OpParam::OutFrom<GType>(outputs, *scope); this->output_ = OpParam::OutFrom<GType>(outputs, *scope);
} }
// Deletes the objects pointed to by new_bias_ and new_scale_, in that order.
// `delete` on a null pointer does nothing, so no explicit null test is needed.
// NOTE(review): where these two are allocated is not visible here — confirm
// this class is their sole owner.
~FusionConvAddBNReluParam() {
  delete new_bias_;
  delete new_scale_;
}
GType *Bias() const { return bias_; } GType *Bias() const { return bias_; }
const int &Axis() const { return axis_; } const int &Axis() const { return axis_; }
...@@ -2143,6 +2162,15 @@ class FusionConvBNAddReluParam : public ConvParam<Dtype> { ...@@ -2143,6 +2162,15 @@ class FusionConvBNAddReluParam : public ConvParam<Dtype> {
} }
this->output_ = OpParam::OutFrom<GType>(outputs, *scope); this->output_ = OpParam::OutFrom<GType>(outputs, *scope);
} }
// Deletes the objects pointed to by new_bias_ and new_scale_, in that order.
// `delete` on a null pointer does nothing, so no explicit null test is needed.
// NOTE(review): where these two are allocated is not visible here — confirm
// this class is their sole owner.
~FusionConvBNAddReluParam() {
  delete new_bias_;
  delete new_scale_;
}
GType *Bias() const { return bias_; } GType *Bias() const { return bias_; }
const int &Axis() const { return axis_; } const int &Axis() const { return axis_; }
...@@ -2315,6 +2343,15 @@ class FusionDWConvBNReluParam : public ConvParam<Dtype> { ...@@ -2315,6 +2343,15 @@ class FusionDWConvBNReluParam : public ConvParam<Dtype> {
this->output_ = OpParam::OutFrom<GType>(outputs, *scope); this->output_ = OpParam::OutFrom<GType>(outputs, *scope);
} }
// Deletes the objects pointed to by new_bias_ and new_scale_, in that order.
// `delete` on a null pointer does nothing, so no explicit null test is needed.
// NOTE(review): where these two are allocated is not visible here — confirm
// this class is their sole owner.
~FusionDWConvBNReluParam() {
  delete new_bias_;
  delete new_scale_;
}
const GType *InputBias() const { return input_bias_; } const GType *InputBias() const { return input_bias_; }
const GType *InputMean() const { return input_mean_; } const GType *InputMean() const { return input_mean_; }
...@@ -2384,6 +2421,15 @@ class FusionConvBNReluParam : public ConvParam<Dtype> { ...@@ -2384,6 +2421,15 @@ class FusionConvBNReluParam : public ConvParam<Dtype> {
this->output_ = OpParam::OutFrom<GType>(outputs, *scope); this->output_ = OpParam::OutFrom<GType>(outputs, *scope);
} }
// Deletes the objects pointed to by new_bias_ and new_scale_, in that order.
// `delete` on a null pointer does nothing, so no explicit null test is needed.
// NOTE(review): where these two are allocated is not visible here — confirm
// this class is their sole owner.
~FusionConvBNReluParam() {
  delete new_bias_;
  delete new_scale_;
}
const GType *InputBias() const { return input_bias_; } const GType *InputBias() const { return input_bias_; }
const GType *InputMean() const { return input_mean_; } const GType *InputMean() const { return input_mean_; }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册