提交 7389dd09 编写于 作者: Y yangfei

Add some functions

上级 fea3f17d
...@@ -28,4 +28,4 @@ inline double time_diff(Time t1, Time t2) { ...@@ -28,4 +28,4 @@ inline double time_diff(Time t1, Time t2) {
return counter.count() / 1000.0; return counter.count() / 1000.0;
} }
} } // namespace paddle_mobile
...@@ -118,7 +118,20 @@ class CLImage { ...@@ -118,7 +118,20 @@ class CLImage {
cl_image_format cf = {.image_channel_order = CL_RGBA, cl_image_format cf = {.image_channel_order = CL_RGBA,
.image_channel_data_type = CL_HALF_FLOAT}; .image_channel_data_type = CL_HALF_FLOAT};
// NCHW -> [W * (C+3)/4, H * N] // NCHW -> [W * (C+3)/4, H * N]
DLOG << tensor_dims_; tensor_dims_ = dim;
if (tensor_data) {
tensor_data_ = tensor_data;
} else {
int numel = 1;
for (int i = 0; i < dim.size(); i++) {
numel *= dim[i];
}
tensor_data_ = static_cast<float *>(
paddle_mobile::memory::Alloc(sizeof(float) * numel));
for (int i = 0; i < numel; i++) {
tensor_data_[i] = 0;
}
}
size_t N, C, H, W; size_t N, C, H, W;
if (tensor_dims_.size() == 4) { if (tensor_dims_.size() == 4) {
N = tensor_dims_[0]; N = tensor_dims_[0];
......
...@@ -936,7 +936,7 @@ void Executor<GPU_CL, Precision::FP32>::InitMemory() { ...@@ -936,7 +936,7 @@ void Executor<GPU_CL, Precision::FP32>::InitMemory() {
cl_image->SetTensorData(tensorInput, ddim); cl_image->SetTensorData(tensorInput, ddim);
delete origin_data; delete origin_data;
paddle_mobile::memory::Free(tensorInput); // paddle_mobile::memory::Free(tensorInput);
} else { } else {
if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) { if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
auto cl_image = var->template GetMutable<framework::CLImage>(); auto cl_image = var->template GetMutable<framework::CLImage>();
......
...@@ -72,13 +72,16 @@ void OperatorBase<Dtype>::Run() { ...@@ -72,13 +72,16 @@ void OperatorBase<Dtype>::Run() {
if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor; if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor;
} else { } else {
CLImage *cl_image = vari->template GetMutable<framework::CLImage>(); CLImage *cl_image = vari->template GetMutable<framework::CLImage>();
// cl_command_queue commandQueue = // cl_command_queue commandQueue =
// scope_->GetCLScpoe()->CommandQueue(); Tensor *tmp ; // scope_->GetCLScpoe()->CommandQueue(); Tensor
// CLImageToTensor(cl_image,tmp,commandQueue); // *tmp ;
// tmp->Resize(cl_image->dims()); // CLImageToTensor(cl_image,tmp,commandQueue);
// tmp->Resize(cl_image->dims());
const float *input = cl_image->data<float>();
if (cl_image) { if (cl_image) {
// DLOG<<type_<<" input- "<<key<<"="<<*tmp;
DLOG << type_ << " input- " << key << "=" << cl_image->dims(); DLOG << type_ << " input- " << key << "=" << cl_image->dims();
// if(input)
// DLOG<<type_<<" input- "<<key<<"="<<*input;
} }
} }
...@@ -95,15 +98,24 @@ void OperatorBase<Dtype>::Run() { ...@@ -95,15 +98,24 @@ void OperatorBase<Dtype>::Run() {
auto vari = scope_->FindVar(var_vec_out[i]); auto vari = scope_->FindVar(var_vec_out[i]);
if (vari->IsInitialized()) { if (vari->IsInitialized()) {
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
CLImage *cl_image = vari->template GetMutable<framework::CLImage>(); if (type_ == "fetch") {
// cl_command_queue commandQueue = Tensor *tensor = vari->template GetMutable<framework::LoDTensor>();
// scope_->GetCLScpoe()->CommandQueue(); Tensor *tmp ; if (tensor)
// CLImageToTensor(cl_image,tmp,commandQueue); DLOG << type_ << " output- " << key << "=" << tensor->dims();
// tmp->Resize(cl_image->dims()); } else {
if (cl_image) { CLImage *cl_image = vari->template GetMutable<framework::CLImage>();
// DLOG<<type_<<" output- "<<key<<"="<<*tmp; // cl_command_queue commandQueue =
DLOG << type_ << " output- " << key << "=" << cl_image->dims(); // scope_->GetCLScpoe()->CommandQueue(); Tensor *tmp ;
// CLImageToTensor(cl_image,tmp,commandQueue);
// tmp->Resize(cl_image->dims());
if (cl_image) {
const float *output = cl_image->data<float>();
DLOG << type_ << " output- " << key << "=" << cl_image->dims();
// if(output)
// DLOG<<type_<<" output- "<<key<<"="<<*output;
}
} }
#else #else
Tensor *tensor = vari->template GetMutable<framework::LoDTensor>(); Tensor *tensor = vari->template GetMutable<framework::LoDTensor>();
if (tensor) DLOG << type_ << " output- " << key << "=" << *tensor; if (tensor) DLOG << type_ << " output- " << key << "=" << *tensor;
......
...@@ -27,17 +27,17 @@ bool FeedKernel<GPU_CL, float>::Init(FeedParam<GPU_CL> *param) { ...@@ -27,17 +27,17 @@ bool FeedKernel<GPU_CL, float>::Init(FeedParam<GPU_CL> *param) {
template <> template <>
void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) { void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
DLOG << "feed_kernel";
auto kernel = this->cl_helper_.KernelAt(0); auto kernel = this->cl_helper_.KernelAt(0);
cl_int status; cl_int status;
auto output = param.Out(); auto output = param.Out();
auto input = param.InputX(); const Tensor *input = param.InputX();
DLOG << " input: " << input; const float *input_data = nullptr;
input_data = input->data<float>();
const float *input_data = input->data<float>();
cl_mem cl_image = output->GetCLImage(); cl_mem cl_image = output->GetCLImage();
int height = output->dims()[2]; int height = output->dims()[2];
int width = output->dims()[3]; int width = output->dims()[3];
DLOG << output->dims();
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_data); status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_data);
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cl_image); status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cl_image);
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &width); status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &width);
......
...@@ -30,7 +30,8 @@ int main() { ...@@ -30,7 +30,8 @@ int main() {
auto time1 = time(); auto time1 = time();
if (paddle_mobile.Load(g_googlenet, optimize)) { if (paddle_mobile.Load(g_googlenet, optimize)) {
auto time2 = paddle_mobile::time(); auto time2 = paddle_mobile::time();
std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms" << std::endl; std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
<< std::endl;
std::vector<float> input; std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224}; std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224, &input, dims); GetInput<float>(g_test_image_1x3x224x224, &input, dims);
......
...@@ -26,7 +26,8 @@ int main() { ...@@ -26,7 +26,8 @@ int main() {
auto isok = paddle_mobile.Load(g_mobilenet, false); auto isok = paddle_mobile.Load(g_mobilenet, false);
if (isok) { if (isok) {
auto time2 = paddle_mobile::time(); auto time2 = paddle_mobile::time();
std::cout << "load cost :" << paddle_mobile::time_diff(time1, time1) << "ms" << std::endl; std::cout << "load cost :" << paddle_mobile::time_diff(time1, time1) << "ms"
<< std::endl;
std::vector<float> input; std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224}; std::vector<int64_t> dims{1, 3, 224, 224};
...@@ -48,8 +49,8 @@ int main() { ...@@ -48,8 +49,8 @@ int main() {
} }
DLOG << vec_result; DLOG << vec_result;
auto time4 = paddle_mobile::time(); auto time4 = paddle_mobile::time();
std::cout << "predict cost :" << paddle_mobile::time_diff(time3, time4) / 10 << "ms" std::cout << "predict cost :" << paddle_mobile::time_diff(time3, time4) / 10
<< std::endl; << "ms" << std::endl;
} }
std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana " std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请注册