未验证 提交 ac8369f3 编写于 作者: R Ray Liu 提交者: GitHub

Merge pull request #1076 from codeWorm2015/opencl

 add debug code
...@@ -498,3 +498,15 @@ float half2float(half_t h) { ...@@ -498,3 +498,15 @@ float half2float(half_t h) {
exponenttable[h >> 10]; exponenttable[h >> 10];
return *reinterpret_cast<float*>(&v); return *reinterpret_cast<float*>(&v);
} }
void FloatArray2HalfArray(float *f_array, half_t *h_array, int count) {
for (int i = 0; i < count; ++i) {
h_array[i] = float2half(f_array[i]);
}
}
// Convert `count` half-precision values in h_array back into 32-bit floats,
// writing the results into f_array. Buffers must not be null and must each
// hold at least `count` elements; a non-positive count is a no-op.
void HalfArray2FloatArray(half_t *h_array, float *f_array, int count) {
  for (int i = 0; i < count; ++i) {
    // Bug fix: this previously called float2half, which treats its argument
    // as a float to be *encoded* — passing a half_t here implicitly converted
    // the raw 16-bit pattern to float and re-encoded it, producing garbage.
    // half2float is the correct inverse (decode) conversion.
    f_array[i] = half2float(h_array[i]);
  }
}
...@@ -18,4 +18,9 @@ limitations under the License. */ ...@@ -18,4 +18,9 @@ limitations under the License. */
typedef uint16_t half_t; typedef uint16_t half_t;
half_t float2half(float f); half_t float2half(float f);
float half2float(half_t h); float half2float(half_t h);
void FloatArray2HalfArray(float *f_array, half_t *h_array, int count);
void HalfArray2FloatArray(half_t *h_array, float *f_array, int count);
...@@ -226,5 +226,9 @@ void TensorToCLImage(Tensor *tensor, CLImage *image); ...@@ -226,5 +226,9 @@ void TensorToCLImage(Tensor *tensor, CLImage *image);
void CLImageToTensor(CLImage *image, Tensor *tensor); void CLImageToTensor(CLImage *image, Tensor *tensor);
#ifdef PADDLE_MOBILE_DEBUG
Print &operator<<(Print &printer, const CLImage &image);
#endif
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -414,7 +414,7 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict( ...@@ -414,7 +414,7 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
} }
} }
#else #else
for (int i = 0; i < ops.size(); i++) { for (int i = 0; i < 1; i++) {
#ifdef PADDLE_MOBILE_PROFILE #ifdef PADDLE_MOBILE_PROFILE
struct timespec ts; struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts); clock_gettime(CLOCK_MONOTONIC, &ts);
...@@ -428,6 +428,9 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict( ...@@ -428,6 +428,9 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
#endif #endif
} }
#endif #endif
DLOG << " predict return nullptr";
return nullptr;
auto last_op = ops.rbegin(); auto last_op = ops.rbegin();
auto output_map = (*last_op)->Outputs(); auto output_map = (*last_op)->Outputs();
std::vector<std::string> out_keys = (*last_op)->GetOutKeys(); std::vector<std::string> out_keys = (*last_op)->GetOutKeys();
...@@ -647,13 +650,18 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict( ...@@ -647,13 +650,18 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
const std::vector<Ptype> &input, const std::vector<int64_t> &dims) { const std::vector<Ptype> &input, const std::vector<int64_t> &dims) {
framework::Tensor tensor(input, framework::make_ddim(dims)); framework::Tensor tensor(input, framework::make_ddim(dims));
std::shared_ptr<framework::Tensor> output_tensor = Predict(tensor, 0); std::shared_ptr<framework::Tensor> output_tensor = Predict(tensor, 0);
Executor<Dtype, P>::Ptype *output_ptr = if (output_tensor != nullptr) {
output_tensor->data<typename Executor<Dtype, P>::Ptype>(); Executor<Dtype, P>::Ptype *output_ptr =
std::vector<typename Executor<Dtype, P>::Ptype> result_vector; output_tensor->data<typename Executor<Dtype, P>::Ptype>();
for (int j = 0; j < output_tensor->numel(); ++j) { std::vector<typename Executor<Dtype, P>::Ptype> result_vector;
result_vector.push_back(output_ptr[j]); for (int j = 0; j < output_tensor->numel(); ++j) {
result_vector.push_back(output_ptr[j]);
}
return result_vector;
} else {
DLOG << "return empty vector";
return {};
} }
return result_vector;
} }
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
......
...@@ -57,7 +57,9 @@ void OperatorBase<Dtype>::CheckAllInputOutputSet() const {} ...@@ -57,7 +57,9 @@ void OperatorBase<Dtype>::CheckAllInputOutputSet() const {}
template <typename Dtype> template <typename Dtype>
void OperatorBase<Dtype>::Run() { void OperatorBase<Dtype>::Run() {
DLOG << " begin run " << type_;
RunImpl(); RunImpl();
DLOG << " end run " << type_;
#ifdef PADDLE_MOBILE_DEBUG #ifdef PADDLE_MOBILE_DEBUG
DLOG << "-------------" << type_ << "----------------------------"; DLOG << "-------------" << type_ << "----------------------------";
vector<string> input_keys = GetInputKeys(); vector<string> input_keys = GetInputKeys();
...@@ -100,8 +102,9 @@ void OperatorBase<Dtype>::Run() { ...@@ -100,8 +102,9 @@ void OperatorBase<Dtype>::Run() {
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
if (type_ == "fetch") { if (type_ == "fetch") {
Tensor *tensor = vari->template GetMutable<framework::LoDTensor>(); Tensor *tensor = vari->template GetMutable<framework::LoDTensor>();
if (tensor) if (tensor){
DLOG << type_ << " output- " << key << "=" << tensor->dims(); DLOG << type_ << " output- " << key << "=" << tensor->dims();
}
} else { } else {
CLImage *cl_image = vari->template GetMutable<framework::CLImage>(); CLImage *cl_image = vari->template GetMutable<framework::CLImage>();
// cl_command_queue commandQueue = // cl_command_queue commandQueue =
......
...@@ -948,6 +948,7 @@ class FetchParam : public OpParam { ...@@ -948,6 +948,7 @@ class FetchParam : public OpParam {
input_x_ = InputXFrom<GType>(inputs, scope); input_x_ = InputXFrom<GType>(inputs, scope);
out_ = OutFrom(outputs, scope); out_ = OutFrom(outputs, scope);
} }
const RType *InputX() const { return input_x_; } const RType *InputX() const { return input_x_; }
Tensor *Out() const { return out_; } Tensor *Out() const { return out_; }
......
...@@ -34,23 +34,23 @@ int main() { ...@@ -34,23 +34,23 @@ int main() {
GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims); GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
auto vec_result = paddle_mobile.Predict(input, dims); auto vec_result = paddle_mobile.Predict(input, dims);
std::vector<float>::iterator biggest = // std::vector<float>::iterator biggest =
std::max_element(std::begin(vec_result), std::end(vec_result)); // std::max_element(std::begin(vec_result), std::end(vec_result));
std::cout << " Max element is " << *biggest << " at position " // std::cout << " Max element is " << *biggest << " at position "
<< std::distance(std::begin(vec_result), biggest) << std::endl; // << std::distance(std::begin(vec_result), biggest) << std::endl;
// 预热十次
for (int i = 0; i < 10; ++i) { // for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims); // auto vec_result = paddle_mobile.Predict(input, dims);
} // }
auto time3 = paddle_mobile::time(); // auto time3 = paddle_mobile::time();
for (int i = 0; i < 10; ++i) { // for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims); // auto vec_result = paddle_mobile.Predict(input, dims);
} // }
DLOG << vec_result; // DLOG << vec_result;
auto time4 = paddle_mobile::time(); // auto time4 = paddle_mobile::time();
std::cout << "predict cost :" << paddle_mobile::time_diff(time3, time4) / 10 // std::cout << "predict cost :" << paddle_mobile::time_diff(time3, time4) / 10 << "ms"
<< "ms" << std::endl; // << std::endl;
} }
std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana " std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册