cxx_api_bin.cc 4.1 KB
Newer Older
S
Superjomn 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

S
superjomn 已提交
15
#include "paddle/fluid/lite/api/cxx_api.h"
T
tensor-tang 已提交
16
#include <chrono>  // NOLINT
C
Chunwei 已提交
17
#include "paddle/fluid/lite/api/paddle_use_passes.h"
S
superjomn 已提交
18
#include "paddle/fluid/lite/core/op_registry.h"
C
Chunwei 已提交
19

S
superjomn 已提交
20 21 22
namespace paddle {
namespace lite {

23 24 25 26 27 28 29 30 31
// Timestamp type produced by std::chrono::high_resolution_clock::now().
using Time = decltype(std::chrono::high_resolution_clock::now());

// Returns the current reading of the high-resolution clock.
Time time() { return std::chrono::high_resolution_clock::now(); }

// Returns the interval from t1 to t2 expressed in milliseconds.
// The interval is first truncated to whole microseconds, so the result has
// microsecond resolution; it is negative when t2 precedes t1.
double time_diff(Time t1, Time t2) {
  using Micros = std::chrono::microseconds;
  const Micros elapsed = std::chrono::duration_cast<Micros>(t2 - t1);
  return elapsed.count() / 1000.0;
}

X
xingzhaolong 已提交
32
void Run(const char* model_dir, int repeat) {
33 34 35
#ifdef LITE_WITH_ARM
  DeviceInfo::Init();
#endif
C
Chunwei 已提交
36
  lite::Predictor predictor;
X
xingzhaolong 已提交
37 38 39 40 41
  std::vector<Place> valid_places({
      Place{TARGET(kHost), PRECISION(kFloat)},
      Place{TARGET(kARM), PRECISION(kFloat)},
      Place{TARGET(kARM), PRECISION(kInt8)},
  });
S
superjomn 已提交
42

X
xingzhaolong 已提交
43
  predictor.Build(model_dir, Place{TARGET(kARM), PRECISION(kInt8)},
S
superjomn 已提交
44 45 46
                  valid_places);

  auto* input_tensor = predictor.GetInput(0);
47
  input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
S
superjomn 已提交
48
  auto* data = input_tensor->mutable_data<float>();
49
  for (int i = 0; i < input_tensor->dims().production(); i++) {
50
    data[i] = 1;
S
superjomn 已提交
51 52
  }

53 54 55 56 57
  auto time1 = time();
  for (int i = 0; i < repeat; i++) predictor.Run();
  auto time2 = time();
  std::cout << " predict cost: " << time_diff(time1, time2) / repeat << "ms"
            << std::endl;
S
superjomn 已提交
58 59 60 61 62 63

  auto* out = predictor.GetOutput(0);
  LOG(INFO) << out << " memory size " << out->data_size();
  LOG(INFO) << "out " << out->data<float>()[0];
  LOG(INFO) << "out " << out->data<float>()[1];
  LOG(INFO) << "dims " << out->dims();
64
  LOG(INFO) << "out data size: " << out->data_size();
S
superjomn 已提交
65 66 67 68 69
}

}  // namespace lite
}  // namespace paddle

S
Superjomn 已提交
70
int main(int argc, char** argv) {
X
xingzhaolong 已提交
71 72
  CHECK_EQ(argc, 3) << "usage: ./cmd <model_dir> <repeat>";
  paddle::lite::Run(argv[1], std::stoi(argv[2]));
S
superjomn 已提交
73 74 75 76 77 78 79 80 81

  return 0;
}

// ---- Operators referenced by this binary ----
USE_LITE_OP(mul);
USE_LITE_OP(fc);
USE_LITE_OP(scale);
USE_LITE_OP(feed);
USE_LITE_OP(fetch);
USE_LITE_OP(io_copy);

USE_LITE_OP(conv2d);
USE_LITE_OP(batch_norm);
USE_LITE_OP(relu);
USE_LITE_OP(depthwise_conv2d);
USE_LITE_OP(pool2d);
USE_LITE_OP(elementwise_add);
USE_LITE_OP(softmax);

// Quantization-related operators.
USE_LITE_OP(fake_quantize_moving_average_abs_max);
USE_LITE_OP(fake_dequantize_max_abs);
USE_LITE_OP(calib);

// ---- Host kernels, referenced regardless of the target backend ----
USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);
97 98

#ifdef LITE_WITH_ARM
// ---- ARM kernels, float precision ----
USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(batch_norm, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(relu, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(softmax, kARM, kFloat, kNCHW, def);

// ---- ARM kernels, int8 precision ----
// Note: the fc aliases have no underscore (int8out / fp32out) while the
// conv2d aliases do (int8_out / fp32_out); both spellings are kept verbatim.
USE_LITE_KERNEL(fc, kARM, kInt8, kNCHW, int8out);
USE_LITE_KERNEL(fc, kARM, kInt8, kNCHW, fp32out);
USE_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW, int8_out);
USE_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW, fp32_out);
USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, fp32_to_int8);
USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, int8_to_fp32);

// ARM feed/fetch kernels are intentionally left disabled here:
// USE_LITE_KERNEL(feed, kARM, kAny, kAny, def);
// USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def);
#endif  // LITE_WITH_ARM
S
superjomn 已提交
121 122 123 124 125 126

// CUDA kernels, referenced only when the build defines LITE_WITH_CUDA.
#ifdef LITE_WITH_CUDA
USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
// io_copy is referenced under both of its aliases: host_to_device and
// device_to_host.
USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device);
USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, device_to_host);
#endif  // LITE_WITH_CUDA