提交 12c3af21 编写于 作者: L Liangliang He

Sync mace changes and add latency logging

上级 6bef90cd
...@@ -15,11 +15,10 @@ cc_binary( ...@@ -15,11 +15,10 @@ cc_binary(
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
"//codegen:generated_models", "//codegen:generated_models",
"@mace//:mace_headers",
"//external:gflags_nothreads", "//external:gflags_nothreads",
] + if_hexagon_enabled([ ] + if_hexagon_enabled([
"//lib/hexagon:hexagon", "//lib/hexagon:hexagon",
])+ if_production_mode([ ]) + if_production_mode([
"@mace//:mace_prod", "@mace//:mace_prod",
"//codegen:generated_opencl_prod", "//codegen:generated_opencl_prod",
"//codegen:generated_tuning_params", "//codegen:generated_tuning_params",
......
...@@ -14,17 +14,19 @@ ...@@ -14,17 +14,19 @@
* --model_data_file=model_data.data \ * --model_data_file=model_data.data \
* --device=OPENCL * --device=OPENCL
*/ */
#include <cstdlib>
#include <fstream>
#include <malloc.h> #include <malloc.h>
#include <numeric>
#include <iostream>
#include <stdint.h> #include <stdint.h>
#include <sys/time.h> #include <sys/time.h>
#include <time.h> #include <time.h>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <numeric>
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "mace/core/public/mace.h" #include "mace/public/mace.h"
#include "mace/utils/env_time.h"
#include "mace/utils/logging.h"
using namespace std; using namespace std;
using namespace mace; using namespace mace;
...@@ -32,22 +34,16 @@ using namespace mace; ...@@ -32,22 +34,16 @@ using namespace mace;
namespace mace { namespace mace {
namespace MACE_MODEL_TAG { namespace MACE_MODEL_TAG {
extern unsigned char *LoadModelData(const char *model_data_file); extern const unsigned char *LoadModelData(const char *model_data_file);
extern void UnloadModelData(unsigned char *model_data); extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet(const unsigned char *model_data); extern NetDef CreateNet(const unsigned char *model_data);
extern const std::string ModelChecksum(); extern const std::string ModelChecksum();
} } // namespace MACE_MODEL_TAG
} } // namespace mace
inline int64_t NowMicros() {
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
}
void ParseShape(const string &str, vector<int64_t> *shape) { void ParseShape(const string &str, vector<int64_t> *shape) {
string tmp = str; string tmp = str;
...@@ -80,56 +76,46 @@ DeviceType ParseDeviceType(const string &device_str) { ...@@ -80,56 +76,46 @@ DeviceType ParseDeviceType(const string &device_str) {
struct mallinfo LogMallinfoChange(struct mallinfo prev) { struct mallinfo LogMallinfoChange(struct mallinfo prev) {
struct mallinfo curr = mallinfo(); struct mallinfo curr = mallinfo();
if (prev.arena != curr.arena) { if (prev.arena != curr.arena) {
std::cout << "Non-mmapped space allocated (bytes): " << curr.arena LOG(INFO) << "Non-mmapped space allocated (bytes): " << curr.arena
<< ", diff: " << ((int64_t)curr.arena - (int64_t)prev.arena) << ", diff: " << ((int64_t)curr.arena - (int64_t)prev.arena);
<< std::endl;
} }
if (prev.ordblks != curr.ordblks) { if (prev.ordblks != curr.ordblks) {
std::cout << "Number of free chunks: " << curr.ordblks LOG(INFO) << "Number of free chunks: " << curr.ordblks
<< ", diff: " << ((int64_t)curr.ordblks - (int64_t)prev.ordblks) << ", diff: " << ((int64_t)curr.ordblks - (int64_t)prev.ordblks);
<< std::endl;
} }
if (prev.smblks != curr.smblks) { if (prev.smblks != curr.smblks) {
std::cout << "Number of free fastbin blocks: " << curr.smblks LOG(INFO) << "Number of free fastbin blocks: " << curr.smblks
<< ", diff: " << ((int64_t)curr.smblks - (int64_t)prev.smblks) << ", diff: " << ((int64_t)curr.smblks - (int64_t)prev.smblks);
<< std::endl;
} }
if (prev.hblks != curr.hblks) { if (prev.hblks != curr.hblks) {
std::cout << "Number of mmapped regions: " << curr.hblks LOG(INFO) << "Number of mmapped regions: " << curr.hblks
<< ", diff: " << ((int64_t)curr.hblks - (int64_t)prev.hblks) << ", diff: " << ((int64_t)curr.hblks - (int64_t)prev.hblks);
<< std::endl;
} }
if (prev.hblkhd != curr.hblkhd) { if (prev.hblkhd != curr.hblkhd) {
std::cout << "Space allocated in mmapped regions (bytes): " << curr.hblkhd LOG(INFO) << "Space allocated in mmapped regions (bytes): " << curr.hblkhd
<< ", diff: " << ((int64_t)curr.hblkhd - (int64_t)prev.hblkhd) << ", diff: " << ((int64_t)curr.hblkhd - (int64_t)prev.hblkhd);
<< std::endl;
} }
if (prev.usmblks != curr.usmblks) { if (prev.usmblks != curr.usmblks) {
std::cout << "Maximum total allocated space (bytes): " << curr.usmblks LOG(INFO) << "Maximum total allocated space (bytes): " << curr.usmblks
<< ", diff: " << ((int64_t)curr.usmblks - (int64_t)prev.usmblks) << ", diff: " << ((int64_t)curr.usmblks - (int64_t)prev.usmblks);
<< std::endl;
} }
if (prev.fsmblks != curr.fsmblks) { if (prev.fsmblks != curr.fsmblks) {
std::cout << "Space in freed fastbin blocks (bytes): " << curr.fsmblks LOG(INFO) << "Space in freed fastbin blocks (bytes): " << curr.fsmblks
<< ", diff: " << ((int64_t)curr.fsmblks - (int64_t)prev.fsmblks) << ", diff: " << ((int64_t)curr.fsmblks - (int64_t)prev.fsmblks);
<< std::endl;
} }
if (prev.uordblks != curr.uordblks) { if (prev.uordblks != curr.uordblks) {
std::cout << "Total allocated space (bytes): " << curr.uordblks LOG(INFO) << "Total allocated space (bytes): " << curr.uordblks
<< ", diff: " << ", diff: "
<< ((int64_t)curr.uordblks - (int64_t)prev.uordblks) << ((int64_t)curr.uordblks - (int64_t)prev.uordblks);
<< std::endl;
} }
if (prev.fordblks != curr.fordblks) { if (prev.fordblks != curr.fordblks) {
std::cout << "Total free space (bytes): " << curr.fordblks << ", diff: " LOG(INFO) << "Total free space (bytes): " << curr.fordblks << ", diff: "
<< ((int64_t)curr.fordblks - (int64_t)prev.fordblks) << ((int64_t)curr.fordblks - (int64_t)prev.fordblks);
<< std::endl;
} }
if (prev.keepcost != curr.keepcost) { if (prev.keepcost != curr.keepcost) {
std::cout << "Top-most, releasable space (bytes): " << curr.keepcost LOG(INFO) << "Top-most, releasable space (bytes): " << curr.keepcost
<< ", diff: " << ", diff: "
<< ((int64_t)curr.keepcost - (int64_t)prev.keepcost) << ((int64_t)curr.keepcost - (int64_t)prev.keepcost);
<< std::endl;
} }
return curr; return curr;
} }
...@@ -138,7 +124,9 @@ DEFINE_string(input_shape, "1,224,224,3", "input shape, separated by comma"); ...@@ -138,7 +124,9 @@ DEFINE_string(input_shape, "1,224,224,3", "input shape, separated by comma");
DEFINE_string(output_shape, "1,224,224,2", "output shape, separated by comma"); DEFINE_string(output_shape, "1,224,224,2", "output shape, separated by comma");
DEFINE_string(input_file, "", "input file name"); DEFINE_string(input_file, "", "input file name");
DEFINE_string(output_file, "", "output file name"); DEFINE_string(output_file, "", "output file name");
DEFINE_string(model_data_file, "", "model data file name, used when EMBED_MODEL_DATA set to 0"); DEFINE_string(model_data_file,
"",
"model data file name, used when EMBED_MODEL_DATA set to 0");
DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON"); DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON");
DEFINE_int32(round, 1, "round"); DEFINE_int32(round, 1, "round");
DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable"); DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
...@@ -147,16 +135,15 @@ int main(int argc, char **argv) { ...@@ -147,16 +135,15 @@ int main(int argc, char **argv) {
gflags::SetUsageMessage("some usage message"); gflags::SetUsageMessage("some usage message");
gflags::ParseCommandLineFlags(&argc, &argv, true); gflags::ParseCommandLineFlags(&argc, &argv, true);
std::cout << "mace version: " << MaceVersion() << std::endl LOG(INFO) << "mace version: " << MaceVersion()
<< "mace git version: " << MaceGitVersion() << std::endl << "mace git version: " << MaceGitVersion()
<< "model checksum: " << mace::MACE_MODEL_TAG::ModelChecksum() << std::endl << "model checksum: " << mace::MACE_MODEL_TAG::ModelChecksum()
<< "input_shape: " << FLAGS_input_shape << std::endl << "input_shape: " << FLAGS_input_shape
<< "output_shape: " << FLAGS_output_shape << std::endl << "output_shape: " << FLAGS_output_shape
<< "input_file: " << FLAGS_input_file << std::endl << "input_file: " << FLAGS_input_file
<< "output_file: " << FLAGS_output_file << std::endl << "output_file: " << FLAGS_output_file
<< "model_data_file: " << FLAGS_model_data_file << std::endl << "model_data_file: " << FLAGS_model_data_file
<< "device: " << FLAGS_device << std::endl << "device: " << FLAGS_device << "round: " << FLAGS_round;
<< "round: " << FLAGS_round << std::endl;
vector<int64_t> input_shape_vec; vector<int64_t> input_shape_vec;
vector<int64_t> output_shape_vec; vector<int64_t> output_shape_vec;
...@@ -165,18 +152,21 @@ int main(int argc, char **argv) { ...@@ -165,18 +152,21 @@ int main(int argc, char **argv) {
// load model // load model
int64_t t0 = NowMicros(); int64_t t0 = NowMicros();
unsigned char *model_data = mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str()); const unsigned char *model_data =
mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str());
NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data); NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data);
int64_t t1 = NowMicros(); int64_t t1 = NowMicros();
std::cout << "CreateNetDef duration: " << t1 - t0 << " us" << std::endl; LOG(INFO) << "CreateNetDef latency: " << t1 - t0 << " us";
int64_t init_micros = t1 - t0; int64_t init_micros = t1 - t0;
DeviceType device_type = ParseDeviceType(FLAGS_device); DeviceType device_type = ParseDeviceType(FLAGS_device);
std::cout << "Device Type" << device_type << std::endl; LOG(INFO) << "Runing with device type: " << device_type;
int64_t input_size = std::accumulate(input_shape_vec.begin(), int64_t input_size =
input_shape_vec.end(), 1, std::multiplies<int64_t>()); std::accumulate(input_shape_vec.begin(), input_shape_vec.end(), 1,
int64_t output_size = std::accumulate(output_shape_vec.begin(), std::multiplies<int64_t>());
output_shape_vec.end(), 1, std::multiplies<int64_t>()); int64_t output_size =
std::accumulate(output_shape_vec.begin(), output_shape_vec.end(), 1,
std::multiplies<int64_t>());
std::unique_ptr<float[]> input_data(new float[input_size]); std::unique_ptr<float[]> input_data(new float[input_size]);
std::unique_ptr<float[]> output_data(new float[output_size]); std::unique_ptr<float[]> output_data(new float[output_size]);
...@@ -187,12 +177,12 @@ int main(int argc, char **argv) { ...@@ -187,12 +177,12 @@ int main(int argc, char **argv) {
input_size * sizeof(float)); input_size * sizeof(float));
in_file.close(); in_file.close();
} else { } else {
std::cout << "Open input file failed" << std::endl; LOG(INFO) << "Open input file failed";
return -1; return -1;
} }
// Init model // Init model
std::cout << "Run init" << std::endl; LOG(INFO) << "Run init";
t0 = NowMicros(); t0 = NowMicros();
mace::MaceEngine engine(&net_def, device_type); mace::MaceEngine engine(&net_def, device_type);
if (device_type == DeviceType::OPENCL || device_type == DeviceType::HEXAGON) { if (device_type == DeviceType::OPENCL || device_type == DeviceType::HEXAGON) {
...@@ -200,39 +190,38 @@ int main(int argc, char **argv) { ...@@ -200,39 +190,38 @@ int main(int argc, char **argv) {
} }
t1 = NowMicros(); t1 = NowMicros();
init_micros += t1 - t0; init_micros += t1 - t0;
std::cout << "Net init duration: " << t1 - t0 << " us" << std::endl; LOG(INFO) << "Net init latency: " << t1 - t0 << " us";
LOG(INFO) << "Total init latency: " << init_micros << " us";
std::cout << "Total init duration: " << init_micros << " us" << std::endl;
std::cout << "Warm up" << std::endl; LOG(INFO) << "Warm up run";
t0 = NowMicros(); t0 = NowMicros();
engine.Run(input_data.get(), input_shape_vec, output_data.get()); engine.Run(input_data.get(), input_shape_vec, output_data.get());
t1 = NowMicros(); t1 = NowMicros();
std::cout << "1st warm up run duration: " << t1 - t0 << " us" << std::endl; LOG(INFO) << "1st warm up run latency: " << t1 - t0 << " us";
if (FLAGS_round > 0) { if (FLAGS_round > 0) {
std::cout << "Run model" << std::endl; LOG(INFO) << "Run model";
t0 = NowMicros(); t0 = NowMicros();
struct mallinfo prev = mallinfo(); struct mallinfo prev = mallinfo();
for (int i = 0; i < FLAGS_round; ++i) { for (int i = 0; i < FLAGS_round; ++i) {
engine.Run(input_data.get(), input_shape_vec, output_data.get()); engine.Run(input_data.get(), input_shape_vec, output_data.get());
if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) { if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) {
std::cout << "=== check malloc info change #" << i << " ===" << std::endl; LOG(INFO) << "=== check malloc info change #" << i << " ===";
prev = LogMallinfoChange(prev); prev = LogMallinfoChange(prev);
} }
} }
t1 = NowMicros(); t1 = NowMicros();
std::cout << "Avg duration: " << (t1 - t0) / FLAGS_round << " us" << std::endl; LOG(INFO) << "Averate latency: " << (t1 - t0) / FLAGS_round << " us";
} }
if (output_data != nullptr) { if (output_data != nullptr) {
ofstream out_file(FLAGS_output_file, ios::binary); ofstream out_file(FLAGS_output_file, ios::binary);
out_file.write((const char *) (output_data.get()), out_file.write((const char *)(output_data.get()),
output_size * sizeof(float)); output_size * sizeof(float));
out_file.flush(); out_file.flush();
out_file.close(); out_file.close();
std::cout << "Write output file done." << std::endl; LOG(INFO) << "Write output file done.";
} else { } else {
std::cout << "output data is null" << std::endl; LOG(INFO) << "Output data is null";
} }
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册