Unverified · Commit d3b62910 · authored by: Tao Luo · committed by: GitHub

Merge pull request #16643 from sfraczek/fix-repeating-passes

Cherry-pick of #16559, #16606 and #16608 
@@ -141,7 +141,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
void AnalysisConfig::EnableMKLDNN() {
#ifdef PADDLE_WITH_MKLDNN
-  pass_builder()->EnableMKLDNN();
  use_mkldnn_ = true;
#else
  LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN";

@@ -234,16 +233,13 @@ void AnalysisConfig::Update() {
  }
  if (use_mkldnn_) {
+#ifdef PADDLE_WITH_MKLDNN
    if (!enable_ir_optim_) {
      LOG(ERROR)
          << "EnableMKLDNN() only works when IR optimization is enabled.";
+    } else {
+      pass_builder()->EnableMKLDNN();
    }
-#ifdef PADDLE_WITH_MKLDNN
-    pass_builder()->EnableMKLDNN();
-    use_mkldnn_ = true;
-#else
-    LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN";
-    use_mkldnn_ = false;
#endif
  }

@@ -255,9 +251,6 @@ void AnalysisConfig::Update() {
    }
#ifdef PADDLE_WITH_MKLDNN
    pass_builder()->EnableMkldnnQuantizer();
-#else
-    LOG(ERROR) << "Please compile with MKLDNN first to use MkldnnQuantizer";
-    use_mkldnn_quantizer_ = false;
#endif
  }
......
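Note on the user-facing effect of this hunk: after the change, `AnalysisConfig::EnableMKLDNN()` only records the request, and the MKLDNN passes are appended once inside `Update()`, and only when IR optimization is enabled. A minimal usage sketch under those assumptions (not part of the patch; the model path is a placeholder and a build with MKLDNN support is assumed):

```cpp
#include "paddle/fluid/inference/api/paddle_inference_api.h"

// Minimal sketch: configure CPU inference with MKLDNN enabled.
// "./model" is a placeholder directory. SwitchIrOptim() matters because,
// after this patch, the MKLDNN passes are only registered when IR
// optimization is enabled (see the Update() hunk above).
int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./model");   // placeholder model directory
  config.DisableGpu();
  config.SwitchIrOptim(true);   // required for EnableMKLDNN() to take effect
  config.EnableMKLDNN();        // request MKLDNN; passes are added in Update()
  auto predictor = paddle::CreatePaddlePredictor(config);
  return predictor != nullptr ? 0 : 1;
}
```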
@@ -27,6 +27,7 @@
#include <string>
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/string/printf.h"

@@ -266,17 +267,17 @@ static std::string DescribeZeroCopyTensor(const ZeroCopyTensor &tensor) {
}

static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
-                      double latency, int epoch = 1) {
-  LOG(INFO) << "====== batch_size: " << batch_size << ", repeat: " << repeat
-            << ", threads: " << num_threads << ", thread id: " << tid
-            << ", latency: " << latency << "ms, fps: " << 1 / (latency / 1000.f)
-            << " ======";
-  if (epoch > 1) {
-    int samples = batch_size * epoch;
-    LOG(INFO) << "====== sample number: " << samples
-              << ", average latency of each sample: " << latency / samples
-              << "ms ======";
-  }
+                      double batch_latency, int epoch = 1) {
+  PADDLE_ENFORCE(batch_size > 0, "Non-positive batch size.");
+  double sample_latency = batch_latency / batch_size;
+  LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid
+            << " ======";
+  LOG(INFO) << "====== batch_size: " << batch_size << ", iterations: " << epoch
+            << ", repetitions: " << repeat << " ======";
+  LOG(INFO) << "====== batch latency: " << batch_latency
+            << "ms, number of samples: " << batch_size * epoch
+            << ", sample latency: " << sample_latency
+            << "ms, fps: " << 1000.f / sample_latency << " ======";
}

static bool IsFileExists(const std::string &path) {
......
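The reworked `PrintTime` now receives the latency of a whole batch and derives per-sample latency and fps from it. A standalone sketch of the same arithmetic with made-up numbers (not part of the patch):

```cpp
#include <cassert>
#include <cstdio>

// Standalone sketch of the arithmetic the new PrintTime performs.
// Illustrative numbers only: a batch of 50 samples finishing in 200 ms.
int main() {
  int batch_size = 50;
  double batch_latency = 200.0;                         // ms per batch
  assert(batch_size > 0);                               // mirrors the PADDLE_ENFORCE
  double sample_latency = batch_latency / batch_size;   // 4 ms per sample
  double fps = 1000.0 / sample_latency;                 // 250 samples per second
  std::printf("sample latency: %.2f ms, fps: %.1f\n", sample_latency, fps);
  return 0;
}
```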
@@ -64,10 +64,12 @@ void PaddlePassBuilder::DeletePass(size_t idx) {
  passes_.erase(std::begin(passes_) + idx);
}

-void GpuPassStrategy::EnableMKLDNN() {
-  LOG(ERROR) << "GPU not support MKLDNN yet";
+void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) {
+  analysis_passes_.push_back(pass);
}

+void PaddlePassBuilder::ClearPasses() { passes_.clear(); }
+
// The following passes works for Anakin sub-graph engine.
const std::vector<std::string> kAnakinSubgraphPasses({
    "infer_clean_graph_pass",  //

@@ -102,12 +104,12 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
  use_gpu_ = true;
}

-void GpuPassStrategy::EnableMkldnnQuantizer() {
-  LOG(ERROR) << "GPU not support MKL-DNN quantization";
+void GpuPassStrategy::EnableMKLDNN() {
+  LOG(ERROR) << "GPU not support MKLDNN yet";
}

-void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) {
-  analysis_passes_.push_back(pass);
+void GpuPassStrategy::EnableMkldnnQuantizer() {
+  LOG(ERROR) << "GPU not support MKL-DNN quantization";
}

CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {

@@ -135,5 +137,39 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
  });
  use_gpu_ = false;
}

-void PaddlePassBuilder::ClearPasses() { passes_.clear(); }
+void CpuPassStrategy::EnableMKLDNN() {
+// TODO(Superjomn) Consider the way to mix CPU with GPU.
+#ifdef PADDLE_WITH_MKLDNN
+  if (!use_mkldnn_) {
+    passes_.insert(passes_.begin(), "mkldnn_placement_pass");
+
+    for (auto &pass : std::vector<std::string>(
+             {"depthwise_conv_mkldnn_pass",    //
+              "conv_bn_fuse_pass",             // Execute BN passes again to
+              "conv_eltwiseadd_bn_fuse_pass",  // preserve correct pass order
+              "conv_bias_mkldnn_fuse_pass",    //
+              "conv3d_bias_mkldnn_fuse_pass",  //
+              "conv_elementwise_add_mkldnn_fuse_pass",
+              "conv_relu_mkldnn_fuse_pass"})) {
+      passes_.push_back(pass);
+    }
+  }
+  use_mkldnn_ = true;
+#else
+  use_mkldnn_ = false;
+#endif
+}
+
+void CpuPassStrategy::EnableMkldnnQuantizer() {
+#ifdef PADDLE_WITH_MKLDNN
+  if (!use_mkldnn_quantizer_) {
+    passes_.push_back("cpu_quantize_placement_pass");
+  }
+  use_mkldnn_quantizer_ = true;
+#else
+  use_mkldnn_quantizer_ = false;
+#endif
+}

}  // namespace paddle
......

@@ -109,43 +109,16 @@ class CpuPassStrategy : public PassStrategy {
  CpuPassStrategy();

  explicit CpuPassStrategy(const CpuPassStrategy &other)
-      : PassStrategy(other.AllPasses()) {}
+      : PassStrategy(other.AllPasses()) {
+    use_gpu_ = other.use_gpu_;
+    use_mkldnn_ = other.use_mkldnn_;
+    use_mkldnn_quantizer_ = other.use_mkldnn_quantizer_;
+  }

  virtual ~CpuPassStrategy() = default;

-  void EnableMKLDNN() override {
-  // TODO(Superjomn) Consider the way to mix CPU with GPU.
-#ifdef PADDLE_WITH_MKLDNN
-    if (!use_mkldnn_) {
-      passes_.insert(passes_.begin(), "mkldnn_placement_pass");
-
-      for (auto &pass : std::vector<std::string>(
-               {"depthwise_conv_mkldnn_pass",    //
-                "conv_bn_fuse_pass",             // Execute BN passes again to
-                "conv_eltwiseadd_bn_fuse_pass",  // preserve correct pass order
-                "conv_bias_mkldnn_fuse_pass",    //
-                "conv3d_bias_mkldnn_fuse_pass",  //
-                "conv_relu_mkldnn_fuse_pass",    //
-                "conv_elementwise_add_mkldnn_fuse_pass"})) {
-        passes_.push_back(pass);
-      }
-    }
-    use_mkldnn_ = true;
-#else
-    use_mkldnn_ = false;
-#endif
-  }
-
-  void EnableMkldnnQuantizer() override {
-#ifdef PADDLE_WITH_MKLDNN
-    if (!use_mkldnn_quantizer_) {
-      passes_.push_back("cpu_quantize_placement_pass");
-    }
-    use_mkldnn_quantizer_ = true;
-#else
-    use_mkldnn_quantizer_ = false;
-#endif
-  }
+  void EnableMKLDNN() override;
+  void EnableMkldnnQuantizer() override;

 protected:
  bool use_mkldnn_quantizer_{false};
......
@@ -26,7 +26,11 @@ endfunction()
function(inference_analysis_api_int8_test target model_dir data_dir filename)
    inference_analysis_test(${target} SRCS ${filename}
            EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} benchmark
-            ARGS --infer_model=${model_dir}/model --infer_data=${data_dir}/data.bin --batch_size=100)
+            ARGS --infer_model=${model_dir}/model
+                 --infer_data=${data_dir}/data.bin
+                 --warmup_batch_size=100
+                 --batch_size=50
+                 --iterations=2)
endfunction()

function(inference_analysis_api_test_with_fake_data target install_dir filename model_name)

@@ -146,22 +150,22 @@ inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_conv
# int8 image classification tests
if(WITH_MKLDNN)
-    set(INT8_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8")
+    set(INT8_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8v2")
    if (NOT EXISTS ${INT8_DATA_DIR})
-        inference_download_and_uncompress(${INT8_DATA_DIR} "https://paddle-inference-dist.bj.bcebos.com/int8" "imagenet_val_100.tar.gz")
+        inference_download_and_uncompress(${INT8_DATA_DIR} "${INFERENCE_URL}/int8" "imagenet_val_100_tail.tar.gz")
    endif()

    #resnet50 int8
    set(INT8_RESNET50_MODEL_DIR "${INT8_DATA_DIR}/resnet50")
    if (NOT EXISTS ${INT8_RESNET50_MODEL_DIR})
-        inference_download_and_uncompress(${INT8_RESNET50_MODEL_DIR} "https://paddle-inference-dist.bj.bcebos.com/int8" "resnet50_int8_model.tar.gz" )
+        inference_download_and_uncompress(${INT8_RESNET50_MODEL_DIR} "${INFERENCE_URL}/int8" "resnet50_int8_model.tar.gz" )
    endif()
    inference_analysis_api_int8_test(test_analyzer_int8_resnet50 ${INT8_RESNET50_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc SERIAL)

    #mobilenet int8
    set(INT8_MOBILENET_MODEL_DIR "${INT8_DATA_DIR}/mobilenet")
    if (NOT EXISTS ${INT8_MOBILENET_MODEL_DIR})
-        inference_download_and_uncompress(${INT8_MOBILENET_MODEL_DIR} "https://paddle-inference-dist.bj.bcebos.com/int8" "mobilenetv1_int8_model.tar.gz" )
+        inference_download_and_uncompress(${INT8_MOBILENET_MODEL_DIR} "${INFERENCE_URL}/int8" "mobilenetv1_int8_model.tar.gz" )
    endif()
    inference_analysis_api_int8_test(test_analyzer_int8_mobilenet ${INT8_MOBILENET_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc SERIAL)
endif()
......
@@ -154,7 +154,7 @@ void profile(bool use_mkldnn = false) {
    config.EnableMKLDNN();
  }
-  std::vector<PaddleTensor> outputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<std::vector<PaddleTensor>> inputs;
  LoadInputData(&inputs);
  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&config),
......

@@ -197,7 +197,7 @@ void profile(bool use_mkldnn = false) {
    cfg.SetMKLDNNOp(op_list);
  }
-  std::vector<PaddleTensor> outputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

@@ -206,9 +206,11 @@ void profile(bool use_mkldnn = false) {
  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
    PADDLE_ENFORCE_GT(outputs.size(), 0);
-    size_t size = GetSize(outputs[0]);
+    auto output = outputs.back();
+    PADDLE_ENFORCE_GT(output.size(), 0);
+    size_t size = GetSize(output[0]);
    PADDLE_ENFORCE_GT(size, 0);
-    float *result = static_cast<float *>(outputs[0].data.data());
+    float *result = static_cast<float *>(output[0].data.data());
    for (size_t i = 0; i < size; i++) {
      EXPECT_NEAR(result[i], result_data[i], 1e-3);
    }
......
@@ -17,8 +17,6 @@ limitations under the License. */
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
-DEFINE_int32(iterations, 0, "Number of iterations");
namespace paddle {
namespace inference {
namespace analysis {

@@ -30,8 +28,13 @@ void SetConfig(AnalysisConfig *cfg) {
  cfg->SwitchIrOptim();
  cfg->SwitchSpecifyInputNames(false);
  cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
  cfg->EnableMKLDNN();
+  cfg->pass_builder()->SetPasses(
+      {"infer_clean_graph_pass", "mkldnn_placement_pass",
+       "depthwise_conv_mkldnn_pass", "conv_bn_fuse_pass",
+       "conv_eltwiseadd_bn_fuse_pass", "conv_bias_mkldnn_fuse_pass",
+       "conv_elementwise_add_mkldnn_fuse_pass", "conv_relu_mkldnn_fuse_pass",
+       "fc_fuse_pass", "is_test_pass"});
}

template <typename T>

@@ -40,8 +43,8 @@ class TensorReader {
  TensorReader(std::ifstream &file, size_t beginning_offset,
               std::vector<int> shape, std::string name)
      : file_(file), position(beginning_offset), shape_(shape), name_(name) {
-    numel =
-        std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<T>());
+    numel = std::accumulate(shape_.begin(), shape_.end(), size_t{1},
+                            std::multiplies<size_t>());
  }

  PaddleTensor NextBatch() {

@@ -71,10 +74,14 @@ class TensorReader {
};

std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
-    const std::vector<std::vector<PaddleTensor>> &test_data, int num_images) {
+    const std::vector<std::vector<PaddleTensor>> &test_data,
+    int num_images = FLAGS_warmup_batch_size) {
  int test_data_batch_size = test_data[0][0].shape[0];
-  CHECK_LE(static_cast<size_t>(num_images),
-           test_data.size() * test_data_batch_size);
+  auto iterations_max = test_data.size();
+  PADDLE_ENFORCE(
+      static_cast<size_t>(num_images) <= iterations_max * test_data_batch_size,
+      "The requested quantization warmup data size " +
+          std::to_string(num_images) + " is bigger than all test data size.");

  PaddleTensor images;
  images.name = "input";

@@ -120,20 +127,17 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
  std::vector<int> image_batch_shape{batch_size, 3, 224, 224};
  std::vector<int> label_batch_shape{batch_size, 1};
+  auto images_offset_in_file = static_cast<size_t>(file.tellg());
  auto labels_offset_in_file =
-      static_cast<size_t>(file.tellg()) +
-      sizeof(float) * total_images *
-          std::accumulate(image_batch_shape.begin() + 1,
-                          image_batch_shape.end(), 1, std::multiplies<int>());
+      images_offset_in_file + sizeof(float) * total_images * 3 * 224 * 224;

-  TensorReader<float> image_reader(file, 0, image_batch_shape, "input");
+  TensorReader<float> image_reader(file, images_offset_in_file,
+                                   image_batch_shape, "input");
  TensorReader<int64_t> label_reader(file, labels_offset_in_file,
                                     label_batch_shape, "label");

-  auto iterations = total_images / batch_size;
-  if (FLAGS_iterations > 0 && FLAGS_iterations < iterations)
-    iterations = FLAGS_iterations;
-  for (auto i = 0; i < iterations; i++) {
+  auto iterations_max = total_images / batch_size;
+  for (auto i = 0; i < iterations_max; i++) {
    auto images = image_reader.NextBatch();
    auto labels = label_reader.NextBatch();
    inputs->emplace_back(

@@ -148,20 +152,21 @@ TEST(Analyzer_int8_resnet50, quantization) {
  AnalysisConfig q_cfg;
  SetConfig(&q_cfg);

+  // read data from file and prepare batches with test data
  std::vector<std::vector<PaddleTensor>> input_slots_all;
-  SetInput(&input_slots_all, 100);
+  SetInput(&input_slots_all);

+  // prepare warmup batch from input data read earlier
+  // warmup batch size can be different than batch size
  std::shared_ptr<std::vector<PaddleTensor>> warmup_data =
-      GetWarmupData(input_slots_all, 100);
+      GetWarmupData(input_slots_all);

+  // configure quantizer
  q_cfg.EnableMkldnnQuantizer();
  q_cfg.mkldnn_quantizer_config()->SetWarmupData(warmup_data);
-  q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(100);
+  q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(FLAGS_warmup_batch_size);

-  CompareQuantizedAndAnalysis(
-      reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
-      reinterpret_cast<const PaddlePredictor::Config *>(&q_cfg),
-      input_slots_all);
+  CompareQuantizedAndAnalysis(&cfg, &q_cfg, input_slots_all);
}

}  // namespace analysis
......
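The `std::accumulate` changes in this file (and the matching ones in tester_helper.h further down) are about the accumulator type: `std::accumulate` computes in the type of its initial value, so a plain `1` (or the old `std::multiplies<T>()`) performs the shape product in a 32-bit type and can wrap for large tensors, while `size_t{1}` with `std::multiplies<size_t>` keeps the whole product in `size_t`. A small standalone illustration, not part of the patch (it uses `unsigned` for the bad case so the wrap-around stays well-defined, and assumes a 64-bit `size_t`):

```cpp
#include <cstdio>
#include <functional>
#include <numeric>
#include <vector>

int main() {
  // Element count of a 50000 x 3 x 224 x 224 tensor: 7,526,400,000,
  // which does not fit into 32 bits.
  std::vector<int> shape = {50000, 3, 224, 224};

  // With a 32-bit initial value the whole product is computed in 32 bits
  // and silently wraps around.
  unsigned wrapped = std::accumulate(shape.begin(), shape.end(), 1u,
                                     std::multiplies<unsigned>());

  // With size_t{1} (as in the patch) the product stays in size_t.
  size_t correct = std::accumulate(shape.begin(), shape.end(), size_t{1},
                                   std::multiplies<size_t>());

  std::printf("32-bit accumulator: %u\n64-bit accumulator: %zu\n",
              wrapped, correct);
  return 0;
}
```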
@@ -124,7 +124,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST(Analyzer_LAC, profile) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
-  std::vector<PaddleTensor> outputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

@@ -137,11 +137,13 @@ TEST(Analyzer_LAC, profile) {
        24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25, 25, 25, 25, 25,
        44, 24, 25, 25, 25, 36, 42, 43, 44, 14, 15, 44, 14, 15, 44, 14,
        15, 44, 38, 39, 14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23};
-    PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
-    size_t size = GetSize(outputs[0]);
+    PADDLE_ENFORCE_GT(outputs.size(), 0);
+    auto output = outputs.back();
+    PADDLE_ENFORCE_EQ(output.size(), 1UL);
+    size_t size = GetSize(output[0]);
    size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t);
    PADDLE_ENFORCE_GE(size, batch1_size);
-    int64_t *pdata = static_cast<int64_t *>(outputs[0].data.data());
+    int64_t *pdata = static_cast<int64_t *>(output[0].data.data());
    for (size_t i = 0; i < batch1_size; ++i) {
      EXPECT_EQ(pdata[i], lac_ref_data[i]);
    }
......

@@ -96,7 +96,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
void profile(bool use_mkldnn = false) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
-  std::vector<PaddleTensor> outputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
  if (use_mkldnn) {
    cfg.EnableMKLDNN();

@@ -108,8 +108,9 @@ void profile(bool use_mkldnn = false) {
                 input_slots_all, &outputs, FLAGS_num_threads);
  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
-    PADDLE_ENFORCE_EQ(outputs.size(), 2UL);
-    for (auto &output : outputs) {
+    PADDLE_ENFORCE_GT(outputs.size(), 0);
+    PADDLE_ENFORCE_EQ(outputs.back().size(), 2UL);
+    for (auto &output : outputs.back()) {
      size_t size = GetSize(output);
      PADDLE_ENFORCE_GT(size, 0);
      float *result = static_cast<float *>(output.data.data());
......
@@ -106,7 +106,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
void profile(bool memory_load = false) {
  AnalysisConfig cfg;
  SetConfig(&cfg, memory_load);
-  std::vector<PaddleTensor> outputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

@@ -117,10 +117,12 @@ void profile(bool memory_load = false) {
    // the first inference result
    const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26,
                                           48, 39, 38, 16, 25};
-    PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
-    size_t size = GetSize(outputs[0]);
+    PADDLE_ENFORCE_GT(outputs.size(), 0);
+    auto output = outputs.back();
+    PADDLE_ENFORCE_EQ(output.size(), 1UL);
+    size_t size = GetSize(output[0]);
    PADDLE_ENFORCE_GT(size, 0);
-    int64_t *result = static_cast<int64_t *>(outputs[0].data.data());
+    int64_t *result = static_cast<int64_t *>(output[0].data.data());
    for (size_t i = 0; i < std::min(11UL, size); i++) {
      EXPECT_EQ(result[i], chinese_ner_result_data[i]);
    }
......

@@ -127,7 +127,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST(Analyzer_Pyramid_DNN, profile) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
-  std::vector<PaddleTensor> outputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

@@ -135,10 +135,12 @@ TEST(Analyzer_Pyramid_DNN, profile) {
                 input_slots_all, &outputs, FLAGS_num_threads);
  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data && !FLAGS_zero_copy) {
-    PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
-    size_t size = GetSize(outputs[0]);
+    PADDLE_ENFORCE_GT(outputs.size(), 0);
+    auto output = outputs.back();
+    PADDLE_ENFORCE_EQ(output.size(), 1UL);
+    size_t size = GetSize(output[0]);
    PADDLE_ENFORCE_GT(size, 0);
-    float *result = static_cast<float *>(outputs[0].data.data());
+    float *result = static_cast<float *>(output[0].data.data());
    // output is probability, which is in (0, 1).
    for (size_t i = 0; i < size; i++) {
      EXPECT_GT(result[i], 0);
......
@@ -40,7 +40,7 @@ void profile(bool use_mkldnn = false) {
  if (use_mkldnn) {
    cfg.EnableMKLDNN();
  }
-  std::vector<PaddleTensor> outputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
......

@@ -229,7 +229,7 @@ TEST(Analyzer_rnn1, profile) {
  SetConfig(&cfg);
  cfg.DisableGpu();
  cfg.SwitchIrDebug();
-  std::vector<PaddleTensor> outputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

@@ -280,7 +280,7 @@ TEST(Analyzer_rnn1, compare_determine) {
TEST(Analyzer_rnn1, multi_thread) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
-  std::vector<PaddleTensor> outputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
......
@@ -126,7 +126,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST(Analyzer_rnn2, profile) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
-  std::vector<PaddleTensor> outputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

@@ -136,9 +136,11 @@ TEST(Analyzer_rnn2, profile) {
  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
    // the first inference result
    PADDLE_ENFORCE_GT(outputs.size(), 0);
-    size_t size = GetSize(outputs[0]);
+    auto output = outputs.back();
+    PADDLE_ENFORCE_GT(output.size(), 0);
+    size_t size = GetSize(output[0]);
    PADDLE_ENFORCE_GT(size, 0);
-    float *result = static_cast<float *>(outputs[0].data.data());
+    float *result = static_cast<float *>(output[0].data.data());
    for (size_t i = 0; i < size; i++) {
      EXPECT_NEAR(result[i], result_data[i], 1e-3);
    }
......

@@ -110,7 +110,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST(Analyzer_seq_conv1, profile) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
-  std::vector<PaddleTensor> outputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

@@ -119,10 +119,12 @@ TEST(Analyzer_seq_conv1, profile) {
  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
    // the first inference result
-    PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
-    size_t size = GetSize(outputs[0]);
+    PADDLE_ENFORCE_GT(outputs.size(), 0);
+    auto output = outputs.back();
+    PADDLE_ENFORCE_EQ(output.size(), 1UL);
+    size_t size = GetSize(output[0]);
    PADDLE_ENFORCE_GT(size, 0);
-    float *result = static_cast<float *>(outputs[0].data.data());
+    float *result = static_cast<float *>(output[0].data.data());
    // output is probability, which is in (0, 1).
    for (size_t i = 0; i < size; i++) {
      EXPECT_GT(result[i], 0);
......
@@ -156,7 +156,7 @@ void profile(bool use_mkldnn = false) {
  AnalysisConfig cfg;
  SetConfig(&cfg, use_mkldnn);

-  std::vector<PaddleTensor> outputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
......

@@ -70,7 +70,7 @@ TEST(Analyzer_Text_Classification, profile) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  cfg.SwitchIrDebug();
-  std::vector<PaddleTensor> outputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

@@ -79,8 +79,9 @@ TEST(Analyzer_Text_Classification, profile) {
  if (FLAGS_num_threads == 1) {
    // Get output
-    LOG(INFO) << "get outputs " << outputs.size();
-    for (auto &output : outputs) {
+    PADDLE_ENFORCE_GT(outputs.size(), 0);
+    LOG(INFO) << "get outputs " << outputs.back().size();
+    for (auto &output : outputs.back()) {
      LOG(INFO) << "output.shape: " << to_string(output.shape);
      // no lod ?
      CHECK_EQ(output.lod.size(), 0UL);
......
@@ -186,7 +186,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
void profile(bool use_mkldnn = false) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
-  std::vector<PaddleTensor> outputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
  if (use_mkldnn) {
    cfg.EnableMKLDNN();
  }
......

@@ -87,7 +87,7 @@ void profile(bool use_mkldnn = false) {
    cfg.EnableMKLDNN();
  }
  // cfg.pass_builder()->TurnOnDebug();
-  std::vector<PaddleTensor> outputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

@@ -100,7 +100,8 @@ void profile(bool use_mkldnn = false) {
  auto refer = ProcessALine(line);
  file.close();

-  auto &output = outputs.front();
+  PADDLE_ENFORCE_GT(outputs.size(), 0);
+  auto &output = outputs.back().front();
  size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
  CHECK_EQ(numel, refer.data.size());
  for (size_t i = 0; i < numel; ++i) {
......
# copyright (c) 2019 paddlepaddle authors. all rights reserved.
-#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at

@@ -11,6 +10,7 @@
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
+import hashlib
import unittest
import os
import numpy as np

@@ -21,16 +21,20 @@ import functools
import contextlib
from PIL import Image, ImageEnhance
import math
-from paddle.dataset.common import download
+from paddle.dataset.common import download, md5file
+import tarfile

random.seed(0)
np.random.seed(0)

DATA_DIM = 224
SIZE_FLOAT32 = 4
SIZE_INT64 = 8
+FULL_SIZE_BYTES = 30106000008
+FULL_IMAGES = 50000
+DATA_DIR_NAME = 'ILSVRC2012'
+IMG_DIR_NAME = 'var'
+TARGET_HASH = '8dc592db6dcc8d521e4d5ba9da5ca7d2'
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))

@@ -70,19 +74,9 @@ def process_image(img_path, mode, color_jitter, rotate):
    return img

-def download_unzip():
-    int8_download = 'int8/download'
-    target_name = 'data'
-    cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' +
-                                      int8_download)
-    target_folder = os.path.join(cache_folder, target_name)
+def download_concat(cache_folder, zip_path):
    data_urls = []
    data_md5s = []
    data_urls.append(
        'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partaa'
    )

@@ -91,72 +85,138 @@ def download_unzip():
        'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partab'
    )
    data_md5s.append('1e9f15f64e015e58d6f9ec3210ed18b5')
    file_names = []
+    print("Downloading full ImageNet Validation dataset ...")
    for i in range(0, len(data_urls)):
        download(data_urls[i], cache_folder, data_md5s[i])
-        file_names.append(data_urls[i].split('/')[-1])
-
-    zip_path = os.path.join(cache_folder, 'full_imagenet_val.tar.gz')
+        file_name = os.path.join(cache_folder, data_urls[i].split('/')[-1])
+        file_names.append(file_name)
+        print("Downloaded part {0}\n".format(file_name))
    if not os.path.exists(zip_path):
-        cat_command = 'cat'
-        for file_name in file_names:
-            cat_command += ' ' + os.path.join(cache_folder, file_name)
-        cat_command += ' > ' + zip_path
-        os.system(cat_command)
-        print('Data is downloaded at {0}\n').format(zip_path)
-
-    if not os.path.exists(target_folder):
-        cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(target_folder, zip_path)
-        os.system(cmd)
-        print('Data is unzipped at {0}\n'.format(target_folder))
-
-    data_dir = os.path.join(target_folder, 'ILSVRC2012')
-    print('ILSVRC2012 full val set at {0}\n'.format(data_dir))
-    return data_dir
+        with open(zip_path, "w+") as outfile:
+            for fname in file_names:
+                with open(fname) as infile:
+                    outfile.write(infile.read())
+
+
+def extract(zip_path, extract_folder):
+    data_dir = os.path.join(extract_folder, DATA_DIR_NAME)
+    img_dir = os.path.join(data_dir, IMG_DIR_NAME)
+    print("Extracting...\n")
+
+    if not (os.path.exists(img_dir) and
+            len(os.listdir(img_dir)) == FULL_IMAGES):
+        tar = tarfile.open(zip_path)
+        tar.extractall(path=extract_folder)
+        tar.close()
+    print('Extracted. Full Imagenet Validation dataset is located at {0}\n'.
+          format(data_dir))
+
+
+def print_processbar(done, total):
+    done_filled = done * '='
+    empty_filled = (total - done) * ' '
+    percentage_done = done * 100 / total
+    sys.stdout.write("\r[%s%s]%d%%" %
+                     (done_filled, empty_filled, percentage_done))
+    sys.stdout.flush()
+
+
+def check_integrity(filename, target_hash):
+    print('\nThe binary file exists. Checking file integrity...\n')
+    md = hashlib.md5()
+    count = 0
+    total_parts = 50
+    chunk_size = 8192
+    onepart = FULL_SIZE_BYTES / chunk_size / total_parts
+    with open(filename) as ifs:
+        while True:
+            buf = ifs.read(8192)
+            if count % onepart == 0:
+                done = count / onepart
+                print_processbar(done, total_parts)
+            count = count + 1
+            if not buf:
+                break
+            md.update(buf)
+    hash1 = md.hexdigest()
+    if hash1 == target_hash:
+        return True
+    else:
+        return False


-def reader():
-    data_dir = download_unzip()
-    file_list = os.path.join(data_dir, 'val_list.txt')
-    output_file = os.path.join(data_dir, 'int8_full_val.bin')
+def convert(file_list, data_dir, output_file):
+    print('Converting 50000 images to binary file ...\n')
    with open(file_list) as flist:
        lines = [line.strip() for line in flist]
        num_images = len(lines)
-        if not os.path.exists(output_file):
-            print(
-                'Preprocessing to binary file...<num_images><all images><all labels>...\n'
-            )
-            with open(output_file, "w+b") as of:
-                #save num_images(int64_t) to file
-                of.seek(0)
-                num = np.array(int(num_images)).astype('int64')
-                of.write(num.tobytes())
-                for idx, line in enumerate(lines):
-                    img_path, label = line.split()
-                    img_path = os.path.join(data_dir, img_path)
-                    if not os.path.exists(img_path):
-                        continue
-
-                    #save image(float32) to file
-                    img = process_image(
-                        img_path, 'val', color_jitter=False, rotate=False)
-                    np_img = np.array(img)
-                    of.seek(SIZE_INT64 + SIZE_FLOAT32 * DATA_DIM * DATA_DIM * 3
-                            * idx)
-                    of.write(np_img.astype('float32').tobytes())
-
-                    #save label(int64_t) to file
-                    label_int = (int)(label)
-                    np_label = np.array(label_int)
-                    of.seek(SIZE_INT64 + SIZE_FLOAT32 * DATA_DIM * DATA_DIM * 3
-                            * num_images + idx * SIZE_INT64)
-                    of.write(np_label.astype('int64').tobytes())
-
-    print('The preprocessed binary file path {}\n'.format(output_file))
+        with open(output_file, "w+b") as ofs:
+            #save num_images(int64_t) to file
+            ofs.seek(0)
+            num = np.array(int(num_images)).astype('int64')
+            ofs.write(num.tobytes())
+            per_parts = 1000
+            full_parts = FULL_IMAGES / per_parts
+            print_processbar(0, full_parts)
+            for idx, line in enumerate(lines):
+                img_path, label = line.split()
+                img_path = os.path.join(data_dir, img_path)
+                if not os.path.exists(img_path):
+                    continue
+
+                #save image(float32) to file
+                img = process_image(
+                    img_path, 'val', color_jitter=False, rotate=False)
+                np_img = np.array(img)
+                ofs.seek(SIZE_INT64 + SIZE_FLOAT32 * DATA_DIM * DATA_DIM * 3 *
+                         idx)
+                ofs.write(np_img.astype('float32').tobytes())
+                ofs.flush()
+
+                #save label(int64_t) to file
+                label_int = (int)(label)
+                np_label = np.array(label_int)
+                ofs.seek(SIZE_INT64 + SIZE_FLOAT32 * DATA_DIM * DATA_DIM * 3 *
+                         num_images + idx * SIZE_INT64)
+                ofs.write(np_label.astype('int64').tobytes())
+                ofs.flush()
+                if (idx + 1) % per_parts == 0:
+                    done = (idx + 1) / per_parts
+                    print_processbar(done, full_parts)
+    print("Conversion finished.")
+
+
+def run_convert():
+    print('Start to download and convert 50000 images to binary file...')
+    cache_folder = os.path.expanduser('~/.cache/paddle/dataset/int8/download')
+    extract_folder = os.path.join(cache_folder, 'full_data')
+    data_dir = os.path.join(extract_folder, DATA_DIR_NAME)
+    file_list = os.path.join(data_dir, 'val_list.txt')
+    zip_path = os.path.join(cache_folder, 'full_imagenet_val.tar.gz')
+    output_file = os.path.join(cache_folder, 'int8_full_val.bin')
+    retry = 0
+    try_limit = 3
+
+    while not (os.path.exists(output_file) and
+               os.path.getsize(output_file) == FULL_SIZE_BYTES and
+               check_integrity(output_file, TARGET_HASH)):
+        if os.path.exists(output_file):
+            sys.stderr.write(
+                "\n\nThe existing binary file is broken. Start to generate new one...\n\n".
                format(output_file))
+            os.remove(output_file)
+        if retry < try_limit:
+            retry = retry + 1
+        else:
+            raise RuntimeError(
+                "Can not convert the dataset to binary file with try limit {0}".
                format(try_limit))
+        download_concat(cache_folder, zip_path)
+        extract(zip_path, extract_folder)
+        convert(file_list, data_dir, output_file)
+    print("\nSuccess! The binary file can be found at {0}".format(output_file))


if __name__ == '__main__':
-    reader()
+    run_convert()
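For reference, the layout the script writes is: one `int64` image count, then all images as `float32` blobs of 3×224×224, then all labels as `int64`. This is exactly what the offsets in the `SetInput`/`TensorReader` hunks above assume. A minimal standalone reader sketch under that assumption (not part of the patch; the file name is a placeholder and the usual little-endian layout is assumed):

```cpp
#include <cstdint>
#include <fstream>
#include <iostream>

int main() {
  // Layout written by the preprocessing script:
  //   [int64 num_images][num_images * 3*224*224 float32][num_images int64 labels]
  std::ifstream file("int8_full_val.bin", std::ios::binary);  // placeholder path
  if (!file) return 1;

  int64_t num_images = 0;
  file.read(reinterpret_cast<char *>(&num_images), sizeof(num_images));

  const size_t image_bytes = sizeof(float) * 3 * 224 * 224;
  const size_t labels_offset = sizeof(int64_t) + image_bytes * num_images;

  // Read the label of the first image.
  int64_t first_label = 0;
  file.seekg(labels_offset, std::ios::beg);
  file.read(reinterpret_cast<char *>(&first_label), sizeof(first_label));
  std::cout << "images: " << num_images << ", first label: " << first_label << "\n";
  return 0;
}
```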
@@ -41,7 +41,10 @@ DEFINE_string(model_name, "", "model name");
DEFINE_string(infer_model, "", "model path");
DEFINE_string(infer_data, "", "data file");
DEFINE_string(refer_result, "", "reference result for comparison");
-DEFINE_int32(batch_size, 1, "batch size.");
+DEFINE_int32(batch_size, 1, "batch size");
+DEFINE_int32(warmup_batch_size, 100, "batch size for quantization warmup");
+// setting iterations to 0 means processing the whole dataset
+DEFINE_int32(iterations, 0, "number of batches to process");
DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
DEFINE_bool(test_all_data, false, "Test the all dataset in data file.");
DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");

@@ -239,7 +242,7 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
  }
  input.shape = shape;
  input.dtype = PaddleDType::FLOAT32;
-  size_t len = std::accumulate(shape.begin(), shape.end(), 1,
+  size_t len = std::accumulate(shape.begin(), shape.end(), size_t{1},
                               [](int a, int b) { return a * b; });
  input.data.Resize(len * sizeof(float));
  input.lod.assign({{0, static_cast<size_t>(FLAGS_batch_size)}});

@@ -286,17 +289,18 @@ void ConvertPaddleTensorToZeroCopyTensor(
void PredictionWarmUp(PaddlePredictor *predictor,
                      const std::vector<std::vector<PaddleTensor>> &inputs,
-                      std::vector<PaddleTensor> *outputs, int num_threads,
-                      int tid) {
+                      std::vector<std::vector<PaddleTensor>> *outputs,
+                      int num_threads, int tid) {
  int batch_size = FLAGS_batch_size;
  LOG(INFO) << "Running thread " << tid << ", warm up run...";
  if (FLAGS_zero_copy) {
    ConvertPaddleTensorToZeroCopyTensor(predictor, inputs[0]);
  }
+  outputs->resize(1);
  Timer warmup_timer;
  warmup_timer.tic();
  if (!FLAGS_zero_copy) {
-    predictor->Run(inputs[0], outputs, batch_size);
+    predictor->Run(inputs[0], &(*outputs)[0], batch_size);
  } else {
    predictor->ZeroCopyRun();
  }

@@ -308,11 +312,16 @@ void PredictionWarmUp(PaddlePredictor *predictor,
void PredictionRun(PaddlePredictor *predictor,
                   const std::vector<std::vector<PaddleTensor>> &inputs,
-                   std::vector<PaddleTensor> *outputs, int num_threads,
-                   int tid) {
-  int batch_size = FLAGS_batch_size;
+                   std::vector<std::vector<PaddleTensor>> *outputs,
+                   int num_threads, int tid) {
  int num_times = FLAGS_repeat;
-  LOG(INFO) << "Thread " << tid << " run " << num_times << " times...";
+  int iterations = inputs.size();  // process the whole dataset ...
+  if (FLAGS_iterations > 0 && FLAGS_iterations < inputs.size())
+    iterations =
+        FLAGS_iterations;  // ... unless the number of iterations is set
+  outputs->resize(iterations);
+  LOG(INFO) << "Thread " << tid << ", number of threads " << num_threads
+            << ", run " << num_times << " times...";
  Timer run_timer;
  double elapsed_time = 0;
#ifdef WITH_GPERFTOOLS

@@ -320,14 +329,14 @@ void PredictionRun(PaddlePredictor *predictor,
#endif
  if (!FLAGS_zero_copy) {
    run_timer.tic();
-    for (size_t i = 0; i < inputs.size(); i++) {
+    for (size_t i = 0; i < iterations; i++) {
      for (int j = 0; j < num_times; j++) {
-        predictor->Run(inputs[i], outputs, batch_size);
+        predictor->Run(inputs[i], &(*outputs)[i], FLAGS_batch_size);
      }
    }
    elapsed_time = run_timer.toc();
  } else {
-    for (size_t i = 0; i < inputs.size(); i++) {
+    for (size_t i = 0; i < iterations; i++) {
      ConvertPaddleTensorToZeroCopyTensor(predictor, inputs[i]);
      run_timer.tic();
      for (int j = 0; j < num_times; j++) {

@@ -340,13 +349,14 @@ void PredictionRun(PaddlePredictor *predictor,
  ProfilerStop();
#endif

-  PrintTime(batch_size, num_times, num_threads, tid, elapsed_time / num_times,
-            inputs.size());
+  auto batch_latency = elapsed_time / (iterations * num_times);
+  PrintTime(FLAGS_batch_size, num_times, num_threads, tid, batch_latency,
+            iterations);
  if (FLAGS_record_benchmark) {
    Benchmark benchmark;
    benchmark.SetName(FLAGS_model_name);
-    benchmark.SetBatchSize(batch_size);
-    benchmark.SetLatency(elapsed_time / num_times);
+    benchmark.SetBatchSize(FLAGS_batch_size);
+    benchmark.SetLatency(batch_latency);
    benchmark.PersistToFile("benchmark_record.txt");
  }
}

@@ -354,16 +364,17 @@ void PredictionRun(PaddlePredictor *predictor,
void TestOneThreadPrediction(
    const PaddlePredictor::Config *config,
    const std::vector<std::vector<PaddleTensor>> &inputs,
-    std::vector<PaddleTensor> *outputs, bool use_analysis = true) {
+    std::vector<std::vector<PaddleTensor>> *outputs, bool use_analysis = true) {
  auto predictor = CreateTestPredictor(config, use_analysis);
-  PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0);
-  PredictionRun(predictor.get(), inputs, outputs, 1, 0);
+  PredictionWarmUp(predictor.get(), inputs, outputs, FLAGS_paddle_num_threads,
+                   0);
+  PredictionRun(predictor.get(), inputs, outputs, FLAGS_paddle_num_threads, 0);
}

void TestMultiThreadPrediction(
    const PaddlePredictor::Config *config,
    const std::vector<std::vector<PaddleTensor>> &inputs,
-    std::vector<PaddleTensor> *outputs, int num_threads,
+    std::vector<std::vector<PaddleTensor>> *outputs, int num_threads,
    bool use_analysis = true) {
  std::vector<std::thread> threads;
  std::vector<std::unique_ptr<PaddlePredictor>> predictors;

@@ -376,7 +387,7 @@ void TestMultiThreadPrediction(
    threads.emplace_back([&, tid]() {
      // Each thread should have local inputs and outputs.
      // The inputs of each thread are all the same.
-      std::vector<PaddleTensor> outputs_tid;
+      std::vector<std::vector<PaddleTensor>> outputs_tid;
      auto &predictor = predictors[tid];
#ifdef PADDLE_WITH_MKLDNN
      if (use_analysis) {

@@ -384,8 +395,8 @@ void TestMultiThreadPrediction(
            ->SetMkldnnThreadID(static_cast<int>(tid) + 1);
      }
#endif
-      PredictionWarmUp(predictor.get(), inputs, outputs, num_threads, tid);
-      PredictionRun(predictor.get(), inputs, outputs, num_threads, tid);
+      PredictionWarmUp(predictor.get(), inputs, &outputs_tid, num_threads, tid);
+      PredictionRun(predictor.get(), inputs, &outputs_tid, num_threads, tid);
    });
  }
  for (int i = 0; i < num_threads; ++i) {

@@ -395,8 +406,8 @@ void TestMultiThreadPrediction(
void TestPrediction(const PaddlePredictor::Config *config,
                    const std::vector<std::vector<PaddleTensor>> &inputs,
-                    std::vector<PaddleTensor> *outputs, int num_threads,
-                    bool use_analysis = FLAGS_use_analysis) {
+                    std::vector<std::vector<PaddleTensor>> *outputs,
+                    int num_threads, bool use_analysis = FLAGS_use_analysis) {
  PrintConfig(config, use_analysis);
  if (num_threads == 1) {
    TestOneThreadPrediction(config, inputs, outputs, use_analysis);

@@ -406,30 +417,41 @@ void TestPrediction(const PaddlePredictor::Config *config,
  }
}

-void CompareTopAccuracy(const std::vector<PaddleTensor> &output_slots1,
-                        const std::vector<PaddleTensor> &output_slots2) {
-  // first output: avg_cost
-  if (output_slots1.size() == 0 || output_slots2.size() == 0)
+void CompareTopAccuracy(
+    const std::vector<std::vector<PaddleTensor>> &output_slots_quant,
+    const std::vector<std::vector<PaddleTensor>> &output_slots_ref) {
+  if (output_slots_quant.size() == 0 || output_slots_ref.size() == 0)
    throw std::invalid_argument(
        "CompareTopAccuracy: output_slots vector is empty.");
-  PADDLE_ENFORCE(output_slots1.size() >= 2UL);
-  PADDLE_ENFORCE(output_slots2.size() >= 2UL);
-  // second output: acc_top1
-  if (output_slots1[1].lod.size() > 0 || output_slots2[1].lod.size() > 0)
-    throw std::invalid_argument(
-        "CompareTopAccuracy: top1 accuracy output has nonempty LoD.");
-  if (output_slots1[1].dtype != paddle::PaddleDType::FLOAT32 ||
-      output_slots2[1].dtype != paddle::PaddleDType::FLOAT32)
-    throw std::invalid_argument(
-        "CompareTopAccuracy: top1 accuracy output is of a wrong type.");
-  float *top1_quantized = static_cast<float *>(output_slots1[1].data.data());
-  float *top1_reference = static_cast<float *>(output_slots2[1].data.data());
-  LOG(INFO) << "top1 INT8 accuracy: " << *top1_quantized;
-  LOG(INFO) << "top1 FP32 accuracy: " << *top1_reference;
+  float total_accs1_quant{0};
+  float total_accs1_ref{0};
+  for (size_t i = 0; i < output_slots_quant.size(); ++i) {
+    PADDLE_ENFORCE(output_slots_quant[i].size() >= 2UL);
+    PADDLE_ENFORCE(output_slots_ref[i].size() >= 2UL);
+    // second output: acc_top1
+    if (output_slots_quant[i][1].lod.size() > 0 ||
+        output_slots_ref[i][1].lod.size() > 0)
+      throw std::invalid_argument(
+          "CompareTopAccuracy: top1 accuracy output has nonempty LoD.");
+    if (output_slots_quant[i][1].dtype != paddle::PaddleDType::FLOAT32 ||
+        output_slots_ref[i][1].dtype != paddle::PaddleDType::FLOAT32)
+      throw std::invalid_argument(
+          "CompareTopAccuracy: top1 accuracy output is of a wrong type.");
+    total_accs1_quant +=
+        *static_cast<float *>(output_slots_quant[i][1].data.data());
+    total_accs1_ref +=
+        *static_cast<float *>(output_slots_ref[i][1].data.data());
+  }
+  float avg_acc1_quant = total_accs1_quant / output_slots_quant.size();
+  float avg_acc1_ref = total_accs1_ref / output_slots_ref.size();
+  LOG(INFO) << "Avg top1 INT8 accuracy: " << std::fixed << std::setw(6)
+            << std::setprecision(4) << avg_acc1_quant;
+  LOG(INFO) << "Avg top1 FP32 accuracy: " << std::fixed << std::setw(6)
+            << std::setprecision(4) << avg_acc1_ref;
  LOG(INFO) << "Accepted accuracy drop threshold: " << FLAGS_quantized_accuracy;
-  CHECK_LE(std::abs(*top1_quantized - *top1_reference),
-           FLAGS_quantized_accuracy);
+  CHECK_LE(std::abs(avg_acc1_quant - avg_acc1_ref), FLAGS_quantized_accuracy);
}

void CompareDeterministic(

@@ -455,20 +477,35 @@ void CompareNativeAndAnalysis(
    const PaddlePredictor::Config *config,
    const std::vector<std::vector<PaddleTensor>> &inputs) {
  PrintConfig(config, true);
-  std::vector<PaddleTensor> native_outputs, analysis_outputs;
+  std::vector<std::vector<PaddleTensor>> native_outputs, analysis_outputs;
  TestOneThreadPrediction(config, inputs, &native_outputs, false);
  TestOneThreadPrediction(config, inputs, &analysis_outputs, true);
-  CompareResult(analysis_outputs, native_outputs);
+  PADDLE_ENFORCE(native_outputs.size() > 0, "Native output is empty.");
+  PADDLE_ENFORCE(analysis_outputs.size() > 0, "Analysis output is empty.");
+  CompareResult(analysis_outputs.back(), native_outputs.back());
}

void CompareQuantizedAndAnalysis(
-    const PaddlePredictor::Config *config,
-    const PaddlePredictor::Config *qconfig,
+    const AnalysisConfig *config, const AnalysisConfig *qconfig,
    const std::vector<std::vector<PaddleTensor>> &inputs) {
-  PrintConfig(config, true);
-  std::vector<PaddleTensor> analysis_outputs, quantized_outputs;
-  TestOneThreadPrediction(config, inputs, &analysis_outputs, true);
-  TestOneThreadPrediction(qconfig, inputs, &quantized_outputs, true);
+  PADDLE_ENFORCE_EQ(inputs[0][0].shape[0], FLAGS_batch_size,
+                    "Input data has to be packed batch by batch.");
+  LOG(INFO) << "FP32 & INT8 prediction run: batch_size " << FLAGS_batch_size
+            << ", warmup batch size " << FLAGS_warmup_batch_size << ".";
+
+  LOG(INFO) << "--- FP32 prediction start ---";
+  auto *cfg = reinterpret_cast<const PaddlePredictor::Config *>(config);
+  PrintConfig(cfg, true);
+  std::vector<std::vector<PaddleTensor>> analysis_outputs;
+  TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true);
+
+  LOG(INFO) << "--- INT8 prediction start ---";
+  auto *qcfg = reinterpret_cast<const PaddlePredictor::Config *>(qconfig);
+  PrintConfig(qcfg, true);
+  std::vector<std::vector<PaddleTensor>> quantized_outputs;
+  TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true);
+
+  LOG(INFO) << "--- comparing outputs --- ";
  CompareTopAccuracy(quantized_outputs, analysis_outputs);
}

@@ -578,9 +615,9 @@ static bool CompareTensorData(const framework::LoDTensor &a,
                              const framework::LoDTensor &b) {
  auto a_shape = framework::vectorize(a.dims());
  auto b_shape = framework::vectorize(b.dims());
-  size_t a_size = std::accumulate(a_shape.begin(), a_shape.end(), 1,
+  size_t a_size = std::accumulate(a_shape.begin(), a_shape.end(), size_t{1},
                                  [](int a, int b) { return a * b; });
-  size_t b_size = std::accumulate(b_shape.begin(), b_shape.end(), 1,
+  size_t b_size = std::accumulate(b_shape.begin(), b_shape.end(), size_t{1},
                                  [](int a, int b) { return a * b; });
  if (a_size != b_size) {
    LOG(ERROR) << string::Sprintf("tensor data size not match, %d != %d",
......
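The reworked `CompareTopAccuracy` above averages the per-batch top-1 accuracy of both runs and then checks that the INT8/FP32 gap stays within the configured threshold. A standalone sketch of that acceptance rule with illustrative numbers (not part of the patch; the threshold value here is made up, not the flag's default):

```cpp
#include <cassert>
#include <cmath>
#include <cstdio>
#include <vector>

// Standalone sketch of the acceptance rule: top-1 accuracy is averaged over
// all compared batches and the INT8/FP32 gap must stay within the threshold.
int main() {
  std::vector<float> int8_top1 = {0.76f, 0.75f};    // per-batch INT8 top-1 (made up)
  std::vector<float> fp32_top1 = {0.765f, 0.755f};  // per-batch FP32 top-1 (made up)
  float threshold = 0.01f;                          // stand-in for FLAGS_quantized_accuracy

  float avg_int8 = 0.f, avg_fp32 = 0.f;
  for (size_t i = 0; i < int8_top1.size(); ++i) {
    avg_int8 += int8_top1[i];
    avg_fp32 += fp32_top1[i];
  }
  avg_int8 /= int8_top1.size();
  avg_fp32 /= fp32_top1.size();

  std::printf("avg INT8 top-1: %.4f, avg FP32 top-1: %.4f\n", avg_int8, avg_fp32);
  assert(std::abs(avg_int8 - avg_fp32) <= threshold);  // mirrors the CHECK_LE
  return 0;
}
```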
@@ -74,7 +74,7 @@ void profile(std::string model_dir, bool use_analysis, bool use_tensorrt) {
    SetFakeImageInput(&inputs_all, model_dir, false, "__model__", "");
  }

-  std::vector<PaddleTensor> outputs;
+  std::vector<std::vector<PaddleTensor>> outputs;
  if (use_analysis || use_tensorrt) {
    AnalysisConfig config;
    config.EnableUseGpu(100, 0);
......