提交 44cb70c0 编写于 作者: T tensor-tang

Merge remote-tracking branch 'ups/develop' into fix/mac

...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include <cstddef> // for size_t
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -26,6 +27,7 @@ struct ExecutionStrategy { ...@@ -26,6 +27,7 @@ struct ExecutionStrategy {
bool allow_op_delay_{false}; bool allow_op_delay_{false};
size_t num_iteration_per_drop_scope_{100}; size_t num_iteration_per_drop_scope_{100};
ExecutorType type_{kDefault}; ExecutorType type_{kDefault};
bool dry_run_{false};
}; };
} // namespace details } // namespace details
......
...@@ -128,7 +128,9 @@ void FastThreadedSSAGraphExecutor::RunOpAsync( ...@@ -128,7 +128,9 @@ void FastThreadedSSAGraphExecutor::RunOpAsync(
size_t complete = 0; size_t complete = 0;
while (op_to_run != nullptr) { while (op_to_run != nullptr) {
try { try {
op_to_run->Run(strategy_.use_cuda_); if (LIKELY(!strategy_.dry_run_)) {
op_to_run->Run(strategy_.use_cuda_);
}
++complete; ++complete;
} catch (...) { } catch (...) {
exception_.Catch(std::current_exception()); exception_.Catch(std::current_exception());
......
...@@ -211,7 +211,9 @@ void ThreadedSSAGraphExecutor::RunOp( ...@@ -211,7 +211,9 @@ void ThreadedSSAGraphExecutor::RunOp(
if (VLOG_IS_ON(10)) { if (VLOG_IS_ON(10)) {
VLOG(10) << op << " " << op->Name() << " : " << op->DebugString(); VLOG(10) << op << " " << op->Name() << " : " << op->DebugString();
} }
op->Run(strategy_.use_cuda_); if (LIKELY(!strategy_.dry_run_)) {
op->Run(strategy_.use_cuda_);
}
VLOG(10) << op << " " << op->Name() << " Done "; VLOG(10) << op << " " << op->Name() << " Done ";
running_ops_--; running_ops_--;
ready_var_q->Extend(op->Outputs()); ready_var_q->Extend(op->Outputs());
......
...@@ -48,7 +48,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { ...@@ -48,7 +48,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
// Use topological sort algorithm // Use topological sort algorithm
FeedFetchList Run(const std::vector<std::string> &fetch_tensors) override; FeedFetchList Run(const std::vector<std::string> &fetch_tensors) override;
~ThreadedSSAGraphExecutor() {} ~ThreadedSSAGraphExecutor() final = default;
private: private:
void RunOp(const std::shared_ptr<BlockingQueue<VarHandleBase *>> &ready_var_q, void RunOp(const std::shared_ptr<BlockingQueue<VarHandleBase *>> &ready_var_q,
......
...@@ -38,9 +38,20 @@ class ParallelExecutorPrivate { ...@@ -38,9 +38,20 @@ class ParallelExecutorPrivate {
explicit ParallelExecutorPrivate(const std::vector<platform::Place> &places) explicit ParallelExecutorPrivate(const std::vector<platform::Place> &places)
: places_(places) {} : places_(places) {}
~ParallelExecutorPrivate() {
  // Local scopes are only released here when own_local_scope_ is set.
  if (!own_local_scope_) {
    return;
  }
  // Index 0 is the global scope itself, so cleanup starts at index 1.
  for (size_t idx = 1; idx < local_scopes_.size(); ++idx) {
    Scope *child_scope = local_scopes_[idx];
    // Only delete scopes that global_scope_ still reports as its kid.
    if (!global_scope_->HasKid(child_scope)) {
      continue;
    }
    global_scope_->DeleteScope(child_scope);
  }
}
std::vector<platform::Place> places_; std::vector<platform::Place> places_;
std::vector<Scope *> local_scopes_; std::vector<Scope *> local_scopes_;
Scope *global_scope_; Scope *global_scope_; // not owned
std::unique_ptr<details::SSAGraphExecutor> executor_; std::unique_ptr<details::SSAGraphExecutor> executor_;
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
...@@ -306,16 +317,6 @@ ParallelExecutor::~ParallelExecutor() { ...@@ -306,16 +317,6 @@ ParallelExecutor::~ParallelExecutor() {
for (auto &p : member_->places_) { for (auto &p : member_->places_) {
platform::DeviceContextPool::Instance().Get(p)->Wait(); platform::DeviceContextPool::Instance().Get(p)->Wait();
} }
if (member_->own_local_scope_) {
for (size_t i = 1; i < member_->local_scopes_.size(); ++i) {
Scope *local_scope = member_->local_scopes_[i];
if (member_->global_scope_->HasKid(local_scope)) {
member_->global_scope_->DeleteScope(local_scope);
}
}
}
// member_ must be destructed before gcs_ since the destructor of // member_ must be destructed before gcs_ since the destructor of
// ReferenceCountOpHandle use raw pointers of gcs_ inside. // ReferenceCountOpHandle use raw pointers of gcs_ inside.
member_.reset(); member_.reset();
......
if(WITH_TESTING) if(WITH_TESTING)
include(test.cmake) # some generic cmake function for inference include(tests/test.cmake) # some generic cmake function for inference
endif() endif()
# analysis and tensorrt must be added before creating static library, # analysis and tensorrt must be added before creating static library,
# otherwise, there would be undefined reference to them in static library. # otherwise, there would be undefined reference to them in static library.
......
set(INFERENCE_EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor) set(INFERENCE_EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor)
# download_model(install_dir model_name)
#
# Fetches only the model archive (no data file) for a test.
#   install_dir - directory the model is installed into; its existence is
#                 used as the "already downloaded" marker.
#   model_name  - archive name passed to inference_download_and_uncompress,
#                 resolved against ${INFERENCE_URL}.
function(download_model install_dir model_name)
# Skip the download entirely when the install directory already exists.
if (NOT EXISTS ${install_dir})
inference_download_and_uncompress(${install_dir} ${INFERENCE_URL} ${model_name})
endif()
endfunction()
function(download_model_and_data install_dir model_name data_name) function(download_model_and_data install_dir model_name data_name)
if (NOT EXISTS ${install_dir}) if (NOT EXISTS ${install_dir})
inference_download_and_uncompress(${install_dir} ${INFERENCE_URL} ${model_name}) inference_download_and_uncompress(${install_dir} ${INFERENCE_URL} ${model_name})
...@@ -13,6 +19,13 @@ function(inference_analysis_api_test target install_dir filename) ...@@ -13,6 +19,13 @@ function(inference_analysis_api_test target install_dir filename)
ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt) ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt)
endfunction() endfunction()
# inference_analysis_api_test_with_fake_data(target install_dir filename model_name)
#
# Registers an analysis test that needs a model but no data file: only
# --infer_model is passed to the test binary (no --infer_data).
#   target      - name of the test target to create.
#   install_dir - directory the model is downloaded into.
#   filename    - test source file handed to inference_analysis_test as SRCS.
#   model_name  - model archive name forwarded to download_model.
function(inference_analysis_api_test_with_fake_data target install_dir filename model_name)
# Make sure the model is present before the test is registered.
download_model(${install_dir} ${model_name})
inference_analysis_test(${target} SRCS ${filename}
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
ARGS --infer_model=${install_dir}/model)
endfunction()
# RNN1 # RNN1
if(NOT APPLE) if(NOT APPLE)
set(RNN1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/rnn1") set(RNN1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/rnn1")
...@@ -61,17 +74,13 @@ inference_analysis_api_test(test_analyzer_seq_conv1 ${SEQ_CONV1_INSTALL_DIR} ana ...@@ -61,17 +74,13 @@ inference_analysis_api_test(test_analyzer_seq_conv1 ${SEQ_CONV1_INSTALL_DIR} ana
# ocr # ocr
set(OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/ocr") set(OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/ocr")
if (NOT EXISTS ${OCR_INSTALL_DIR}) if (NOT EXISTS ${OCR_INSTALL_DIR})
inference_download_and_uncompress(${OCR_INSTALL_DIR} "http://paddlemodels.cdn.bcebos.com/" "inference-vis-demos%2Focr.tar.gz") inference_download_and_uncompress(${OCR_INSTALL_DIR} "http://paddlemodels.cdn.bcebos.com/" "inference-vis-demos%2Focr.tar.gz")
endif() endif()
inference_analysis_api_test(test_analyzer_ocr ${OCR_INSTALL_DIR} analyzer_vis_tester.cc) inference_analysis_api_test(test_analyzer_ocr ${OCR_INSTALL_DIR} analyzer_vis_tester.cc)
# resnet50 # resnet50
set(RESNET50_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/resnet50") inference_analysis_api_test_with_fake_data(test_analyzer_resnet50
if (NOT EXISTS ${RESNET50_INSTALL_DIR}) "${INFERENCE_DEMO_INSTALL_DIR}/resnet50" analyzer_resnet50_tester.cc "resnet50_model.tar.gz")
inference_download_and_uncompress(${RESNET50_INSTALL_DIR} ${INFERENCE_URL} "resnet50_model.tar.gz")
endif()
inference_analysis_test(test_analyzer_resnet50 SRCS analyzer_resnet50_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} ARGS --infer_model=${RESNET50_INSTALL_DIR}/model)
# anakin # anakin
if (WITH_ANAKIN AND WITH_MKL) # only needed in CI if (WITH_ANAKIN AND WITH_MKL) # only needed in CI
......
...@@ -30,25 +30,7 @@ void SetConfig(AnalysisConfig *cfg) { ...@@ -30,25 +30,7 @@ void SetConfig(AnalysisConfig *cfg) {
} }
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) { void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data."); SetFakeImageInput(inputs, FLAGS_infer_model);
PaddleTensor input;
// channel=3, height/width=318
std::vector<int> shape({FLAGS_batch_size, 3, 318, 318});
input.shape = shape;
input.dtype = PaddleDType::FLOAT32;
// fill input data, for profile easily, do not use random data here.
size_t size = FLAGS_batch_size * 3 * 318 * 318;
input.data.Resize(size * sizeof(float));
float *input_data = static_cast<float *>(input.data.data());
for (size_t i = 0; i < size; i++) {
*(input_data + i) = static_cast<float>(i) / size;
}
std::vector<PaddleTensor> input_slots;
input_slots.assign({input});
(*inputs).emplace_back(input_slots);
} }
// Easy for profiling independently. // Easy for profiling independently.
...@@ -61,13 +43,6 @@ void profile(bool use_mkldnn = false) { ...@@ -61,13 +43,6 @@ void profile(bool use_mkldnn = false) {
std::vector<std::vector<PaddleTensor>> input_slots_all; std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all); SetInput(&input_slots_all);
TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads); TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
size_t size = GetSize(outputs[0]);
// output is a 512-dimension feature
EXPECT_EQ(size, 512 * FLAGS_batch_size);
}
} }
TEST(Analyzer_resnet50, profile) { profile(); } TEST(Analyzer_resnet50, profile) { profile(); }
...@@ -83,8 +58,7 @@ TEST(Analyzer_resnet50, fuse_statis) { ...@@ -83,8 +58,7 @@ TEST(Analyzer_resnet50, fuse_statis) {
auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg); auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
auto fuse_statis = GetFuseStatis( auto fuse_statis = GetFuseStatis(
static_cast<AnalysisPredictor *>(predictor.get()), &num_ops); static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
ASSERT_TRUE(fuse_statis.count("fc_fuse")); LOG(INFO) << "num_ops: " << num_ops;
EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
} }
// Compare result of NativeConfig and AnalysisConfig // Compare result of NativeConfig and AnalysisConfig
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "paddle/fluid/inference/api/analysis_predictor.h" #include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h" #include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h" #include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
DEFINE_string(infer_model, "", "model path"); DEFINE_string(infer_model, "", "model path");
...@@ -105,6 +106,34 @@ std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor, ...@@ -105,6 +106,34 @@ std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor,
return fuse_statis; return fuse_statis;
} }
void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
const std::string &dirname) {
// Set fake_image_data
PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data.");
std::vector<std::vector<int64_t>> feed_target_shapes =
GetFeedTargetShapes(dirname, true, "model", "params");
int dim1 = feed_target_shapes[0][1];
int dim2 = feed_target_shapes[0][2];
int dim3 = feed_target_shapes[0][3];
PaddleTensor input;
std::vector<int> shape({FLAGS_batch_size, dim1, dim2, dim3});
input.shape = shape;
input.dtype = PaddleDType::FLOAT32;
// fill input data, for profile easily, do not use random data here.
size_t size = FLAGS_batch_size * dim1 * dim2 * dim3;
input.data.Resize(size * sizeof(float));
float *input_data = static_cast<float *>(input.data.data());
for (size_t i = 0; i < size; i++) {
*(input_data + i) = static_cast<float>(i) / size;
}
std::vector<PaddleTensor> input_slots;
input_slots.assign({input});
(*inputs).emplace_back(input_slots);
}
void TestOneThreadPrediction( void TestOneThreadPrediction(
const AnalysisConfig &config, const AnalysisConfig &config,
const std::vector<std::vector<PaddleTensor>> &inputs, const std::vector<std::vector<PaddleTensor>> &inputs,
......
...@@ -18,7 +18,6 @@ limitations under the License. */ ...@@ -18,7 +18,6 @@ limitations under the License. */
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/fluid/framework/ir/graph_to_program_pass.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/io.h" #include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
...@@ -94,15 +93,15 @@ void CheckError(const paddle::framework::LoDTensor& output1, ...@@ -94,15 +93,15 @@ void CheckError(const paddle::framework::LoDTensor& output1,
std::unique_ptr<paddle::framework::ProgramDesc> InitProgram( std::unique_ptr<paddle::framework::ProgramDesc> InitProgram(
paddle::framework::Executor* executor, paddle::framework::Scope* scope, paddle::framework::Executor* executor, paddle::framework::Scope* scope,
const std::string& dirname, const bool is_combined = false) { const std::string& dirname, const bool is_combined = false,
const std::string& prog_filename = "__model_combined__",
const std::string& param_filename = "__params_combined__") {
std::unique_ptr<paddle::framework::ProgramDesc> inference_program; std::unique_ptr<paddle::framework::ProgramDesc> inference_program;
if (is_combined) { if (is_combined) {
// All parameters are saved in a single file. // All parameters are saved in a single file.
// Hard-coding the file names of program and parameters in unittest. // Hard-coding the file names of program and parameters in unittest.
// The file names should be consistent with that used in Python API // The file names should be consistent with that used in Python API
// `fluid.io.save_inference_model`. // `fluid.io.save_inference_model`.
std::string prog_filename = "__model_combined__";
std::string param_filename = "__params_combined__";
inference_program = inference_program =
paddle::inference::Load(executor, scope, dirname + "/" + prog_filename, paddle::inference::Load(executor, scope, dirname + "/" + prog_filename,
dirname + "/" + param_filename); dirname + "/" + param_filename);
...@@ -115,12 +114,15 @@ std::unique_ptr<paddle::framework::ProgramDesc> InitProgram( ...@@ -115,12 +114,15 @@ std::unique_ptr<paddle::framework::ProgramDesc> InitProgram(
} }
std::vector<std::vector<int64_t>> GetFeedTargetShapes( std::vector<std::vector<int64_t>> GetFeedTargetShapes(
const std::string& dirname, const bool is_combined = false) { const std::string& dirname, const bool is_combined = false,
const std::string& prog_filename = "__model_combined__",
const std::string& param_filename = "__params_combined__") {
auto place = paddle::platform::CPUPlace(); auto place = paddle::platform::CPUPlace();
auto executor = paddle::framework::Executor(place); auto executor = paddle::framework::Executor(place);
auto* scope = new paddle::framework::Scope(); auto* scope = new paddle::framework::Scope();
auto inference_program = InitProgram(&executor, scope, dirname, is_combined); auto inference_program = InitProgram(&executor, scope, dirname, is_combined,
prog_filename, param_filename);
auto& global_block = inference_program->Block(0); auto& global_block = inference_program->Block(0);
const std::vector<std::string>& feed_target_names = const std::vector<std::string>& feed_target_names =
...@@ -136,15 +138,6 @@ std::vector<std::vector<int64_t>> GetFeedTargetShapes( ...@@ -136,15 +138,6 @@ std::vector<std::vector<int64_t>> GetFeedTargetShapes(
return feed_target_shapes; return feed_target_shapes;
} }
void Compile(paddle::framework::ProgramDesc* program) {
std::unique_ptr<paddle::framework::ir::Graph> g(
new paddle::framework::ir::Graph(*program));
auto pass = paddle::framework::ir::PassRegistry::Instance().Get(
"graph_to_program_pass");
pass->SetNotOwned<paddle::framework::ProgramDesc>("program", program);
pass->Apply(std::move(g));
}
template <typename Place, bool CreateVars = true, bool PrepareContext = false> template <typename Place, bool CreateVars = true, bool PrepareContext = false>
void TestInference(const std::string& dirname, void TestInference(const std::string& dirname,
const std::vector<paddle::framework::LoDTensor*>& cpu_feeds, const std::vector<paddle::framework::LoDTensor*>& cpu_feeds,
...@@ -182,7 +175,6 @@ void TestInference(const std::string& dirname, ...@@ -182,7 +175,6 @@ void TestInference(const std::string& dirname,
paddle::platform::DeviceContextPool::Instance().Get(place)); paddle::platform::DeviceContextPool::Instance().Get(place));
inference_program = InitProgram(&executor, scope, dirname, is_combined); inference_program = InitProgram(&executor, scope, dirname, is_combined);
} }
Compile(inference_program.get());
// Disable the profiler and print the timing information // Disable the profiler and print the timing information
paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kDefault, paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kDefault,
...@@ -261,5 +253,3 @@ void TestInference(const std::string& dirname, ...@@ -261,5 +253,3 @@ void TestInference(const std::string& dirname,
delete scope; delete scope;
} }
USE_PASS(graph_to_program_pass);
...@@ -742,7 +742,12 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -742,7 +742,12 @@ All parameter, weight, gradient are variables in Paddle.
will clean up the temp variables at the end of the current iteration. will clean up the temp variables at the end of the current iteration.
2. In some NLP model, it may cause the GPU memory is insufficient, 2. In some NLP model, it may cause the GPU memory is insufficient,
in this case, you should reduce `num_iteration_per_drop_scope`. in this case, you should reduce `num_iteration_per_drop_scope`.
)DOC"); )DOC")
.def_property("_dry_run",
[](const ExecutionStrategy &self) { return self.dry_run_; },
[](ExecutionStrategy &self, bool dry_run) {
self.dry_run_ = dry_run;
});
exec_strategy.def_property( exec_strategy.def_property(
"use_experimental_executor", "use_experimental_executor",
......
...@@ -60,7 +60,7 @@ def data(name, ...@@ -60,7 +60,7 @@ def data(name,
For example if shape=[1], the resulting shape is [-1, 1]. For example if shape=[1], the resulting shape is [-1, 1].
2. If shape contains -1, such as shape=[1, -1], 2. If shape contains -1, such as shape=[1, -1],
append_batch_size will be enforced to be False (ineffective). append_batch_size will be enforced to be False (ineffective).
dtype(int|float): The type of data : float32, float_16, int etc dtype(basestring): The type of data : float32, float_16, int etc
type(VarType): The output type. By default it is LOD_TENSOR. type(VarType): The output type. By default it is LOD_TENSOR.
lod_level(int): The LoD Level. 0 means the input data is not a sequence. lod_level(int): The LoD Level. 0 means the input data is not a sequence.
stop_gradient(bool): A boolean that mentions whether gradient should flow. stop_gradient(bool): A boolean that mentions whether gradient should flow.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import unittest
import logging
import six
class TestBase(unittest.TestCase):
    """Shared driver for dry-run tests of ParallelExecutor."""

    def main(self,
             network_func,
             iter=100,
             iter_per_pe=100,
             use_gpu=True,
             use_experimental_executor=False):
        """Repeatedly build a ParallelExecutor in dry-run mode and run it.

        Args:
            network_func: callable that builds the network inside the current
                program guard and returns the loss variable.
            iter: number of ParallelExecutor instances to create.
            iter_per_pe: number of `run` calls issued on each instance.
            use_gpu: run on CUDAPlace(0) when True, on CPUPlace otherwise.
                The test is skipped (with a warning) when a GPU is requested
                but Paddle was not compiled with CUDA.
            use_experimental_executor: forwarded to
                ExecutionStrategy.use_experimental_executor.
        """
        if use_gpu and not fluid.core.is_compiled_with_cuda():
            logging.warning(
                "Paddle is not compiled with CUDA, skip GPU unittests")
            return

        main_prog = fluid.Program()
        startup_prog = fluid.Program()
        scope = fluid.Scope()
        with fluid.program_guard(main_prog, startup_prog):
            with fluid.scope_guard(scope):
                loss = network_func()
                fluid.Executor(
                    fluid.CUDAPlace(0)
                    if use_gpu else fluid.CPUPlace()).run(startup_prog)

                for _ in six.moves.xrange(iter):
                    exe_strategy = fluid.ExecutionStrategy()
                    exe_strategy._dry_run = True
                    exe_strategy.use_experimental_executor = \
                        use_experimental_executor
                    # The executor must target the same device as the startup
                    # run above; a hard-coded use_cuda=True would request CUDA
                    # even for the CPU variant of the test.
                    pe = fluid.ParallelExecutor(
                        use_cuda=use_gpu,
                        loss_name=loss.name,
                        main_program=main_prog,
                        exec_strategy=exe_strategy)
                    for _ in six.moves.xrange(iter_per_pe):
                        pe.run([])
class TestMNISTDryRun(TestBase):
    """Dry-run test on an MNIST-shaped fully connected network."""

    def test_mnist_dry_run(self):
        """Run the dry-run driver for every device / executor combination."""
        for use_gpu in (False, True):
            for use_experimental_executor in (False, True):
                self.main(
                    network_func=TestMNISTDryRun.network_func,
                    use_gpu=use_gpu,
                    use_experimental_executor=use_experimental_executor)

    @staticmethod
    def network_func():
        """Build a 10-layer tanh MLP with a softmax head and Adam optimizer.

        Returns:
            The mean cross-entropy loss variable.
        """
        img = fluid.layers.data(name='img', shape=[784], dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        hidden = img
        for _ in six.moves.xrange(10):
            # Chain each layer onto the previous one; feeding `img` here would
            # leave nine of the ten layers disconnected from the prediction.
            hidden = fluid.layers.fc(input=hidden, size=200, act='tanh')
        prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
        loss = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_loss = fluid.layers.mean(loss)
        fluid.optimizer.Adam().minimize(avg_loss)
        return avg_loss
if __name__ == '__main__':
unittest.main()
...@@ -14,30 +14,18 @@ ...@@ -14,30 +14,18 @@
from __future__ import print_function from __future__ import print_function
from parallel_executor_test_base import TestParallelExecutorBase
import paddle.fluid as fluid
import paddle.fluid.core as core
import numpy as np
import paddle
import paddle.dataset.mnist as mnist
import unittest import unittest
import os
MNIST_RECORDIO_FILE = "./mnist_test_pe.recordio" import numpy as np
import paddle.fluid.core as core
import os
import paddle.fluid as fluid
from parallel_executor_test_base import TestParallelExecutorBase
def simple_fc_net(use_feed): def simple_fc_net(use_feed):
if use_feed: img = fluid.layers.data(name='image', shape=[784], dtype='float32')
img = fluid.layers.data(name='image', shape=[784], dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
else:
reader = fluid.layers.open_files(
filenames=[MNIST_RECORDIO_FILE],
shapes=[[-1, 784], [-1, 1]],
lod_levels=[0, 0],
dtypes=['float32', 'int64'])
reader = fluid.layers.io.double_buffer(reader)
img, label = fluid.layers.read_file(reader)
hidden = img hidden = img
for _ in range(4): for _ in range(4):
hidden = fluid.layers.fc( hidden = fluid.layers.fc(
...@@ -53,17 +41,8 @@ def simple_fc_net(use_feed): ...@@ -53,17 +41,8 @@ def simple_fc_net(use_feed):
def fc_with_batchnorm(use_feed): def fc_with_batchnorm(use_feed):
if use_feed: img = fluid.layers.data(name='image', shape=[784], dtype='float32')
img = fluid.layers.data(name='image', shape=[784], dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
else:
reader = fluid.layers.open_files(
filenames=[MNIST_RECORDIO_FILE],
shapes=[[-1, 784], [-1, 1]],
lod_levels=[0, 0],
dtypes=['float32', 'int64'])
reader = fluid.layers.io.double_buffer(reader)
img, label = fluid.layers.read_file(reader)
hidden = img hidden = img
for _ in range(1): for _ in range(1):
...@@ -88,19 +67,6 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -88,19 +67,6 @@ class TestMNIST(TestParallelExecutorBase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
os.environ['CPU_NUM'] = str(4) os.environ['CPU_NUM'] = str(4)
# Convert mnist to recordio file
with fluid.program_guard(fluid.Program(), fluid.Program()):
reader = paddle.batch(mnist.train(), batch_size=4)
feeder = fluid.DataFeeder(
feed_list=[ # order is image and label
fluid.layers.data(
name='image', shape=[784]),
fluid.layers.data(
name='label', shape=[1], dtype='int64'),
],
place=fluid.CPUPlace())
fluid.recordio_writer.convert_reader_to_recordio_file(
MNIST_RECORDIO_FILE, reader, feeder)
def _init_data(self): def _init_data(self):
np.random.seed(5) np.random.seed(5)
...@@ -111,10 +77,6 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -111,10 +77,6 @@ class TestMNIST(TestParallelExecutorBase):
def _compare_reduce_and_allreduce(self, model, use_cuda): def _compare_reduce_and_allreduce(self, model, use_cuda):
if use_cuda and not core.is_compiled_with_cuda(): if use_cuda and not core.is_compiled_with_cuda():
return return
self.check_network_convergence(
model, use_cuda=use_cuda, use_reduce=True)
self.check_network_convergence(
model, use_cuda=use_cuda, allow_op_delay=True, use_reduce=True)
img, label = self._init_data() img, label = self._init_data()
...@@ -140,9 +102,6 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -140,9 +102,6 @@ class TestMNIST(TestParallelExecutorBase):
def check_simple_fc_convergence(self, use_cuda, use_reduce=False): def check_simple_fc_convergence(self, use_cuda, use_reduce=False):
if use_cuda and not core.is_compiled_with_cuda(): if use_cuda and not core.is_compiled_with_cuda():
return return
self.check_network_convergence(simple_fc_net, use_cuda=use_cuda)
self.check_network_convergence(
simple_fc_net, use_cuda=use_cuda, allow_op_delay=True)
img, label = self._init_data() img, label = self._init_data()
...@@ -199,8 +158,6 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -199,8 +158,6 @@ class TestMNIST(TestParallelExecutorBase):
if use_cuda and not core.is_compiled_with_cuda(): if use_cuda and not core.is_compiled_with_cuda():
return return
self.check_network_convergence(fc_with_batchnorm, use_cuda=use_cuda)
img, label = self._init_data() img, label = self._init_data()
self.check_network_convergence( self.check_network_convergence(
......
...@@ -14,7 +14,8 @@ RC = 0 ...@@ -14,7 +14,8 @@ RC = 0
def git_commit(): def git_commit():
try: try:
cmd = ['git', 'rev-parse', 'HEAD'] cmd = ['git', 'rev-parse', 'HEAD']
git_commit = subprocess.Popen(cmd, stdout = subprocess.PIPE).communicate()[0].strip() git_commit = subprocess.Popen(cmd, stdout = subprocess.PIPE,
cwd="@PADDLE_SOURCE_DIR@").communicate()[0].strip()
except: except:
git_commit = 'Unknown' git_commit = 'Unknown'
git_commit = git_commit.decode() git_commit = git_commit.decode()
...@@ -44,7 +45,7 @@ def get_patch(): ...@@ -44,7 +45,7 @@ def get_patch():
def is_taged(): def is_taged():
try: try:
cmd = ['git', 'describe', '--exact-match', '--tags', 'HEAD', '2>/dev/null'] cmd = ['git', 'describe', '--exact-match', '--tags', 'HEAD', '2>/dev/null']
git_tag = subprocess.Popen(cmd, stdout = subprocess.PIPE).communicate()[0].strip() git_tag = subprocess.Popen(cmd, stdout = subprocess.PIPE, cwd="@PADDLE_SOURCE_DIR@").communicate()[0].strip()
git_tag = git_tag.decode() git_tag = git_tag.decode()
except: except:
return False return False
...@@ -55,8 +56,7 @@ def is_taged(): ...@@ -55,8 +56,7 @@ def is_taged():
return False return False
def write_version_py(filename='paddle/version.py'): def write_version_py(filename='paddle/version.py'):
cnt = ''' cnt = '''# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY
# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY
# #
full_version = '%(major)d.%(minor)d.%(patch)s' full_version = '%(major)d.%(minor)d.%(patch)s'
major = '%(major)d' major = '%(major)d'
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册