Commit 7834b4a4 authored by F fengjiayi

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dev_op_tensor_support

...@@ -27,15 +27,6 @@ script:
     # 43min timeout
     paddle/scripts/paddle_docker_build.sh ${JOB}
     if [ $? -eq 0 ] || [ $? -eq 142 ]; then true; else exit 1; fi;
-  - |
-    if [[ "$JOB" != "doc" ]]; then exit 0; fi;
-    # For document only
-    if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
-    if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v|release/[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then exit 0; fi;
-    export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/master/scripts/deploy/deploy_docs.sh
-    export DOCS_DIR=`pwd`
-    cd ..
-    curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH $DOCS_DIR $DOCS_DIR/build/doc/
 notifications:
   email:
     on_success: change
......
...@@ -265,6 +265,7 @@ function(cc_test TARGET_NAME)
     if (${cc_test_SERIAL})
         set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1)
         set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
+        set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
     endif()
   endif()
 endfunction(cc_test)
...@@ -330,6 +331,7 @@ function(nv_test TARGET_NAME)
     if (nv_test_SERIAL)
         set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1)
         set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
+        set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
     endif()
   endif()
 endfunction(nv_test)
...@@ -577,7 +579,8 @@ function(py_test TARGET_NAME)
     set(multiValueArgs SRCS DEPS ARGS ENVS)
     cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
     add_test(NAME ${TARGET_NAME}
-             COMMAND env FLAGS_init_allocated_mem=true PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
+             COMMAND env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
+             PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
              ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
              WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
   endif()
......
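The py_test change above amounts to launching every Python test with the two debugging flags exported in its environment. A minimal standalone sketch of an equivalent launcher (the helper name and test script path are illustrative, not part of this commit):

```python
import os
import subprocess
import sys

def run_py_test(test_script, extra_env=None):
    """Run a Python test with the flags the updated py_test() CMake function sets."""
    env = dict(os.environ)
    env["FLAGS_init_allocated_mem"] = "true"   # poison freshly allocated memory
    env["FLAGS_cudnn_deterministic"] = "true"  # force deterministic cuDNN algorithms
    if extra_env:
        env.update(extra_env)
    return subprocess.call([sys.executable, "-u", test_script], env=env)

if __name__ == "__main__":
    sys.exit(run_py_test("test_example.py"))  # hypothetical test script, for illustration
```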
...@@ -136,6 +136,8 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
     platform::SetDeviceId(dev_id);
 #endif
   }
+  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+  platform::RecordEvent record_event(Type(), pool.Get(place));
   RunImpl(scope, place);
   VLOG(10) << "+ " << DebugStringEx(&scope);
 }
...@@ -639,9 +641,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
   auto* dev_ctx = pool.Get(place);
-  // For profiling, don't move out of this function because that will result
-  // in the failure of multi-GPU profiling.
-  platform::RecordEvent record_event(Type(), dev_ctx);
   // check if op[type] has kernel registered.
   auto& all_op_kernels = AllOpKernels();
   auto kernels_iter = all_op_kernels.find(type_);
......
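RecordEvent is an RAII-style scoped profiler marker; hoisting it from OperatorWithKernel::RunImpl into OperatorBase::Run (and dropping the per-op copies in the hunks below) means every operator run is covered by exactly one profiling span. A rough Python analogy of that scoping pattern, using only the standard library (names here are illustrative, not Paddle APIs):

```python
import time
from contextlib import contextmanager

events = []

@contextmanager
def record_event(name):
    """Scoped profiling marker: records wall-clock time for the enclosed block."""
    start = time.time()
    try:
        yield
    finally:
        events.append((name, time.time() - start))

def run_op(op_type):
    # the whole Run() body sits inside one event, mirroring the C++ change
    with record_event(op_type):
        pass  # RunImpl(scope, place) would execute here

run_op("dropout")
print(events)
```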
...@@ -74,9 +74,10 @@ if (WITH_ANAKIN) # only needed in CI
     target_link_libraries(inference_anakin_api anakin anakin_saber_common)
     target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common)
     if (WITH_TESTING)
-        cc_test(inference_anakin_test SRCS api_anakin_engine_tester.cc
-                ARGS --model=${ANAKIN_INSTALL_DIR}/mobilenet_v2.anakin.bin
-                DEPS inference_anakin_api_shared)
-        target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
+        # this test is unstable, disable it first.
+        #cc_test(inference_anakin_test SRCS api_anakin_engine_tester.cc
+        #        ARGS --model=${ANAKIN_INSTALL_DIR}/mobilenet_v2.anakin.bin
+        #        DEPS inference_anakin_api_shared)
+        #target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
     endif(WITH_TESTING)
 endif()
...@@ -31,7 +31,6 @@ class FeedOp : public framework::OperatorBase {
                const platform::Place &place) const override {
     // get device context from pool
     auto *dev_ctx = platform::DeviceContextPool::Instance().Get(place);
-    platform::RecordEvent record_event(Type(), dev_ctx);
     auto feed_var_name = Input("X");
     auto *feed_var = scope.FindVar(feed_var_name);
......
...@@ -36,12 +36,6 @@ class FetchBarrierOp : public framework::OperatorBase {
   void RunImpl(const framework::Scope& scope,
                const platform::Place& place) const override {
     std::vector<std::string> eps = Attr<std::vector<std::string>>("endpoints");
-    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
-    auto& ctx = *pool.Get(place);
-    // For profiling
-    platform::RecordEvent record_event(Type(), &ctx);
     distributed::RPCClient* rpc_client =
         distributed::RPCClient::GetInstance<RPCCLIENT_T>();
......
...@@ -30,9 +30,6 @@ class FetchOp : public framework::OperatorBase {
  private:
   void RunImpl(const framework::Scope &scope,
                const platform::Place &place) const override {
-    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
-    platform::RecordEvent record_event(Type(), pool.Get(place));
     auto fetch_var_name = Input("X");
     auto *fetch_var = scope.FindVar(fetch_var_name);
     PADDLE_ENFORCE(fetch_var != nullptr,
......
...@@ -31,9 +31,6 @@ class LoadOp : public framework::OperatorBase {
  private:
   void RunImpl(const framework::Scope &scope,
                const platform::Place &place) const override {
-    auto *dev_ctx = platform::DeviceContextPool::Instance().Get(place);
-    platform::RecordEvent record_event(Type(), dev_ctx);
     // FIXME(yuyang18): We save variable to local file now, but we should change
     // it to save an output stream.
     auto filename = Attr<std::string>("file_path");
......
...@@ -40,8 +40,6 @@ class RecvOp : public framework::OperatorBase {
     platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
     auto& ctx = *pool.Get(place);
-    // For profiling
-    platform::RecordEvent record_event(Type(), &ctx);
     distributed::RPCClient* rpc_client =
         distributed::RPCClient::GetInstance<RPCCLIENT_T>();
......
...@@ -39,11 +39,6 @@ class SendBarrierOp : public framework::OperatorBase {
     std::vector<std::string> eps = Attr<std::vector<std::string>>("endpoints");
     bool sync_mode = Attr<bool>("sync_mode");
-    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
-    auto& ctx = *pool.Get(place);
-    // For profiling
-    platform::RecordEvent record_event(Type(), &ctx);
     distributed::RPCClient* rpc_client =
         distributed::RPCClient::GetInstance<RPCCLIENT_T>();
......
...@@ -42,9 +42,6 @@ class SendOp : public framework::OperatorBase {
     platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
     auto& ctx = *pool.Get(place);
-    // For profiling
-    platform::RecordEvent record_event(Type(), &ctx);
     distributed::RPCClient* rpc_client =
         distributed::RPCClient::GetInstance<RPCCLIENT_T>();
......
...@@ -110,6 +110,8 @@ Event::Event(EventType type, std::string name, uint32_t thread_id,
   has_cuda_ = dev_ctx ? platform::is_gpu_place(dev_ctx->GetPlace()) : false;
   if (has_cuda_) {
     auto* cuda_dev_ctx = static_cast<const CUDADeviceContext*>(dev_ctx);
+    PADDLE_ENFORCE(cudaSetDevice(
+        boost::get<platform::CUDAPlace>(cuda_dev_ctx->GetPlace()).device));
     PADDLE_ENFORCE(cudaGetDevice(&device_));
     PADDLE_ENFORCE(cudaEventCreate(&event_));
     auto stream = cuda_dev_ctx->stream();
...@@ -176,6 +178,7 @@ void PopEvent(const std::string& name, const DeviceContext* dev_ctx) {
 RecordEvent::RecordEvent(const std::string& name, const DeviceContext* dev_ctx)
     : is_enabled_(false), start_ns_(PosixInNsec()) {
+  std::lock_guard<std::mutex> l(profiler_mu);
   if (g_state == ProfilerState::kDisabled) return;
   is_enabled_ = true;
   dev_ctx_ = dev_ctx;
...@@ -186,6 +189,7 @@ RecordEvent::RecordEvent(const std::string& name, const DeviceContext* dev_ctx)
 }
 
 RecordEvent::~RecordEvent() {
+  std::lock_guard<std::mutex> l(profiler_mu);
   if (g_state == ProfilerState::kDisabled || !is_enabled_) return;
   DeviceTracer* tracer = GetDeviceTracer();
   if (tracer) {
...@@ -198,6 +202,7 @@ RecordEvent::~RecordEvent() {
 RecordBlock::RecordBlock(int block_id)
     : is_enabled_(false), start_ns_(PosixInNsec()) {
+  std::lock_guard<std::mutex> l(profiler_mu);
   if (g_state == ProfilerState::kDisabled) return;
   is_enabled_ = true;
   SetCurBlock(block_id);
...@@ -205,6 +210,7 @@ RecordBlock::RecordBlock(int block_id)
 }
 
 RecordBlock::~RecordBlock() {
+  std::lock_guard<std::mutex> l(profiler_mu);
   if (g_state == ProfilerState::kDisabled || !is_enabled_) return;
   DeviceTracer* tracer = GetDeviceTracer();
   if (tracer) {
......
...@@ -419,6 +419,25 @@ EOF
     linkchecker doc/v2/en/html/index.html
     linkchecker doc/v2/cn/html/index.html
     linkchecker doc/v2/api/en/html/index.html
+
+    if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
+
+    # Deploy to the content server if it is a "develop" or "release/version" branch.
+    # The "develop_doc" branch is reserved to test the full deploy process without impacting the real content.
+    if [ "$TRAVIS_BRANCH" == "develop_doc" ]; then
+        PPO_SCRIPT_BRANCH=develop
+    elif [[ "$TRAVIS_BRANCH" == "develop" || "$TRAVIS_BRANCH" =~ ^v|release/[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then
+        PPO_SCRIPT_BRANCH=master
+    else
+        # Early exit, this branch doesn't require a documentation build
+        return 0;
+    fi
+    # Fetch the paddlepaddle.org deploy_docs.sh from the appropriate branch
+    export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/$PPO_SCRIPT_BRANCH/scripts/deploy/deploy_docs.sh
+    export PYTHONPATH=$PYTHONPATH:${PADDLE_ROOT}/build/python:/paddle/build/python
+    cd ..
+    curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH ${PADDLE_ROOT} ${PADDLE_ROOT}/build/doc/ ${PPO_SCRIPT_BRANCH}
+    cd -
 }
 
 function gen_html() {
......
...@@ -52,6 +52,9 @@ EOL
     ${DOCKER_CMD} run -it \
         ${DOCKER_ENV} \
         -e SCRIPT_NAME=$0 \
+        -e CONTENT_DEC_PASSWD=$CONTENT_DEC_PASSWD \
+        -e TRAVIS_BRANCH=$TRAVIS_BRANCH \
+        -e TRAVIS_PULL_REQUEST=$TRAVIS_PULL_REQUEST \
        -v $PADDLE_ROOT:/paddle \
        -v ${HOME}/.ccache:/root/.ccache \
        -w /paddle \
......
...@@ -572,8 +572,6 @@ def append_backward(loss, parameter_list=None, no_grad_set=None,
     program.current_block_idx = current_block_idx
     program._sync_with_cpp()
-    # FIXME(zcd): prevent loss.grad optimized by mem_opt.
-    loss.block.var(_append_grad_suffix_(loss.name)).persistable = True
     if parameter_list is not None:
         parameters = parameter_list
......
...@@ -1038,7 +1038,26 @@ class Block(object):
         global_block = self.program.global_block()
         param = Parameter(global_block, *args, **kwargs)
         if 'initializer' in kwargs:
-            kwargs['initializer'](param, self)
+
+            def _is_inited_by(block, var):
+                init_ops = []
+                for op in block.ops:
+                    if var.name in op.output_arg_names:
+                        init_ops.append(op)
+                return init_ops
+
+            initializer = kwargs['initializer']
+            init_ops = _is_inited_by(global_block, param)
+            init_ops_len = len(init_ops)
+            if init_ops_len > 1:
+                raise RuntimeError("param " + param.name +
+                                   " is inited by multiple init ops " + str(
+                                       init_ops))
+            elif init_ops_len == 1:
+                #TODO already inited, do nothing, should log a warning
+                pass
+            else:
+                initializer(param, self)
         return param
 
     def append_op(self, *args, **kwargs):
......
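The new guard above scans the global block's existing ops to decide whether the parameter already has an initializer op; only when none is found does the initializer actually run, and more than one is treated as an error. A self-contained sketch of the same idea, with simplified stand-in classes rather than the real Paddle framework objects:

```python
class Op(object):
    def __init__(self, op_type, outputs):
        self.type = op_type
        self.output_arg_names = outputs

class Block(object):
    def __init__(self):
        self.ops = []

def init_ops_for(block, var_name):
    """Collect ops that already write this variable, i.e. initialize it."""
    return [op for op in block.ops if var_name in op.output_arg_names]

def maybe_init(block, var_name, make_init_op):
    ops = init_ops_for(block, var_name)
    if len(ops) > 1:
        raise RuntimeError("param %s is inited by multiple init ops" % var_name)
    elif len(ops) == 1:
        return  # already initialized; creating the parameter again appends nothing
    block.ops.append(make_init_op(var_name))

blk = Block()
maybe_init(blk, "fc_w", lambda n: Op("uniform_random", [n]))
maybe_init(blk, "fc_w", lambda n: Op("uniform_random", [n]))  # second call is a no-op
assert len(blk.ops) == 1
```

This is what lets the updated initializer tests below call create_parameter twice for the same name and still see exactly one init op in the block.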
...@@ -949,6 +949,10 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None):
     helper = LayerHelper('dropout', **locals())
     out = helper.create_tmp_variable(dtype=x.dtype)
     mask = helper.create_tmp_variable(dtype=x.dtype, stop_gradient=True)
+
+    if (seed is None or seed == 0) and helper.main_program.random_seed != 0:
+        seed = helper.main_program.random_seed
+
     helper.append_op(
         type='dropout',
         inputs={'X': [x]},
......
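The two added lines make dropout fall back to the program-level random_seed whenever no explicit, non-zero seed is passed, which is what allows the deterministic test runs further below to be reproducible. A minimal illustration of that fallback rule in plain Python (not the fluid API itself):

```python
def resolve_dropout_seed(layer_seed, program_seed):
    """Return the seed a dropout op should use: an explicit non-zero layer
    seed wins; otherwise inherit a non-zero program-level seed."""
    if (layer_seed is None or layer_seed == 0) and program_seed != 0:
        return program_seed
    return layer_seed

assert resolve_dropout_seed(None, 123) == 123  # inherit the program seed
assert resolve_dropout_seed(7, 123) == 7       # explicit seed wins
assert resolve_dropout_seed(None, 0) is None   # nothing to inherit
```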
...@@ -73,9 +73,18 @@ class TranspilerTest(unittest.TestCase):
         return self.transpiler
 
+    def transpiler_test_impl(self):
+        pass
+
+    def test_transpiler(self):
+        main = fluid.Program()
+        startup = fluid.Program()
+        with fluid.program_guard(main, startup):
+            self.transpiler_test_impl()
+
 
 class TestBasicModel(TranspilerTest):
-    def test_transpiler(self):
+    def transpiler_test_impl(self):
         pserver, startup = self.get_pserver(self.pserver1_ep)
         pserver2, startup2 = self.get_pserver(self.pserver2_ep)
...@@ -123,7 +132,7 @@ class TestBasicModel(TranspilerTest):
 
 class TestBasicModelWithLargeBlockSize(TranspilerTest):
-    def test_transpiler(self):
+    def transpiler_test_impl(self):
         config = fluid.DistributeTranspilerConfig()
         config.min_block_size = 1048576
...@@ -148,7 +157,7 @@ class TestBasicModelWithLargeBlockSize(TranspilerTest):
                          ["sum", "scale", "sgd"])
         # confirm startup program
         self.assertEqual([op.type for op in startup.global_block().ops],
-                         ["fill_constant", "fill_constant", "fill_constant"])
+                         ["fill_constant", "fill_constant"])
         # the variable #fc_w will be split into two blocks
         fc_w_var = startup2.global_block().var("fc_w")
         self.assertEqual(fc_w_var.shape, (1000L, 1000L))
...@@ -177,7 +186,7 @@ class TestNoSliceVar(TranspilerTest):
     def setUp(self):
         super(TestNoSliceVar, self).setUp()
 
-    def test_transpiler(self):
+    def transpiler_test_impl(self):
         config = fluid.DistributeTranspilerConfig()
         config.slice_var_up = False
...@@ -212,7 +221,7 @@ class TestLRDecay(TranspilerTest):
         sgd_optimizer.minimize(avg_cost)
         return
 
-    def test_transpiler(self):
+    def transpiler_test_impl(self):
         pserver, startup = self.get_pserver(self.pserver1_ep)
         trainer = self.get_trainer()
...@@ -242,7 +251,7 @@ class TestLRDecayConditional(TranspilerTest):
         sgd_optimizer.minimize(avg_cost)
         return
 
-    def test_transpiler(self):
+    def transpiler_test_impl(self):
         pserver, startup = self.get_pserver(self.pserver1_ep)
         trainer = self.get_trainer()
...@@ -291,7 +300,7 @@ class TestL2Decay(TranspilerTest):
         sgd_optimizer.minimize(avg_cost)
         return
 
-    def test_transpiler(self):
+    def transpiler_test_impl(self):
         pserver, startup = self.get_pserver(self.pserver1_ep)
         trainer = self.get_trainer()
...@@ -326,7 +335,7 @@ class TestL2DecayWithPiecewise(TranspilerTest):
         sgd_optimizer.minimize(avg_cost)
         return
 
-    def test_transpiler(self):
+    def transpiler_test_impl(self):
         pserver, startup = self.get_pserver(self.pserver1_ep)
         trainer = self.get_trainer()
......
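The refactor above moves the fluid.program_guard boilerplate into the base class's test_transpiler and has every concrete test override transpiler_test_impl instead, a template-method pattern. A stripped-down sketch of that structure, with the Paddle call replaced by a placeholder context manager:

```python
import unittest
from contextlib import contextmanager

@contextmanager
def program_guard():
    # placeholder for fluid.program_guard(main, startup)
    yield

class TranspilerTestBase(unittest.TestCase):
    def transpiler_test_impl(self):
        pass  # subclasses put their assertions here

    def test_transpiler(self):
        # every concrete test body runs inside a fresh program scope
        with program_guard():
            self.transpiler_test_impl()

class TestBasicModelSketch(TranspilerTestBase):
    def transpiler_test_impl(self):
        self.assertTrue(True)  # a real test would inspect the pserver/trainer programs

if __name__ == "__main__":
    unittest.main()
```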
...@@ -27,6 +27,7 @@ class TestConstantInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             block.create_parameter(
                 dtype="float32",
                 shape=[5, 10],
...@@ -43,6 +44,7 @@ class TestConstantInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             block.create_parameter(
                 dtype="float32",
                 shape=[5, 10],
...@@ -61,6 +63,7 @@ class TestUniformInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             block.create_parameter(
                 dtype="float32",
                 shape=[5, 10],
...@@ -80,17 +83,18 @@ class TestUniformInitializer(unittest.TestCase):
         program = framework.Program()
         program.random_seed = 123
         block = program.global_block()
+        for _ in range(2):
             block.create_parameter(
                 dtype="float32",
                 shape=[5, 10],
                 lod_level=0,
-                name="param",
+                name="param1",
                 initializer=initializer.UniformInitializer())
             block.create_parameter(
                 dtype="float32",
                 shape=[5, 10],
                 lod_level=0,
-                name="param",
+                name="param2",
                 initializer=initializer.UniformInitializer(seed=456))
         init_op = block.ops[1]
         self.assertEqual(init_op.attr("seed"), 123)
...@@ -102,6 +106,7 @@ class TestUniformInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             block.create_parameter(
                 dtype="float32",
                 shape=[5, 10],
...@@ -115,6 +120,25 @@ class TestUniformInitializer(unittest.TestCase):
         self.assertAlmostEqual(init_op.attr('max'), 3.1, delta=DELTA)
         self.assertEqual(init_op.attr('seed'), 123)
 
+    def test_uniform_initializer_two_op(self):
+        """Test uniform initializer with supplied attributes
+        """
+        program = framework.Program()
+        block = program.global_block()
+        for i in range(2):
+            block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
+                lod_level=0,
+                name="param",
+                initializer=initializer.UniformInitializer(-4.2, float(i), 123))
+        self.assertEqual(len(block.ops), 1)
+        init_op0 = block.ops[0]
+        self.assertEqual(init_op0.type, 'uniform_random')
+        self.assertAlmostEqual(init_op0.attr('min'), -4.2, delta=DELTA)
+        self.assertAlmostEqual(init_op0.attr('max'), 0.0, delta=DELTA)
+        self.assertEqual(init_op0.attr('seed'), 123)
+
 
 class TestNormalInitializer(unittest.TestCase):
     def test_normal_initializer_default_value(self):
...@@ -122,6 +146,7 @@ class TestNormalInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             block.create_parameter(
                 dtype="float32",
                 shape=[5, 10],
...@@ -140,6 +165,7 @@ class TestNormalInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             block.create_parameter(
                 dtype="float32",
                 shape=[5, 10],
...@@ -161,6 +187,7 @@ class TestXavierInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             param = block.create_parameter(
                 dtype="float32",
                 shape=[5, 10],
...@@ -181,6 +208,7 @@ class TestXavierInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             param = block.create_parameter(
                 dtype="float32",
                 shape=[5, 10, 15, 20],
...@@ -203,6 +231,7 @@ class TestXavierInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             param = block.create_parameter(
                 dtype="float32",
                 shape=[5, 10],
...@@ -223,6 +252,7 @@ class TestXavierInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             param = block.create_parameter(
                 dtype="float32",
                 shape=[5, 10, 15, 20],
...@@ -244,6 +274,7 @@ class TestXavierInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             block.create_parameter(
                 dtype="float32",
                 shape=[5, 10],
...@@ -267,6 +298,7 @@ class TestMSRAInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             param = block.create_parameter(
                 dtype="float32",
                 shape=[5, 10],
...@@ -287,6 +319,7 @@ class TestMSRAInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             param = block.create_parameter(
                 dtype="float32",
                 shape=[5, 10, 15, 20],
...@@ -308,6 +341,7 @@ class TestMSRAInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             param = block.create_parameter(
                 dtype="float32",
                 shape=[5, 10],
...@@ -328,6 +362,7 @@ class TestMSRAInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             param = block.create_parameter(
                 dtype="float32",
                 shape=[5, 10, 15, 20],
...@@ -348,6 +383,7 @@ class TestMSRAInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             block.create_parameter(
                 dtype="float32",
                 shape=[5, 10],
...@@ -370,6 +406,7 @@ class TestMSRAInitializer(unittest.TestCase):
         """
         program = framework.Program()
         block = program.global_block()
+        for _ in range(2):
             block.create_parameter(
                 dtype="float32",
                 shape=[8, 1, 3, 3],
......
...@@ -98,16 +98,13 @@ class TestMNIST(TestParallelExecutorBase):
         fluid.recordio_writer.convert_reader_to_recordio_file(
             MNIST_RECORDIO_FILE, reader, feeder)
 
-    def _init_data(self, random=True):
+    def _init_data(self):
         np.random.seed(5)
-        if random:
-            img = np.random.random(size=[32, 784]).astype(np.float32)
-        else:
-            img = np.ones(shape=[32, 784], dtype='float32')
+        img = np.random.random(size=[32, 784]).astype(np.float32)
         label = np.ones(shape=[32, 1], dtype='int64')
         return img, label
 
-    def _compare_reduce_and_allreduce(self, model, use_cuda, random_data=True):
+    def _compare_reduce_and_allreduce(self, model, use_cuda):
         if use_cuda and not core.is_compiled_with_cuda():
             return
         self.check_network_convergence(
...@@ -115,7 +112,7 @@ class TestMNIST(TestParallelExecutorBase):
         self.check_network_convergence(
             model, use_cuda=use_cuda, allow_op_delay=True, use_reduce=True)
 
-        img, label = self._init_data(random_data)
+        img, label = self._init_data()
 
         all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence(
             model,
...@@ -166,27 +163,27 @@ class TestMNIST(TestParallelExecutorBase):
         if use_cuda and not core.is_compiled_with_cuda():
             return
 
-        img, label = self._init_data(random=False)
+        img, label = self._init_data()
 
         single_first_loss, single_last_loss = self.check_network_convergence(
             method=simple_fc_net,
-            seed=1000,
+            seed=1,
             feed_dict={"image": img,
                        "label": label},
             use_cuda=use_cuda,
             use_parallel_executor=False)
         parallel_first_loss, parallel_last_loss = self.check_network_convergence(
             method=simple_fc_net,
-            seed=1000,
+            seed=1,
             feed_dict={"image": img,
                        "label": label},
             use_cuda=use_cuda,
             use_parallel_executor=True)
 
-        for p_f in parallel_first_loss:
-            self.assertAlmostEquals(p_f, single_first_loss[0], delta=1e-6)
-        for p_l in parallel_last_loss:
-            self.assertAlmostEquals(p_l, single_last_loss[0], delta=1e-6)
+        self.assertAlmostEquals(
+            np.mean(parallel_first_loss), single_first_loss, delta=1e-6)
+        self.assertAlmostEquals(
+            np.mean(parallel_last_loss), single_last_loss, delta=1e-6)
 
     def test_simple_fc_parallel_accuracy(self):
         self.check_simple_fc_parallel_accuracy(True)
......
...@@ -21,6 +21,19 @@ from parallel_executor_test_base import TestParallelExecutorBase
 import unittest
 import math
 import os
+import numpy as np
+
+# FIXME(zcd): If the neural net has a dropout_op, the outputs of ParallelExecutor
+# and Executor differ. For ParallelExecutor, the dropout_op of the neural net is
+# copied N times (N is the number of devices), so the random numbers generated by
+# ParallelExecutor and Executor are different. Therefore, if we compare the loss of
+# ParallelExecutor and Executor, we should remove the dropout_op.
+remove_dropout = False
+
+# FIXME(zcd): If the neural net has batch_norm, the outputs of ParallelExecutor
+# and Executor differ as well.
+remove_bn = False
 
 
 def squeeze_excitation(input, num_channels, reduction_ratio):
...@@ -53,7 +66,8 @@ def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1,
         groups=groups,
         act=None,
         bias_attr=False)
-    return fluid.layers.batch_norm(input=conv, act=act, momentum=0.1)
+    return conv if remove_bn else fluid.layers.batch_norm(
+        input=conv, act=act, momentum=0.1)
 
 
 def shortcut(input, ch_out, stride):
...@@ -92,13 +106,14 @@ def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio):
     return fluid.layers.elementwise_add(x=short, y=scale, act='relu')
 
 
-def SE_ResNeXt50Small(batch_size=2, use_feed=False):
-    assert not use_feed, "SE_ResNeXt doesn't support feed yet"
+batch_size = 12
+img_shape = [3, 224, 224]
 
-    img = fluid.layers.fill_constant(
-        shape=[batch_size, 3, 224, 224], dtype='float32', value=0.0)
-    label = fluid.layers.fill_constant(
-        shape=[batch_size, 1], dtype='int64', value=0.0)
+
+def SE_ResNeXt50Small(use_feed):
+
+    img = fluid.layers.data(name='image', shape=img_shape, dtype='float32')
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
 
     conv = conv_bn_layer(
         input=img, num_filters=16, filter_size=3, stride=2, act='relu')
...@@ -127,7 +142,8 @@ def SE_ResNeXt50Small(batch_size=2, use_feed=False):
     reshape = fluid.layers.reshape(
         x=conv, shape=[-1, shape[1], shape[2] * shape[3]])
     pool = fluid.layers.reduce_mean(input=reshape, dim=2)
-    dropout = fluid.layers.dropout(x=pool, dropout_prob=0.2)
+    dropout = pool if remove_dropout else fluid.layers.dropout(
+        x=pool, dropout_prob=0.2, seed=1)
     # Classifier layer:
     prediction = fluid.layers.fc(input=dropout, size=1000, act='softmax')
     loss = fluid.layers.cross_entropy(input=prediction, label=label)
...@@ -135,18 +151,7 @@ def SE_ResNeXt50Small(batch_size=2, use_feed=False):
     return loss
 
 
-class TestResnet(TestParallelExecutorBase):
-    def check_resnet_convergence_with_learning_rate_decay(self,
-                                                          use_cuda=True,
-                                                          use_reduce=False,
-                                                          iter=20):
-        if use_cuda and not core.is_compiled_with_cuda():
-            return
-
-        os.environ['CPU_NUM'] = str(4)
-
-        def _cosine_decay(learning_rate, step_each_epoch, epochs=120):
+def cosine_decay(learning_rate, step_each_epoch, epochs=120):
     """
     Applies cosine decay to the learning rate.
     lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
...@@ -159,51 +164,122 @@ class TestResnet(TestParallelExecutorBase):
             (ops.cos(epoch * (math.pi / epochs)) + 1)/2
     return decayed_lr
 
-        def _optimizer(learning_rate=0.01):
+
+def optimizer(learning_rate=0.01):
     optimizer = fluid.optimizer.Momentum(
-        learning_rate=_cosine_decay(
+        learning_rate=cosine_decay(
             learning_rate=learning_rate, step_each_epoch=2, epochs=1),
         momentum=0.9,
         regularization=fluid.regularizer.L2Decay(1e-4))
     return optimizer
 
-        import functools
 
-        batch_size = 2
+class TestResnet(TestParallelExecutorBase):
+    @classmethod
+    def setUpClass(cls):
+        os.environ['CPU_NUM'] = str(4)
+        global remove_dropout
+        global remove_bn
+        remove_dropout = False
+        remove_bn = False
+
+    def _init_data(self, batch_size=2, random=True):
+        np.random.seed(5)
+        if random:
+            img = np.random.random(
+                size=[batch_size] + img_shape).astype(np.float32)
+        else:
+            img = np.ones(shape=[batch_size] + img_shape, dtype='float32')
+        label = [np.random.randint(0, 999) for _ in range(batch_size)]
+        label = np.array(label).astype(np.int64).reshape(-1, 1)
+        return img, label
+
+    def _compare_reduce_and_allreduce(self,
+                                      model,
+                                      use_cuda,
+                                      iter=20,
+                                      delta2=1e-4):
+        if use_cuda and not core.is_compiled_with_cuda():
+            return
+
+        global remove_bn
+        remove_bn = True
+
+        img, label = self._init_data(batch_size=batch_size)
+        all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence(
+            model,
+            feed_dict={"image": img,
+                       "label": label},
+            iter=iter,
+            batch_size=batch_size,
+            use_cuda=use_cuda,
+            use_reduce=False,
+            optimizer=optimizer)
+        reduce_first_loss, reduce_last_loss = self.check_network_convergence(
+            model,
+            feed_dict={"image": img,
+                       "label": label},
+            iter=iter,
+            batch_size=batch_size,
+            use_cuda=use_cuda,
+            use_reduce=True,
+            optimizer=optimizer)
+
+        for loss in zip(all_reduce_first_loss, reduce_first_loss):
+            self.assertAlmostEquals(loss[0], loss[1], delta=1e-6)
+        for loss in zip(all_reduce_last_loss, reduce_last_loss):
+            self.assertAlmostEquals(loss[0], loss[1], delta=delta2)
+
+    def _check_resnet_convergence(self,
+                                  model,
+                                  use_cuda=True,
+                                  use_reduce=False,
+                                  iter=20,
+                                  delta2=1e-6):
+        if use_cuda and not core.is_compiled_with_cuda():
+            return
+
+        global remove_dropout
+        global remove_bn
+        remove_dropout = True
+        remove_bn = True
+
+        img, label = self._init_data(batch_size=batch_size)
         single_first_loss, single_last_loss = self.check_network_convergence(
-            functools.partial(
-                SE_ResNeXt50Small, batch_size=batch_size),
+            model,
+            feed_dict={"image": img,
+                       "label": label},
             iter=iter,
             batch_size=batch_size,
             use_cuda=use_cuda,
             use_reduce=use_reduce,
-            optimizer=_optimizer,
+            optimizer=optimizer,
             use_parallel_executor=False)
         parallel_first_loss, parallel_last_loss = self.check_network_convergence(
-            functools.partial(
-                SE_ResNeXt50Small, batch_size=batch_size),
+            model,
+            feed_dict={"image": img,
+                       "label": label},
             iter=iter,
             batch_size=batch_size,
             use_cuda=use_cuda,
             use_reduce=use_reduce,
-            optimizer=_optimizer)
+            optimizer=optimizer)
 
-        for p_f in parallel_first_loss:
-            self.assertAlmostEquals(p_f, single_first_loss[0], delta=1e-6)
-        for p_l in parallel_last_loss:
-            self.assertAlmostEquals(p_l, single_last_loss[0], delta=1e-6)
+        self.assertAlmostEquals(
+            np.mean(parallel_first_loss), single_first_loss[0], delta=1e-6)
+        self.assertAlmostEquals(
+            np.mean(parallel_last_loss), single_last_loss[0], delta=delta2)
 
     def test_seresnext_with_learning_rate_decay(self):
-        self.check_resnet_convergence_with_learning_rate_decay(True, False)
-        self.check_resnet_convergence_with_learning_rate_decay(
-            False, False, iter=5)
+        self._check_resnet_convergence(model=SE_ResNeXt50Small, use_cuda=True)
+        self._check_resnet_convergence(
+            model=SE_ResNeXt50Small, use_cuda=False, iter=2, delta2=1e-3)
 
-    def test_seresnext_with_new_strategy_with_learning_rate_decay(self):
-        self.check_resnet_convergence_with_learning_rate_decay(True, True)
-        self.check_resnet_convergence_with_learning_rate_decay(
-            False, True, iter=5)
+    def test_seresnext_with_new_strategy(self):
+        # self._compare_reduce_and_allreduce(
+        #     model=SE_ResNeXt50Small, use_cuda=True)
+        self._compare_reduce_and_allreduce(
+            model=SE_ResNeXt50Small, use_cuda=False, iter=5, delta2=1e-2)
 
 
 if __name__ == '__main__':
......