Unverified commit be8c82cc, authored by chengduo, committed by GitHub

[Cherry pick]Update CPU_NUM config (#18110)

* update CPU_NUM config
test=develop
Parent 751497db
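Note (not part of the original commit message): a minimal sketch of how a user might set CPU_NUM before building a ParallelExecutor after this change; the tiny network below is illustrative only.

    import os
    # CPU_NUM controls how many CPUPlaces ParallelExecutor uses; the Program is
    # replicated once per place. Set it before the places are queried.
    os.environ['CPU_NUM'] = '4'

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    loss = fluid.layers.mean(fluid.layers.fc(input=x, size=1))
    fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    # With CPU_NUM=4 this creates four CPUPlaces and copies the Program four times.
    pe = fluid.ParallelExecutor(use_cuda=False, loss_name=loss.name)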
@@ -31,7 +31,7 @@ paddle.fluid.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'pr
 paddle.fluid.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd38c5b8b2b2e0bb19bcf1b581a80a7e4'))
 paddle.fluid.DistributeTranspilerConfig.__init__
 paddle.fluid.ParallelExecutor.__init__ (ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.ParallelExecutor.drop_local_exe_scopes (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '80d857dc626612e2b2460d0154551e95'))
+paddle.fluid.ParallelExecutor.drop_local_exe_scopes (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '77c739744ea5708b80fb1b37cc89db40'))
 paddle.fluid.ParallelExecutor.run (ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '33ce6ec50f8eeb05d340e6b114b026fd'))
 paddle.fluid.create_lod_tensor (ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None), ('document', 'b82ea20e2dc5ff2372e0643169ca47ff'))
 paddle.fluid.create_random_int_lodtensor (ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None), ('document', '74dc6d23185d90a7a50fbac19f5b65fb'))
@@ -311,7 +311,7 @@ paddle.fluid.layers.StaticRNN.step_input (ArgSpec(args=['self', 'x'], varargs=No
 paddle.fluid.layers.StaticRNN.step_output (ArgSpec(args=['self', 'o'], varargs=None, keywords=None, defaults=None), ('document', '252890d4c3199a7623ab8667e13fd837'))
 paddle.fluid.layers.StaticRNN.update_memory (ArgSpec(args=['self', 'mem', 'var'], varargs=None, keywords=None, defaults=None), ('document', '7a0000520f179f35239956a5ba55119f'))
 paddle.fluid.layers.reorder_lod_tensor_by_rank (ArgSpec(args=['x', 'rank_table'], varargs=None, keywords=None, defaults=None), ('document', '5b552a1f0f7eb4dacb768a975ba15d08'))
-paddle.fluid.layers.Print (ArgSpec(args=['input', 'first_n', 'message', 'summarize', 'print_tensor_name', 'print_tensor_type', 'print_tensor_shape', 'print_tensor_lod', 'print_phase'], varargs=None, keywords=None, defaults=(-1, None, -1, True, True, True, True, 'both')), ('document', 'a222dbad457441941e50b812e5af9c7e'))
+paddle.fluid.layers.Print (ArgSpec(args=['input', 'first_n', 'message', 'summarize', 'print_tensor_name', 'print_tensor_type', 'print_tensor_shape', 'print_tensor_lod', 'print_phase'], varargs=None, keywords=None, defaults=(-1, None, -1, True, True, True, True, 'both')), ('document', 'ee6c70867d317b0a87094ed23546215f'))
 paddle.fluid.layers.is_empty (ArgSpec(args=['x', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '3011dc695f490afdf504dc24f628319a'))
 paddle.fluid.layers.sigmoid (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a4e395ab004e7da34e94a0a1f9eee183'))
 paddle.fluid.layers.logsigmoid (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5f2508c52e0a797bb9bd5e29d79ede78'))
...
@@ -71,6 +71,7 @@ void ScopeBufferedSSAGraphExecutor::DropLocalExeScopes() {
     if (local_scope_var != nullptr) {
       auto &local_scope = *local_scope_var->GetMutable<Scope *>();
       scope->DeleteScope(local_scope);
+      scope->EraseVars({std::string(details::kLocalExecScopeName)});
       VLOG(3) << "Drop local execution scope: " << local_scope;
     }
   }
...
@@ -325,6 +325,12 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
                    "the number of places must be greater than 1.");
   }
 
+  LOG(WARNING) << string::Sprintf(
+      "The number of %s, which is used in ParallelExecutor, is %lu. And "
+      "the Program will be copied %lu copies",
+      (member_->use_cuda_ ? "CUDAPlace" : "CPUPlace"), places.size(),
+      places.size());
+
   // Step 1. Bcast the bcast_vars to devs.
   // Create local scopes
   if (local_scopes.empty()) {
...
@@ -135,33 +135,34 @@ struct Formater {
 };
 
 // TODO(ChunweiYan) there should be some other printers for TensorArray
-class TensorPrintOp : public framework::OperatorBase {
+class PrintOp : public framework::OperatorBase {
  public:
-  TensorPrintOp(const std::string &type,
-                const framework::VariableNameMap &inputs,
-                const framework::VariableNameMap &outputs,
-                const framework::AttributeMap &attrs)
+  PrintOp(const std::string &type, const framework::VariableNameMap &inputs,
+          const framework::VariableNameMap &outputs,
+          const framework::AttributeMap &attrs)
       : OperatorBase(type, inputs, outputs, attrs) {}
 
-  TensorPrintOp(const TensorPrintOp &o)
-      : framework::OperatorBase(
-            static_cast<const framework::OperatorBase &>(o)) {
-    PADDLE_THROW("Not implemented.");
-  }
-
  private:
   void RunImpl(const framework::Scope &scope,
                const platform::Place &place) const override {
-    const framework::Variable *in_var_ptr = nullptr;
-    std::string printed_var_name = "";
-
-    in_var_ptr = scope.FindVar(Input("In"));
-    printed_var_name = Inputs("In").front();
-
-    PADDLE_ENFORCE_NOT_NULL(in_var_ptr);
-
-    auto &in_tensor = in_var_ptr->Get<framework::LoDTensor>();
+    const auto in_var = scope.FindVar(Input("In"));
+    auto out_var = scope.FindVar(Output("Out"));
+    PADDLE_ENFORCE_NOT_NULL(in_var, "The input should not be found in scope",
+                            Input("In"));
+    PADDLE_ENFORCE_NOT_NULL(out_var, "The output should not be found in scope",
+                            Output("Out"));
+    auto &in_tensor = in_var->Get<framework::LoDTensor>();
+    framework::LoDTensor *out_tensor =
+        out_var->GetMutable<framework::LoDTensor>();
+
+    PrintValue(place, Inputs("In").front(), in_tensor);
+    framework::TensorCopy(in_tensor, place, out_tensor);
+    out_tensor->set_lod(in_tensor.lod());
+  }
+
+  void PrintValue(const platform::Place &place,
+                  const std::string &printed_var_name,
+                  const framework::LoDTensor &in_tensor) const {
     std::string print_phase = Attr<std::string>("print_phase");
     bool is_forward = Attr<bool>("is_forward");
@@ -177,12 +178,12 @@ class TensorPrintOp : public framework::OperatorBase {
     printed_tensor.set_lod(in_tensor.lod());
     printed_tensor.Resize(in_tensor.dims());
 
-    if (platform::is_cpu_place(in_tensor.place())) {
+    if (is_cpu_place(in_tensor.place())) {
       printed_tensor.ShareDataWith(in_tensor);
     } else {
       // copy data to cpu to print
       platform::CPUPlace place;
-      framework::TensorCopy(in_tensor, place, &printed_tensor);
+      TensorCopy(in_tensor, place, &printed_tensor);
     }
 
     Formater formater;
@@ -215,6 +216,7 @@ class PrintOpProtoAndCheckMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("In", "Input tensor to be displayed.");
+    AddOutput("Out", "The output tensor.");
     AddAttr<int>("first_n", "Only log `first_n` number of times.");
     AddAttr<std::string>("message", "A string message to print as a prefix.");
     AddAttr<int>("summarize", "Number of elements printed.");
@@ -239,10 +241,23 @@ tensor `t`.)DOC");
   }
 };
 
-class InferShapeForward : public framework::InferShapeBase {
+class PrintOpInferShape : public framework::InferShapeBase {
+ public:
+  void operator()(framework::InferShapeContext *ctx) const override {
+    VLOG(10) << "PrintOpInferShape";
+    PADDLE_ENFORCE(ctx->HasInput("In"), "Input(In) should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null.");
+    ctx->ShareDim("In", /*->*/ "Out");
+    ctx->ShareLoD("In", /*->*/ "Out");
+  }
+};
+
+class PrintOpVarTypeInference : public framework::VarTypeInference {
  public:
-  void operator()(framework::InferShapeContext *context) const override {
-    PADDLE_ENFORCE(context->HasInput("In"), "Input(In) should not be null.");
+  void operator()(framework::InferVarTypeContext *ctx) const override {
+    auto input_type = ctx->GetType(ctx->Input("In")[0]);
+    auto out_name = ctx->Output("Out").front();
+    ctx->SetType(out_name, input_type);
   }
 };
@@ -253,7 +268,8 @@ class PrintOpGradientMaker : public framework::SingleGradOpDescMaker {
   std::unique_ptr<framework::OpDesc> Apply() const override {
     auto *op_desc_ptr = new framework::OpDesc();
     op_desc_ptr->SetType("print");
-    op_desc_ptr->SetInput("In", InputGrad("In"));
+    op_desc_ptr->SetInput("In", OutputGrad("Out"));
+    op_desc_ptr->SetOutput("Out", InputGrad("In"));
     op_desc_ptr->SetAttrMap(Attrs());
     op_desc_ptr->SetAttr("is_forward", false);
     return std::unique_ptr<framework::OpDesc>(op_desc_ptr);
@@ -265,5 +281,6 @@ class PrintOpGradientMaker : public framework::SingleGradOpDescMaker {
 namespace ops = paddle::operators;
 
-REGISTER_OPERATOR(print, ops::TensorPrintOp, ops::PrintOpProtoAndCheckMaker,
-                  ops::PrintOpGradientMaker, ops::InferShapeForward);
+REGISTER_OPERATOR(print, ops::PrintOp, ops::PrintOpProtoAndCheckMaker,
+                  ops::PrintOpGradientMaker, ops::PrintOpInferShape,
+                  ops::PrintOpVarTypeInference);
...
@@ -138,8 +138,7 @@ def reader_creator(data_file,
                 break
 
     if use_xmap:
-        cpu_num = int(os.environ.get('CPU_NUM', cpu_count()))
-        return xmap_readers(mapper, reader, cpu_num, buffered_size)
+        return xmap_readers(mapper, reader, min(4, cpu_count()), buffered_size)
     else:
        return map_readers(mapper, reader)
...
@@ -19,6 +19,8 @@ import six
 import numpy as np
 from paddle.fluid.contrib.slim.graph import GraphWrapper
 from paddle.fluid import core
+import os
+os.environ['CPU_NUM'] = str(4)
 
 
 def residual_block(num):
...
@@ -22,7 +22,7 @@ from six.moves import zip, range, xrange
 import multiprocessing
 
 from .framework import Variable, default_main_program, _current_expected_place
+from .framework import _cpu_num, _cuda_ids
 
 __all__ = ['DataFeeder']
@@ -359,11 +359,9 @@ class DataFeeder(object):
         if num_places is not None:
             return int(num_places)
         elif isinstance(self.place, core.CUDAPlace):
-            return core.get_cuda_device_count()
+            return len(_cuda_ids())
         else:
-            cpu_num = int(
-                os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
-            return cpu_num
+            return _cpu_num()
 
     def decorate_reader(self,
                         reader,
...
@@ -27,7 +27,7 @@ import six
 import numpy as np
 import subprocess
 import multiprocessing
+import sys
 
 from .. import compat as cpt
 from .proto import framework_pb2
@@ -82,7 +82,24 @@ def _current_expected_place():
 
 def _cpu_num():
-    return int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
+    if "CPU_NUM" not in os.environ.keys():
+        sys.stderr.write(
+            'The CPU_NUM is not specified, you should set CPU_NUM in '
+            'the environment variable list, i.e export CPU_NUM=1. CPU_NUM '
+            'indicates that how many CPUPlace are used in the current task.\n'
+            '!!! The default number of CPUPlaces is 1.\n\n')
+        os.environ['CPU_NUM'] = str(1)
+    cpu_num = os.environ.get('CPU_NUM')
+    return int(cpu_num)
+
+
+def _cuda_ids():
+    gpus_env = os.getenv("FLAGS_selected_gpus")
+    if gpus_env:
+        device_ids = [int(s) for s in gpus_env.split(",")]
+    else:
+        device_ids = six.moves.range(core.get_cuda_device_count())
+    return device_ids
 
 
 def cuda_places(device_ids=None):
@@ -116,11 +133,7 @@ def cuda_places(device_ids=None):
     assert core.is_compiled_with_cuda(), \
         "Not compiled with CUDA"
     if device_ids is None:
-        gpus_env = os.getenv("FLAGS_selected_gpus")
-        if gpus_env:
-            device_ids = [int(s) for s in gpus_env.split(",")]
-        else:
-            device_ids = six.moves.range(core.get_cuda_device_count())
+        device_ids = _cuda_ids()
     elif not isinstance(device_ids, (list, tuple)):
         device_ids = [device_ids]
     return [core.CUDAPlace(dev_id) for dev_id in device_ids]
...
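A hedged sketch (not part of the diff) of how the new private helpers resolve device counts after this change; `_cpu_num` and `_cuda_ids` are internal, so this is for illustration only:

    import os
    # Set both variables before importing paddle.fluid so the helpers see them.
    os.environ['CPU_NUM'] = '2'                 # read by _cpu_num()
    os.environ['FLAGS_selected_gpus'] = '0,1'   # read by _cuda_ids() when present

    from paddle.fluid.framework import _cpu_num, _cuda_ids

    print(_cpu_num())          # 2
    print(list(_cuda_ids()))   # [0, 1]; falls back to all visible GPUs otherwise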
@@ -165,8 +165,12 @@ def Print(input,
             print the gradients of input tensor.
 
     Returns:
-        Variable: Output tensor, same data with input tensor.
+        Variable: Output tensor.
+
+    NOTES:
+        The input and output are two different variables, and in the
+        following process, you should use the output variable but not the input,
+        otherwise, the print layer doesn't have backward.
 
     Examples:
         .. code-block:: python
@@ -174,16 +178,18 @@ def Print(input,
            import paddle.fluid as fluid
 
            input = fluid.layers.data(name="input", shape=[4, 32, 32], dtype="float32")
-           fluid.layers.Print(input, message = "The content of input layer:")
+           input = fluid.layers.Print(input, message = "The content of input layer:")
            # value = some_layer(...)
            # Print(value, summarize=10,
            #    message="The content of some_layer: ")
     '''
-    helper = LayerHelper('print', **locals())
+    helper = LayerHelper('print' + "_" + input.name, **locals())
+    output = helper.create_variable_for_type_inference(input.dtype)
     helper.append_op(
         type='print',
         inputs={'In': input},
+        outputs={'Out': output},
         attrs={
             'first_n': first_n,
             'summarize': summarize,
@@ -194,7 +200,7 @@ def Print(input,
             'print_tensor_lod': print_tensor_lod,
             'print_phase': print_phase.upper()
         })
-    return input
+    return output
 
 
 class BlockGuard(object):
...
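A hedged usage sketch (not part of the diff) of the behavior described in the NOTES section above: after this change `fluid.layers.Print` returns a new variable, and downstream ops must consume that variable for the print op to take part in the backward pass. The tiny network is illustrative only.

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[1], dtype='float32')
    loss = fluid.layers.mean(fluid.layers.fc(input=x, size=1))

    # Use the returned variable, not the original one; otherwise the print op
    # is left out of the backward graph.
    loss = fluid.layers.Print(loss, message="loss: ")
    fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)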
@@ -324,6 +324,7 @@ class ParallelExecutor(object):
             loss = fluid.layers.mean(hidden)
 
             place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+            exe = fluid.Executor(place)
             exe.run(startup_program)
 
             parallel_exe = fluid.ParallelExecutor(use_cuda=use_cuda,
...
@@ -17,6 +17,8 @@ from paddle.fluid import compiler
 import unittest
 import logging
 import six
+import os
+os.environ['CPU_NUM'] = str(4)
 
 
 class TestBase(unittest.TestCase):
...
@@ -17,11 +17,13 @@ from __future__ import print_function
 import unittest
 import paddle.fluid.core as core
 from paddle.fluid.executor import Executor
+import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 from paddle.fluid.backward import append_backward
 from paddle.fluid.framework import switch_main_program
 from paddle.fluid.framework import Program
 import numpy as np
+from simple_nets import simple_fc_net, init_data
 
 
 class TestPrintOpCPU(unittest.TestCase):
@@ -68,5 +70,35 @@ class TestPrintOpGPU(TestPrintOpCPU):
         self.x_tensor.set_recursive_sequence_lengths([[1, 1]])
 
 
+class TestPrintOpBackward(unittest.TestCase):
+    def check_backward(self, use_cuda):
+        main = fluid.Program()
+        startup = fluid.Program()
+
+        with fluid.program_guard(main, startup):
+            loss = simple_fc_net()
+            loss = fluid.layers.Print(loss)
+            fluid.optimizer.Adam().minimize(loss)
+
+        print_ops = [op for op in main.blocks[0].ops if op.type == u'print']
+        assert len(print_ops) == 2, "The number of print op should be 2"
+
+        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        exe.run(startup)
+
+        binary = fluid.compiler.CompiledProgram(main).with_data_parallel(
+            loss_name=loss.name)
+
+        img, label = init_data()
+        feed_dict = {"image": img, "label": label}
+        exe.run(binary, feed_dict)
+
+    def test_fw_bw(self):
+        if core.is_compiled_with_cuda():
+            self.check_backward(use_cuda=True)
+        self.check_backward(use_cuda=False)
+
+
 if __name__ == '__main__':
     unittest.main()
@@ -22,6 +22,7 @@ import numpy as np
 import threading
 import multiprocessing
 import os
+os.environ['CPU_NUM'] = str(4)
 
 
 def as_tensor(np_array_or_tensor, place=None):
...