提交 728062a5 编写于 作者: B baiyfbupt

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop

...@@ -159,6 +159,7 @@ def run_benchmark(model, args): ...@@ -159,6 +159,7 @@ def run_benchmark(model, args):
paddle.dataset.mnist.train(), batch_size=args.batch_size) paddle.dataset.mnist.train(), batch_size=args.batch_size)
accuracy = fluid.metrics.Accuracy() accuracy = fluid.metrics.Accuracy()
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
iters, num_samples, start_time = 0, 0, time.time() iters, num_samples, start_time = 0, 0, time.time()
for pass_id in range(args.pass_num): for pass_id in range(args.pass_num):
accuracy.reset() accuracy.reset()
...@@ -175,17 +176,20 @@ def run_benchmark(model, args): ...@@ -175,17 +176,20 @@ def run_benchmark(model, args):
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([len(y_data), 1]) y_data = y_data.reshape([len(y_data), 1])
outs = exe.run( outs = train_exe.run(
fluid.default_main_program(),
feed={"pixel": img_data, feed={"pixel": img_data,
"label": y_data}, "label": y_data},
fetch_list=[avg_cost, batch_acc, batch_size_tensor] fetch_list=[
avg_cost.name, batch_acc.name, batch_size_tensor.name
]
) # The accuracy is the accumulation of batches, but not the current batch. ) # The accuracy is the accumulation of batches, but not the current batch.
accuracy.update(value=outs[1], weight=outs[2]) accuracy.update(
value=np.array(np.mean(outs[1])),
weight=np.mean(np.array(outs[2])))
iters += 1 iters += 1
num_samples += len(y_data) num_samples += len(y_data)
loss = np.array(outs[0]) loss = np.mean(np.array(outs[0]))
acc = np.array(outs[1]) acc = np.mean(np.array(outs[1]))
train_losses.append(loss) train_losses.append(loss)
train_accs.append(acc) train_accs.append(acc)
print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" % print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" %
......
...@@ -241,6 +241,7 @@ def run_benchmark(model, args): ...@@ -241,6 +241,7 @@ def run_benchmark(model, args):
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
accuracy = fluid.average.WeightedAverage() accuracy = fluid.average.WeightedAverage()
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
if args.use_fake_data: if args.use_fake_data:
data = train_reader().next() data = train_reader().next()
image = np.array(map(lambda x: x[0].reshape(dshape), data)).astype( image = np.array(map(lambda x: x[0].reshape(dshape), data)).astype(
...@@ -264,14 +265,17 @@ def run_benchmark(model, args): ...@@ -264,14 +265,17 @@ def run_benchmark(model, args):
data)).astype('float32') data)).astype('float32')
label = np.array(map(lambda x: x[1], data)).astype('int64') label = np.array(map(lambda x: x[1], data)).astype('int64')
label = label.reshape([-1, 1]) label = label.reshape([-1, 1])
loss, acc, weight = exe.run( loss, acc, weight = train_exe.run(
fluid.default_main_program(),
feed={'data': image, feed={'data': image,
'label': label}, 'label': label},
fetch_list=[avg_cost, batch_acc, batch_size_tensor]) fetch_list=[
avg_cost.name, batch_acc.name, batch_size_tensor.name
])
iters += 1 iters += 1
num_samples += len(label) num_samples += len(label)
accuracy.add(value=acc, weight=weight) accuracy.add(value=np.array(np.mean(acc)), weight=np.mean(weight))
loss = np.mean(np.array(loss))
acc = np.mean(np.array(acc))
train_losses.append(loss) train_losses.append(loss)
train_accs.append(acc) train_accs.append(acc)
print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" % print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" %
......
...@@ -169,6 +169,7 @@ def main(): ...@@ -169,6 +169,7 @@ def main():
iters, num_samples, start_time = 0, 0, time.time() iters, num_samples, start_time = 0, 0, time.time()
accuracy = fluid.average.WeightedAverage() accuracy = fluid.average.WeightedAverage()
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
for pass_id in range(args.pass_num): for pass_id in range(args.pass_num):
accuracy.reset() accuracy.reset()
train_accs = [] train_accs = []
...@@ -184,14 +185,17 @@ def main(): ...@@ -184,14 +185,17 @@ def main():
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
loss, acc, weight = exe.run( loss, acc, weight = train_exe.run(
fluid.default_main_program(),
feed={"pixel": img_data, feed={"pixel": img_data,
"label": y_data}, "label": y_data},
fetch_list=[avg_cost, batch_acc, batch_size_tensor]) fetch_list=[
accuracy.add(value=acc, weight=weight) avg_cost.name, batch_acc.name, batch_size_tensor.name
])
accuracy.add(value=np.array(np.mean(acc)), weight=np.mean(weight))
iters += 1 iters += 1
num_samples += len(y_data) num_samples += len(y_data)
loss = np.mean(np.array(loss))
acc = np.mean(np.array(acc))
print( print(
"Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" % "Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
(pass_id, iters, loss, acc) (pass_id, iters, loss, acc)
......
...@@ -40,7 +40,7 @@ template <typename T> ...@@ -40,7 +40,7 @@ template <typename T>
class FCOp : public OperatorBase { class FCOp : public OperatorBase {
public: public:
void Run(...) { void Run(...) {
add(mul(Input<T>("X"), Input<T>("W")), Input<T>("b"); add(mul(Input<T>("X"), Input<T>("W")), Input<T>("b"));
} }
}; };
REGISTER_OP(FCOp, "fc"); REGISTER_OP(FCOp, "fc");
......
...@@ -70,6 +70,14 @@ class OpHandleBase { ...@@ -70,6 +70,14 @@ class OpHandleBase {
const std::vector<VarHandleBase *> &Inputs() const { return inputs_; } const std::vector<VarHandleBase *> &Inputs() const { return inputs_; }
size_t NoDupInputSize() const {
std::unordered_set<VarHandleBase *> res;
for (auto *var : inputs_) {
res.emplace(var);
}
return res.size();
}
const std::vector<VarHandleBase *> &Outputs() const { return outputs_; } const std::vector<VarHandleBase *> &Outputs() const { return outputs_; }
protected: protected:
......
...@@ -174,7 +174,7 @@ void ThreadedSSAGraphExecutor::InsertFetchOps( ...@@ -174,7 +174,7 @@ void ThreadedSSAGraphExecutor::InsertFetchOps(
void ThreadedSSAGraphExecutor::InsertPendingOp( void ThreadedSSAGraphExecutor::InsertPendingOp(
std::unordered_map<OpHandleBase *, size_t> *pending_ops, std::unordered_map<OpHandleBase *, size_t> *pending_ops,
OpHandleBase *op_instance) const { OpHandleBase *op_instance) const {
pending_ops->insert({op_instance, op_instance->Inputs().size()}); pending_ops->insert({op_instance, op_instance->NoDupInputSize()});
} }
void ThreadedSSAGraphExecutor::InsertPendingVar( void ThreadedSSAGraphExecutor::InsertPendingVar(
......
...@@ -49,7 +49,7 @@ class OpConverter { ...@@ -49,7 +49,7 @@ class OpConverter {
// convert fluid block to tensorrt network // convert fluid block to tensorrt network
void ConvertBlock(const framework::proto::BlockDesc& block, void ConvertBlock(const framework::proto::BlockDesc& block,
TensorRTEngine* engine) { TensorRTEngine* engine) {
for (size_t i = 0; i < block.ops_size(); i++) { for (int i = 0; i < block.ops_size(); i++) {
const auto& op = block.ops(i); const auto& op = block.ops(i);
OpConverter::Run(op, engine); OpConverter::Run(op, engine);
} }
......
...@@ -105,7 +105,7 @@ class SmoothL1LossGradOp : public framework::OperatorWithKernel { ...@@ -105,7 +105,7 @@ class SmoothL1LossGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
auto in_dims = ctx->GetInputDim("X"); auto in_dims = ctx->GetInputDim("Diff");
auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
PADDLE_ENFORCE_GE(out_dims.size(), 2, PADDLE_ENFORCE_GE(out_dims.size(), 2,
...@@ -127,12 +127,33 @@ class SmoothL1LossGradOp : public framework::OperatorWithKernel { ...@@ -127,12 +127,33 @@ class SmoothL1LossGradOp : public framework::OperatorWithKernel {
} }
}; };
// Builds the OpDesc of the "smooth_l1_loss_grad" op for a forward op.
//
// The grad op is wired explicitly: it reads the forward inputs InsideWeight
// and OutsideWeight, the forward output Diff and the gradient of Out, and it
// writes the gradients of X and Y. The forward attributes are copied as-is.
class SmoothL1LossGradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    // Own the descriptor from the moment it is allocated so that it is
    // released if any of the calls below throws before the return.
    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
    op->SetType("smooth_l1_loss_grad");

    // Inputs of the grad op.
    op->SetInput("InsideWeight", Input("InsideWeight"));
    op->SetInput("OutsideWeight", Input("OutsideWeight"));
    op->SetInput("Diff", Output("Diff"));
    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));

    op->SetAttrMap(Attrs());

    // Outputs of the grad op.
    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    op->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
    return op;
  }
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OPERATOR(smooth_l1_loss, ops::SmoothL1LossOp, ops::SmoothL1LossOpMaker, REGISTER_OPERATOR(smooth_l1_loss, ops::SmoothL1LossOp, ops::SmoothL1LossOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>); ops::SmoothL1LossGradMaker);
REGISTER_OPERATOR(smooth_l1_loss_grad, ops::SmoothL1LossGradOp); REGISTER_OPERATOR(smooth_l1_loss_grad, ops::SmoothL1LossGradOp);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
smooth_l1_loss, smooth_l1_loss,
......
...@@ -480,6 +480,7 @@ function main() { ...@@ -480,6 +480,7 @@ function main() {
build) build)
cmake_gen ${PYTHON_ABI:-""} cmake_gen ${PYTHON_ABI:-""}
build build
gen_dockerfile
;; ;;
build_android) build_android)
build_android build_android
......
...@@ -54,9 +54,9 @@ class DataToLoDTensorConverter(object): ...@@ -54,9 +54,9 @@ class DataToLoDTensorConverter(object):
self.data.append(data) self.data.append(data)
else: else:
cur_lod_len = len(data) cur_lod_len = len(data)
lod[-1].append(lod[-1][-1] + cur_lod_len) lod[0].append(lod[0][-1] + cur_lod_len)
for each_data in data: for each_data in data:
self._feed_impl_(each_data, lod[:-1], lod_level - 1) self._feed_impl_(each_data, lod[1:], lod_level - 1)
def done(self): def done(self):
arr = numpy.array(self.data, dtype=self.dtype).reshape(self.shape) arr = numpy.array(self.data, dtype=self.dtype).reshape(self.shape)
......
...@@ -1329,6 +1329,8 @@ def sequence_pool(input, pool_type): ...@@ -1329,6 +1329,8 @@ def sequence_pool(input, pool_type):
sqrt : out.data = [2.82, 6.93, 4.24], where 2.82=(1+3)/sqrt(2), sqrt : out.data = [2.82, 6.93, 4.24], where 2.82=(1+3)/sqrt(2),
6.93=(2+4+6)/sqrt(3), 4.24=(5+1)/sqrt(2) 6.93=(2+4+6)/sqrt(3), 4.24=(5+1)/sqrt(2)
max : out.data = [3, 6, 5], where 3=max(1,3), 6=max(2,4,6), 5=max(5,1) max : out.data = [3, 6, 5], where 3=max(1,3), 6=max(2,4,6), 5=max(5,1)
last : out.data = [3, 6, 1], where 3=last(1,3), 6=last(2,4,6), 1=last(5,1)
first : out.data = [1, 2, 5], where 1=first(1,3), 2=first(2,4,6), 5=first(5,1)
Args: Args:
input(variable): The input variable which is a LoDTensor. input(variable): The input variable which is a LoDTensor.
...@@ -1348,6 +1350,8 @@ def sequence_pool(input, pool_type): ...@@ -1348,6 +1350,8 @@ def sequence_pool(input, pool_type):
sum_x = fluid.layers.sequence_pool(input=x, pool_type='sum') sum_x = fluid.layers.sequence_pool(input=x, pool_type='sum')
sqrt_x = fluid.layers.sequence_pool(input=x, pool_type='sqrt') sqrt_x = fluid.layers.sequence_pool(input=x, pool_type='sqrt')
max_x = fluid.layers.sequence_pool(input=x, pool_type='max') max_x = fluid.layers.sequence_pool(input=x, pool_type='max')
last_x = fluid.layers.sequence_pool(input=x, pool_type='last')
first_x = fluid.layers.sequence_pool(input=x, pool_type='first')
""" """
helper = LayerHelper('sequence_pool', **locals()) helper = LayerHelper('sequence_pool', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -3263,35 +3267,35 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): ...@@ -3263,35 +3267,35 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
""" """
**Smooth L1 Loss Operator. ** **Smooth L1 Loss Operator. **
This operator computes the smooth l1 loss for X and Y. This operator computes the smooth L1 loss for X and Y.
The operator takes the first dimension of X and Y as batch size. The operator takes the first dimension of X and Y as batch size.
For each instance, it computes the smooth l1 loss element by element first For each instance, it computes the smooth L1 loss element by element first
and then sums all the losses. So the shape of Out is [batch_size, 1]. and then sums all the losses. So the shape of Out is [batch_size, 1].
Args: Args:
x (Variable): A tensor with rank at least 2. The input value of smooth x (Variable): A tensor with rank at least 2. The input value of smooth
l1 loss op with shape [batch_size, dim1, ..., dimN]. L1 loss op with shape [batch_size, dim1, ..., dimN].
y (Variable): A tensor with rank at least 2. The target value of smooth y (Variable): A tensor with rank at least 2. The target value of smooth
l1 loss op with same shape as x. L1 loss op with same shape as x.
inside_weight (Variable|None): A tensor with rank at least 2. This inside_weight (Variable|None): A tensor with rank at least 2. This
input is optional and should have same shape with x. If provided, input is optional and should have same shape with x. If provided,
the result of (x - y) will be multiplied by this tensor element by the result of (x - y) will be multiplied by this tensor element by
element. element.
outside_weight (Variable|None): A tensor with rank at least 2. This outside_weight (Variable|None): A tensor with rank at least 2. This
input is optional and should have same shape with x. If provided, input is optional and should have same shape with x. If provided,
the out smooth l1 loss will be multiplied by this tensor element the out smooth L1 loss will be multiplied by this tensor element
by element. by element.
sigma (float|None): Hyper parameter of smooth l1 loss op. A float scalar sigma (float|None): Hyper parameter of smooth L1 loss op. A float scalar
with default value 1.0. with default value 1.0.
Returns: Returns:
Variable: A tensor with rank be 2. The output smooth l1 loss with Variable: A tensor with rank be 2. The output smooth L1 loss with
shape [batch_size, 1]. shape [batch_size, 1].
Examples: Examples:
.. code-block:: python .. code-block:: python
data = fluid.layers.data(name='data', shape=[128], dtype='float32') data = fluid.layers.data(name='data', shape=[128], dtype='float32')
label = fluid.layers.data(name='label', shape=[100], dtype='int64') label = fluid.layers.data(name='label', shape=[100], dtype='float32')
fc = fluid.layers.fc(input=data, size=100) fc = fluid.layers.fc(input=data, size=100)
out = fluid.layers.smooth_l1(x=fc, y=label) out = fluid.layers.smooth_l1(x=fc, y=label)
""" """
......
...@@ -182,12 +182,6 @@ def train(use_cuda, save_dirname=None, is_local=True): ...@@ -182,12 +182,6 @@ def train(use_cuda, save_dirname=None, is_local=True):
crf_decode = fluid.layers.crf_decoding( crf_decode = fluid.layers.crf_decoding(
input=feature_out, param_attr=fluid.ParamAttr(name='crfw')) input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
chunk_evaluator = fluid.evaluator.ChunkEvaluator(
input=crf_decode,
label=target,
chunk_scheme="IOB",
num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))
train_data = paddle.batch( train_data = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
paddle.dataset.conll05.test(), buf_size=8192), paddle.dataset.conll05.test(), buf_size=8192),
...@@ -203,7 +197,6 @@ def train(use_cuda, save_dirname=None, is_local=True): ...@@ -203,7 +197,6 @@ def train(use_cuda, save_dirname=None, is_local=True):
def train_loop(main_program): def train_loop(main_program):
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
embedding_param = fluid.global_scope().find_var( embedding_param = fluid.global_scope().find_var(
embedding_name).get_tensor() embedding_name).get_tensor()
embedding_param.set( embedding_param.set(
...@@ -213,27 +206,19 @@ def train(use_cuda, save_dirname=None, is_local=True): ...@@ -213,27 +206,19 @@ def train(use_cuda, save_dirname=None, is_local=True):
start_time = time.time() start_time = time.time()
batch_id = 0 batch_id = 0
for pass_id in xrange(PASS_NUM): for pass_id in xrange(PASS_NUM):
chunk_evaluator.reset(exe)
for data in train_data(): for data in train_data():
cost, precision, recall, f1_score = exe.run( cost = exe.run(main_program,
main_program,
feed=feeder.feed(data), feed=feeder.feed(data),
fetch_list=[avg_cost] + chunk_evaluator.metrics) fetch_list=[avg_cost])
pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval( cost = cost[0]
exe)
if batch_id % 10 == 0: if batch_id % 10 == 0:
print("avg_cost:" + str(cost) + " precision:" + str( print("avg_cost:" + str(cost))
precision) + " recall:" + str(recall) + " f1_score:" +
str(f1_score) + " pass_precision:" + str(
pass_precision) + " pass_recall:" + str(
pass_recall) + " pass_f1_score:" + str(
pass_f1_score))
if batch_id != 0: if batch_id != 0:
print("second per batch: " + str((time.time( print("second per batch: " + str((time.time(
) - start_time) / batch_id)) ) - start_time) / batch_id))
# Set the threshold low to speed up the CI test # Set the threshold low to speed up the CI test
if float(pass_precision) > 0.01: if float(cost) < 60.0:
if save_dirname is not None: if save_dirname is not None:
# TODO(liuyiqun): Change the target to crf_decode # TODO(liuyiqun): Change the target to crf_decode
fluid.io.save_inference_model(save_dirname, [ fluid.io.save_inference_model(save_dirname, [
......
...@@ -13,15 +13,62 @@ ...@@ -13,15 +13,62 @@
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
import unittest
def test_converter(): class TestDataFeeder(unittest.TestCase):
def test_lod_level_0_converter(self):
img = fluid.layers.data(name='image', shape=[1, 28, 28]) img = fluid.layers.data(name='image', shape=[1, 28, 28])
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder([img, label], fluid.CPUPlace()) feeder = fluid.DataFeeder([img, label], fluid.CPUPlace())
result = feeder.feed([[[0] * 784, [9]], [[1] * 784, [1]]]) result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])])
print(result) print(result)
self.assertEqual(result['image'].shape(), [2, 1, 28, 28])
self.assertEqual(result['label'].shape(), [2, 1])
self.assertEqual(result['image'].lod(), [])
self.assertEqual(result['label'].lod(), [])
def test_lod_level_1_converter(self):
    # lod_level = 1: every sample is one sentence, and sentences differ in
    # their number of words.
    sentences_var = fluid.layers.data(
        name='sentences', shape=[1], dtype='int64', lod_level=1)
    label_var = fluid.layers.data(name='label', shape=[1], dtype='int64')
    feeder = fluid.DataFeeder([sentences_var, label_var], fluid.CPUPlace())

    # Three sentences of 3, 2 and 4 words (9 words in total), each paired
    # with the label [1]; the expected lod is [[0, 3, 5, 9]].
    word_ids = [[1, 2, 3], [4, 5], [6, 7, 8, 9]]
    minibatch = [(ids, [1]) for ids in word_ids]
    result = feeder.feed(minibatch)
    print(result)

    fed_sentences = result['sentences']
    fed_label = result['label']
    self.assertEqual(fed_sentences.shape(), [9, 1])
    self.assertEqual(fed_label.shape(), [3, 1])
    self.assertEqual(fed_sentences.lod(), [[0, 3, 5, 9]])
    self.assertEqual(fed_label.lod(), [])
def test_lod_level_2_converter(self):
    # lod_level = 2: paragraphs -> sentences -> words.
    paragraphs_var = fluid.layers.data(
        name='paragraphs', shape=[1], dtype='int64', lod_level=2)
    label_var = fluid.layers.data(name='label', shape=[1], dtype='int64')
    feeder = fluid.DataFeeder([paragraphs_var, label_var], fluid.CPUPlace())

    # Two paragraphs: the first holds two sentences (3 and 2 words), the
    # second holds one sentence (4 words). Each is paired with the label
    # [1]; the expected lod is [[0, 2, 3], [0, 3, 5, 9]].
    nested_word_ids = [[[1, 2, 3], [4, 5]], [[6, 7, 8, 9]]]
    minibatch = [(paragraph, [1]) for paragraph in nested_word_ids]
    result = feeder.feed(minibatch)
    print(result)

    fed_paragraphs = result['paragraphs']
    fed_label = result['label']
    self.assertEqual(fed_paragraphs.shape(), [9, 1])
    self.assertEqual(fed_label.shape(), [2, 1])
    self.assertEqual(fed_paragraphs.lod(), [[0, 2, 3], [0, 3, 5, 9]])
    self.assertEqual(fed_label.lod(), [])
if __name__ == '__main__': if __name__ == '__main__':
test_converter() unittest.main()
...@@ -28,11 +28,11 @@ function(py_test_modules TARGET_NAME) ...@@ -28,11 +28,11 @@ function(py_test_modules TARGET_NAME)
if(WITH_TESTING) if(WITH_TESTING)
set(options "") set(options "")
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs MODULES DEPS ARGS ENVS) set(multiValueArgs MODULES DEPS ENVS)
cmake_parse_arguments(py_test_modules "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(py_test_modules "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND env PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_modules_ENVS} COMMAND env PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_modules_ENVS}
${PYTHON_EXECUTABLE} -u -m unittest --verbose ${py_test_modules_MODULES} ${py_test_modules_ARGS} ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif() endif()
endfunction() endfunction()
......
...@@ -131,7 +131,40 @@ class Trainer(object): ...@@ -131,7 +131,40 @@ class Trainer(object):
# load params from param_path into scope # load params from param_path into scope
io.load_persistables(exe, dirname=param_path) io.load_persistables(exe, dirname=param_path)
def _transpile_nccl2_dist(self):
    """Set up NCCL2 distributed training when PADDLE_TRAINER_IPS is set.

    If PADDLE_TRAINER_IPS is absent from the environment, this only sets
    ``self.nccl_id_var`` to None. Otherwise it reads PADDLE_TRAINER_ID,
    PADDLE_PSERVER_PORT, PADDLE_TRAINER_IPS and POD_IP, stores
    ``self.trainer_id`` and ``self.num_trainers``, creates the persistable
    RAW variable "NCCLID" in the startup program and appends a
    ``gen_nccl_id`` op that outputs it.
    """
    if "PADDLE_TRAINER_IPS" not in os.environ:
        self.nccl_id_var = None
        return

    self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
    port = os.getenv("PADDLE_PSERVER_PORT")
    trainer_ips = os.getenv("PADDLE_TRAINER_IPS")
    peer_endpoints = [':'.join([ip, port]) for ip in trainer_ips.split(",")]

    # The trainer count includes this process, so take it before our own
    # endpoint is dropped from the list handed to the op.
    self.num_trainers = len(peer_endpoints)
    current_endpoint = os.getenv("POD_IP") + ":" + port
    peer_endpoints.remove(current_endpoint)

    # TODO(wuyi): use self.nccl_id_var, self.num_trainers and
    # self.trainer_id in ParallelExecutor to start distributed training
    # using NCCL2.
    self.nccl_id_var = self.startup_program.global_block().create_var(
        name="NCCLID", persistable=True, type=core.VarDesc.VarType.RAW)
    op_attrs = {
        "endpoint": current_endpoint,
        "endpoint_list": peer_endpoints,
        "trainer_id": self.trainer_id
    }
    self.startup_program.global_block().append_op(
        type="gen_nccl_id",
        inputs={},
        outputs={"NCCLID": self.nccl_id_var},
        attrs=op_attrs)
def _dist_transpile_if_necessary(self, optimize_ops, params_grads): def _dist_transpile_if_necessary(self, optimize_ops, params_grads):
self._transpile_nccl2_dist()
if self.nccl_id_var != None:
return
if "PADDLE_TRAINING_ROLE" not in os.environ: if "PADDLE_TRAINING_ROLE" not in os.environ:
return return
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import os
import sys
import paddle.fluid as fluid
import importlib
import cStringIO
def main():
    """Run each unittest module named on the command line.

    The current working directory is appended to ``sys.path`` so the modules
    can be imported by name. Every module is imported and run under its own
    fresh fluid main/startup programs, scope and unique-name guard. The
    runner's output is captured per module and written to stderr only when
    that module fails. The process exits with status 1 if any module failed.
    """
    sys.path.append(os.getcwd())
    some_test_failed = False
    for module_name in sys.argv[1:]:
        # Captured runner output; only shown when the module fails.
        output = cStringIO.StringIO()
        main_program = fluid.Program()
        startup_program = fluid.Program()
        scope = fluid.core.Scope()
        with fluid.program_guard(main_program, startup_program):
            with fluid.scope_guard(scope):
                with fluid.unique_name.guard():
                    test_loader = unittest.TestLoader()
                    module = importlib.import_module(module_name)
                    tests = test_loader.loadTestsFromModule(module)
                    res = unittest.TextTestRunner(stream=output).run(tests)
                    if not res.wasSuccessful():
                        some_test_failed = True
                        print >> sys.stderr, module_name, 'failed\n', \
                            output.getvalue()

    if some_test_failed:
        # sys.exit rather than the bare exit() helper: the latter is
        # injected by the site module and is not guaranteed to exist.
        sys.exit(1)


if __name__ == '__main__':
    main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册