Unverified commit 157211c4, authored by Yi Liu, committed by GitHub

supports distributed classification (#18690)

* supports distributed classification training
* update API.spec
* fix even division in Python 3
* change "index_range" to "index_num" in shard_index operator
test=document_preview
test=develop
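
For context on the "even division" bullet: Python 3 true division returns floats, so the shard-size and reduce-scatter shape math below relies on floor division. A minimal illustration (not part of the diff):

index_num, nshards = 20, 2
assert index_num / nshards == 10.0   # true division: a float under Python 3
assert index_num // nshards == 10    # floor division: the integer the ops below expect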
Parent 3429e65a
......@@ -28,6 +28,7 @@
| lcy-seso | Ying Cao |
| cjld | Dun Liang |
| lipeng-unisound | Peng Li |
| gavin1332 | Yi Liu |
| liuyuan | Yuan Liu |
| livc | Zhao Li |
| llxxxll | Yong-Feng Liu |
......
......@@ -158,7 +158,7 @@ paddle.fluid.layers.group_norm (ArgSpec(args=['input', 'groups', 'epsilon', 'par
paddle.fluid.layers.spectral_norm (ArgSpec(args=['weight', 'dim', 'power_iters', 'eps', 'name'], varargs=None, keywords=None, defaults=(0, 1, 1e-12, None)), ('document', '9461e67095a6fc5d568fb2ce8fef66ff'))
paddle.fluid.layers.softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax', 'axis'], varargs=None, keywords=None, defaults=(False, -100, True, False, -1)), ('document', '54e1675aa0364f4a78fa72804ec0f413'))
paddle.fluid.layers.smooth_l1 (ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'ecb75c1b00c4c76c98b482f633b7a10c'))
paddle.fluid.layers.one_hot (ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None), ('document', '52db6229214fc6ab167d7009df29170d'))
paddle.fluid.layers.one_hot (ArgSpec(args=['input', 'depth', 'allow_out_of_range'], varargs=None, keywords=None, defaults=(False,)), ('document', 'ec4115591be842868c86b2e5334245c6'))
paddle.fluid.layers.autoincreased_step_counter (ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1)), ('document', '98e7927f09ee2270535b29f048e481ec'))
paddle.fluid.layers.reshape (ArgSpec(args=['x', 'shape', 'actual_shape', 'act', 'inplace', 'name'], varargs=None, keywords=None, defaults=(None, None, False, None)), ('document', '6196c9ec3075ca5a9c058ea1f8492256'))
paddle.fluid.layers.squeeze (ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ebbac07662a6e22e8e299ced880c7775'))
......@@ -264,6 +264,7 @@ paddle.fluid.layers.sign (ArgSpec(args=['x'], varargs=None, keywords=None, defau
paddle.fluid.layers.deformable_conv (ArgSpec(args=['input', 'offset', 'mask', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'deformable_groups', 'im2col_step', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, None, None, None)), ('document', '4d83ba6b971cfd590493b0925b3e081e'))
paddle.fluid.layers.unfold (ArgSpec(args=['x', 'kernel_sizes', 'strides', 'paddings', 'dilations', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None)), ('document', '3f884662ad443d9ecc2b3734b4f61ad6'))
paddle.fluid.layers.deformable_roi_pooling (ArgSpec(args=['input', 'rois', 'trans', 'no_trans', 'spatial_scale', 'group_size', 'pooled_height', 'pooled_width', 'part_size', 'sample_per_part', 'trans_std', 'position_sensitive', 'name'], varargs=None, keywords=None, defaults=(False, 1.0, [1, 1], 1, 1, None, 1, 0.1, False, None)), ('document', '99c03e3f249e36854f87dedaa17c8f35'))
paddle.fluid.layers.shard_index (ArgSpec(args=['input', 'index_num', 'nshards', 'shard_id', 'ignore_value'], varargs=None, keywords=None, defaults=(-1,)), ('document', '5786fdbba6753ecd6cbce5e6b0889924'))
paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', '9d7806e31bdf727c1a23b8782a09b545'))
paddle.fluid.layers.open_files (ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)), ('document', 'cccb6eb5410c822e5307c947aca2c899'))
paddle.fluid.layers.read_file (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', '32181f6037e387fb6e68a5beaafe33b6'))
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/shard_index_op.h"
namespace paddle {
namespace operators {
class ShardIndexOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of ShardIndexOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of ShardIndexOp should not be null.");
auto x_dims = ctx->GetInputDim("X");
PADDLE_ENFORCE_GE(x_dims.size(), 2,
"Rank of Input(X) should be at least 2.");
if (ctx->IsRuntime() || x_dims[x_dims.size() - 1] > 0) {
PADDLE_ENFORCE_GE(x_dims[x_dims.size() - 1], 1U,
"Last dimension of Input(X) should be 1.");
}
ctx->SetOutputDim("Out", x_dims);
ctx->ShareLoD("X", /* --> */ "Out");
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(ctx.Input<framework::Tensor>("X")->type(),
ctx.device_context());
}
};
class ShardIndexOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X",
"(LoDTensor, LoDTensor<int|int64>) Input variable. Each value "
"of X is an index.");
AddOutput(
"Out",
"(Tensor, Tensor<int|int64>) Output tensor with same shape as X. "
"The tensor consists of sharding representations of values in X.");
AddAttr<int>("index_num",
"A positive integer to specify the range of the input X.");
AddAttr<int>("nshards",
"A positive integer to specify the number of shards.");
AddAttr<int>("shard_id", "The current shard id");
AddAttr<int>("ignore_value", "An ingeter value out of sharded range")
.SetDefault(-1);
AddComment(R"DOC(
This layer creates the sharded index for the input. It is generally used in
model- and data-parallel mixed training, in which the index data (usually
the label) should be recalculated in each trainer according to
.. math::
assert index_num % nshards == 0
shard_size = index_num / nshards
y = x % shard_size if x / shard_size == shard_id else ignore_value
We take the distributed one-hot representation as an example to show what
this layer is used for. The distributed one-hot representation is separated
into multiple shards, and each shard is filled with zeros except for the one
that holds the index. To create these sharded representations in each
trainer, the original index has to be recalculated (i.e. sharded) beforehand.
Examples:
X is a Tensor of integer values:
X.shape = [4, 1]
X.data = [[1], [6], [12], [19]]
suppose index_num = 20 and nshards = 2, then we get shard_size = 10
if shard_id == 0, we get the Out:
Out.shape = [4, 1]
Out.data = [[1], [6], [-1], [-1]]
if shard_id == 1, we get the Out:
Out.shape = [4, 1]
Out.data = [[-1], [-1], [2], [9]]
The default `ignore_value` -1 is used in this example.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(shard_index, ops::ShardIndexOp,
ops::ShardIndexOpMaker);
REGISTER_OP_CPU_KERNEL(shard_index, ops::ShardIndexCPUKernel<int>,
ops::ShardIndexCPUKernel<int64_t>);
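
For reference, a minimal NumPy sketch of the sharding rule documented above (an illustration, not part of the diff; the sample values follow the DOC example):

import numpy as np

def shard_index_ref(x, index_num, nshards, shard_id, ignore_value=-1):
    # Indices owned by this shard map to local offsets; all others map to ignore_value.
    shard_size = index_num // nshards
    return np.where(x // shard_size == shard_id, x % shard_size, ignore_value)

x = np.array([[1], [6], [12], [19]])
print(shard_index_ref(x, index_num=20, nshards=2, shard_id=0).tolist())  # [[1], [6], [-1], [-1]]
print(shard_index_ref(x, index_num=20, nshards=2, shard_id=1).tolist())  # [[-1], [-1], [2], [9]]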
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/shard_index_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/gpu_info.h"
namespace paddle {
namespace operators {
using platform::PADDLE_CUDA_NUM_THREADS;
template <typename T>
__global__ void ShardIndexInner(const T* in_data, T* out_data,
const int64_t numel, const int index_num,
const int nshards, const int shard_id,
const int ignore_value) {
int shard_size = index_num / nshards;
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < numel) {
assert(in_data[idx] >= 0 && in_data[idx] < index_num);
if (in_data[idx] / shard_size == shard_id) {
out_data[idx] = in_data[idx] % shard_size;
} else {
out_data[idx] = ignore_value;
}
}
}
using LoDTensor = framework::LoDTensor;
template <typename T>
class ShardIndexCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* in = context.Input<LoDTensor>("X");
auto* out = context.Output<LoDTensor>("Out");
int index_num = context.Attr<int>("index_num");
int nshards = context.Attr<int>("nshards");
int shard_id = context.Attr<int>("shard_id");
int ignore_value = context.Attr<int>("ignore_value");
PADDLE_ENFORCE_GT(index_num, 0);
PADDLE_ENFORCE_GT(nshards, 0);
PADDLE_ENFORCE(shard_id >= 0 && shard_id < nshards,
"shard_id(%d) is not in range [0, %d)", shard_id, nshards);
out->Resize(in->dims());
out->set_lod(in->lod());
auto* in_data = in->data<T>();
auto* out_data = out->mutable_data<T>(context.GetPlace());
int64_t numel = in->numel();
auto stream =
context.template device_context<platform::CUDADeviceContext>().stream();
ShardIndexInner<<<(numel + PADDLE_CUDA_NUM_THREADS - 1) /
PADDLE_CUDA_NUM_THREADS,
PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
in_data, out_data, numel, index_num, nshards, shard_id, ignore_value);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(shard_index, ops::ShardIndexCUDAKernel<int>,
ops::ShardIndexCUDAKernel<int64_t>);
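
The launch configuration above is a plain ceiling division over the element count; a quick sketch (PADDLE_CUDA_NUM_THREADS is assumed here to be 512):

def num_blocks(numel, threads_per_block=512):
    # Mirrors (numel + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS above.
    return (numel + threads_per_block - 1) // threads_per_block

assert num_blocks(1) == 1 and num_blocks(512) == 1 and num_blocks(513) == 2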
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
using LoDTensor = framework::LoDTensor;
template <typename T>
class ShardIndexCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* in = context.Input<LoDTensor>("X");
auto* out = context.Output<LoDTensor>("Out");
int index_num = context.Attr<int>("index_num");
int nshards = context.Attr<int>("nshards");
int shard_id = context.Attr<int>("shard_id");
int ignore_value = context.Attr<int>("ignore_value");
PADDLE_ENFORCE_GT(index_num, 0);
PADDLE_ENFORCE_GT(nshards, 0);
PADDLE_ENFORCE(shard_id >= 0 && shard_id < nshards,
"shard_id(%d) is not in range [0, %d)", shard_id, nshards);
int shard_size = index_num / nshards;
out->Resize(in->dims());
out->set_lod(in->lod());
auto* in_data = in->data<T>();
auto* out_data = out->mutable_data<T>(context.GetPlace());
int64_t numel = in->numel();
for (int64_t i = 0; i < numel; ++i) {
PADDLE_ENFORCE(in_data[i] >= 0 && in_data[i] < index_num,
"Input index(%d) is out of range [0,%d)", in_data[i],
index_num);
if (in_data[i] / shard_size == shard_id) {
out_data[i] = in_data[i] % shard_size;
} else {
out_data[i] = ignore_value;
}
}
}
};
} // namespace operators
} // namespace paddle
......@@ -3632,6 +3632,8 @@ class Parameter(Variable):
self.do_model_average = kwargs.get('do_model_average', None)
self.is_distributed = False
def __str__(self):
return self.to_string(True)
......
......@@ -14,6 +14,7 @@
from __future__ import print_function
from ..layer_helper import LayerHelper, unique_name
from ..framework import Variable
def _allreduce(x, out=None, reduce_type="sum", sync_mode=False):
......@@ -58,3 +59,122 @@ def _broadcast(x, root, sync_mode=False):
attrs={"sync_mode": sync_mode,
"root": root})
return x
def _c_allreduce(x,
out=None,
reduce_type='sum',
ring_id=0,
use_calc_stream=False):
helper = LayerHelper('c_allreduce', **locals())
if reduce_type not in ['sum', 'prod', 'max', 'min']:
raise TypeError('reduce type can only be "sum|prod|max|min"')
op_type = 'c_allreduce_' + reduce_type
if out is None:
out = helper.create_variable(
name=unique_name.generate_with_ignorable_key('.'.join(
[x.name, op_type])),
shape=x.shape,
dtype=x.dtype,
type=x.type,
persistable=x.persistable)
helper.append_op(
type=op_type,
inputs={'X': [x]},
outputs={'Out': [out]},
attrs={'ring_id': ring_id,
'use_calc_stream': use_calc_stream})
return out
def _c_broadcast(x, root=0, ring_id=0, use_calc_stream=False):
op_type = 'c_broadcast'
helper = LayerHelper(op_type, **locals())
helper.append_op(
type=op_type,
inputs={'X': [x]},
outputs={'Out': [x]},
attrs={
'root': root,
'ring_id': ring_id,
'use_calc_stream': use_calc_stream
})
return x
def _c_allgather(x, nranks, ring_id=0, use_calc_stream=False):
op_type = 'c_allgather'
helper = LayerHelper(op_type, **locals())
out_shape = list(x.shape[:])
if out_shape[0] > 0:
out_shape[0] *= nranks
out = helper.create_variable(
name=unique_name.generate_with_ignorable_key('.'.join(
[x.name, op_type])),
shape=out_shape,
dtype=x.dtype,
type=x.type,
persistable=x.persistable)
helper.append_op(
type=op_type,
inputs={'X': [x]},
outputs={'Out': [out]},
attrs={
'nranks': nranks,
'ring_id': ring_id,
'use_calc_stream': use_calc_stream
})
return out
def _c_reducescatter(x, nranks, ring_id=0, use_calc_stream=False):
if not isinstance(x, Variable):
raise TypeError('x must be a Variable')
if x.shape[0] % nranks != 0:
raise ValueError('x.shape[0](%d) cannot be evenly divided by nranks(%d)'
% (x.shape[0], nranks))
op_type = 'c_reducescatter'
helper = LayerHelper(op_type, **locals())
out_shape = list(x.shape[:])
if out_shape[0] > 0:
out_shape[0] //= nranks
out = helper.create_variable(
name=unique_name.generate_with_ignorable_key('.'.join(
[x.name, op_type])),
shape=out_shape,
dtype=x.dtype,
type=x.type,
persistable=x.persistable)
helper.append_op(
type=op_type,
inputs={'X': [x]},
outputs={'Out': [out]},
attrs={
'nranks': nranks,
'ring_id': ring_id,
'use_calc_stream': use_calc_stream
})
return out
def _c_sync_calc_stream(x):
op_type = 'c_sync_calc_stream'
helper = LayerHelper(op_type, **locals())
helper.append_op(type=op_type, inputs={'X': [x]}, outputs={'Out': [x]})
return x
def _c_sync_comm_stream(x, ring_id):
op_type = 'c_sync_comm_stream'
helper = LayerHelper(op_type, **locals())
helper.append_op(
type=op_type,
inputs={'X': [x]},
outputs={'Out': [x]},
attrs={'ring_id': ring_id})
return x
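
A hedged usage sketch of these private helpers (hypothetical: it assumes a communication ring with ring_id=0 has already been initialized by the collective transpiler/launcher):

import paddle.fluid as fluid
from paddle.fluid.layers.collective import _c_allreduce, _c_sync_comm_stream

x = fluid.layers.data(name='x', shape=[32], dtype='float32')
# Sum-reduce x across the trainers in ring 0, then wait on that ring's
# communication stream before any dependent computation runs.
y = _c_allreduce(x, reduce_type='sum', ring_id=0)
y = _c_sync_comm_stream(y, ring_id=0)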
......@@ -207,6 +207,7 @@ __all__ = [
'deformable_conv',
'unfold',
'deformable_roi_pooling',
'shard_index',
]
kIgnoreIndex = -100
......@@ -6643,13 +6644,17 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
return loss
def one_hot(input, depth):
def one_hot(input, depth, allow_out_of_range=False):
"""
This layer creates the one-hot representations for input indices.
Args:
input(Variable): Input indices, last dimension must be 1.
depth(scalar): An integer defining the depth of the one-hot dimension.
allow_out_of_range(bool): A bool value indicating whether the input
indices may be out of the range [0, depth). When an input index is
out of range, an exception is raised if allow_out_of_range is False,
or an all-zero representation is created if it is set to True.
Returns:
Variable: The one-hot representations of input.
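
A small hedged example of the new flag (program construction only; with allow_out_of_range=True an index such as 12 with depth=10 yields an all-zero row at runtime instead of raising):

import paddle.fluid as fluid

label = fluid.layers.data(name='label', shape=[1], dtype='int64')
one_hot_label = fluid.layers.one_hot(input=label, depth=10, allow_out_of_range=True)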
......@@ -12516,3 +12521,87 @@ def deformable_roi_pooling(input,
"trans_std": trans_std
})
return output
def shard_index(input, index_num, nshards, shard_id, ignore_value=-1):
"""
This layer creates the sharded index for the input. It is generally used in
model- and data-parallel mixed training, in which the index data (usually
the label) should be recalculated in each trainer according to
.. math::
assert index_num % nshards == 0
shard_size = index_num / nshards
y = x % shard_size if x / shard_size == shard_id else ignore_value
We take the distributed one-hot representation as an example to show what
this layer is used for. The distributed one-hot representation is separated
into multiple shards, and each shard is filled with zeros except for the one
that holds the index. To create these sharded representations in each
trainer, the original index has to be recalculated (i.e. sharded) beforehand.
Examples:
X is a Tensor of integer values:
X.shape = [4, 1]
X.data = [[1], [6], [12], [19]]
suppose index_num = 20 and nshards = 2, then we get shard_size = 10
if shard_id == 0, we get the Out:
Out.shape = [4, 1]
Out.data = [[1], [6], [-1], [-1]]
if shard_id == 1, we get the Out:
Out.shape = [4, 1]
Out.data = [[-1], [-1], [2], [9]]
The default `ignore_value` -1 is used in this example.
Args:
input(Variable): Input indices, last dimension must be 1.
index_num(scalar): An integer defining the range of the index.
nshards(scalar): The number of shards.
shard_id(scalar): The index of the current shard.
ignore_value(scalar): An integer value outside the sharded index range.
Returns:
Variable: The shard index of input.
Examples:
.. code-block:: python
import paddle.fluid as fluid
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
shard_label = fluid.layers.shard_index(input=label,
index_num=20,
nshards=2,
shard_id=0)
"""
op_type = 'shard_index'
helper = LayerHelper(op_type, **locals())
if index_num % nshards != 0:
raise ValueError(
'The index_num(%d) cannot be evenly divided by nshards(%d)' %
(index_num, nshards))
if shard_id < 0 or shard_id >= nshards:
raise ValueError('The shard_id(%d) should be in [0, %d)' %
(shard_id, nshards))
out = helper.create_variable_for_type_inference(dtype=input.dtype)
helper.append_op(
type=op_type,
inputs={'X': [input]},
outputs={'Out': out},
attrs={
'index_num': index_num,
'nshards': nshards,
'shard_id': shard_id,
'ignore_value': ignore_value
},
stop_gradient=True)
return out
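
A hedged sketch of how the sharded label can feed a per-shard one-hot, which is presumably why allow_out_of_range ships in the same change (assumes 20 classes split across 2 trainers, 10 classes per shard):

import paddle.fluid as fluid

label = fluid.layers.data(name='label', shape=[1], dtype='int64')
shard_label = fluid.layers.shard_index(
    input=label, index_num=20, nshards=2, shard_id=0)
# Rows holding ignore_value (-1) fall outside [0, depth) and become all-zero
# rows when allow_out_of_range=True, so classes owned by other shards contribute nothing.
local_one_hot = fluid.layers.one_hot(
    input=shard_label, depth=10, allow_out_of_range=True)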
......@@ -118,6 +118,25 @@ class TestOneHotOp_default_dtype_attr(OpTest):
self.check_output()
class TestOneHotOp_out_of_range(OpTest):
def setUp(self):
self.op_type = 'one_hot'
depth = 10
x_lod = [[4, 1, 3, 3]]
x = [np.random.choice([-1, depth]) for i in range(sum(x_lod[0]))]
x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1])
out = np.zeros(shape=(np.product(x.shape[:-1]),
depth)).astype('float32')
self.inputs = {'X': (x, x_lod)}
self.attrs = {'depth': depth, 'allow_out_of_range': True}
self.outputs = {'Out': (out, x_lod)}
def test_check_output(self):
self.check_output()
class TestOneHotOp_exception(OpTest):
def setUp(self):
self.op_type = 'one_hot'
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import math
from op_test import OpTest
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.framework as framework
from paddle.fluid.framework import Program, program_guard
def common_setup(self, index_num, nshards, shard_id, ignore_value):
self.op_type = 'shard_index'
x_lod = [[i for i in range(10)]]
N = sum(x_lod[0])
x = [np.random.randint(0, index_num - 1) for i in range(N)]
x = np.array(x).astype('int32').reshape([N, 1])
shard_size = index_num // nshards
out = np.zeros(shape=x.shape).astype('int32')
for i in range(N):
if x[i] // shard_size == shard_id:
out[i] = x[i] % shard_size
else:
out[i] = ignore_value
self.inputs = {'X': (x, x_lod)}
self.attrs = {
'index_num': index_num,
'nshards': nshards,
'shard_id': shard_id,
'ignore_value': ignore_value
}
self.outputs = {'Out': (out, x_lod)}
class TestShardIndexShardId0Op(OpTest):
def setUp(self):
common_setup(self, 20, 2, 0, -1)
def test_check_output(self):
self.check_output()
class TestShardIndexShardId1Op(OpTest):
def setUp(self):
common_setup(self, 20, 2, 1, -1)
def test_check_output(self):
self.check_output()
class TestShardIndexIgnoreValueOp(OpTest):
def setUp(self):
common_setup(self, 20, 2, 0, -2)
def test_check_output(self):
self.check_output()
class TestShardIndexNotEvenlyDividedOp(OpTest):
def setUp(self):
common_setup(self, 15, 2, 1, -1)
def test_check_output(self):
self.check_output()
if __name__ == '__main__':
unittest.main()
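
A worked check of the uneven case exercised by TestShardIndexNotEvenlyDividedOp (a sketch; the op itself only floor-divides, so indices at or above nshards * shard_size would belong to no shard):

index_num, nshards, shard_id = 15, 2, 1
shard_size = index_num // nshards              # 7
for x in [3, 7, 13]:
    owned = x // shard_size == shard_id
    print(x, x % shard_size if owned else -1)  # 3 -> -1, 7 -> 0, 13 -> 6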
......@@ -134,6 +134,9 @@ class Collective(object):
block = self.startup_program.global_block()
ring_id = -1
for param in block.iter_parameters():
if param.is_distributed:
continue
ring_id = (ring_id + 1) % self.nrings
block.append_op(
type='c_broadcast',
......@@ -219,6 +222,9 @@ class GradAllReduce(Collective):
for i in range(0, len(op_role_var), 2):
param = block.vars[op_role_var[i]]
grad = block.vars[op_role_var[i + 1]]
if param.is_distributed:
continue
if offset == idx:
offset += 1
block._insert_op(
......@@ -273,6 +279,9 @@ class LocalSGD(Collective):
block = self.startup_program.global_block()
for param in block.iter_parameters():
if param.is_distributed:
continue
snapshot = block.create_var(
name=self.snapshot_name(param.name),
shape=param.shape,
......@@ -294,6 +303,9 @@ class LocalSGD(Collective):
for idx, op in reversed(list(enumerate(block.ops))):
if self._is_update_op(op):
param = block.vars[op.input('Param')[0]]
if param.is_distributed:
continue
snapshot = block.create_var(
name=self.snapshot_name(param.name),
shape=param.shape,
......
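
The recurring is_distributed guard above skips sharded (model-parallel) parameters when inserting broadcast/allreduce ops, since each trainer holds only its own shard of them; a minimal sketch of the filter (hypothetical helper name):

def replicated_params(block):
    # Only parameters that every trainer holds in full should be broadcast
    # or gradient-allreduced; sharded (is_distributed) parameters are left alone.
    return [p for p in block.iter_parameters() if not p.is_distributed]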