Commit 3c205729 authored by panyifeng

support multi param for tuple grad

Parent bc4b1c24
......@@ -59,7 +59,8 @@ class UndeterminedShapeType {
public:
explicit UndeterminedShapeType(const std::string &env_str) {
// param_name indices_shape indices_type values_shape values_type dense_shape
// export UNDETERMINED_SPARSE_SHAPE_TYPES="w1:2:Int32:2 1 2:Float32:3 1 2"
// export UNDETERMINED_SPARSE_SHAPE_TYPES="sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1
// 2:Float32:3 1 2"
std::vector<string> fields;
string tmp;
std::stringstream input(env_str);
......@@ -115,6 +116,20 @@ std::vector<int> UndeterminedShapeType::GetShape(const std::string &shape_str) {
}
const size_t UndeterminedShapeType::fields_num = 6;
std::unordered_map<std::string, UndeterminedShapeType> g_undetermined_configs;
void InitUndeterminedFromEnv(const std::string &sparse_shape_types) {
if (!g_undetermined_configs.empty()) {
return;
}
std::string tmp;
std::stringstream input(sparse_shape_types);
while (std::getline(input, tmp, ';')) {
auto config = UndeterminedShapeType(tmp);
g_undetermined_configs.insert(std::make_pair(config.param_name(), config));
MS_LOG(DEBUG) << "Undetermined config from env: " << tmp;
}
}
AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
const AbstractBasePtrList &args_spec_list) {
MS_EXCEPTION_IF_NULL(primitive);
......@@ -128,27 +143,33 @@ AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePt
MS_LOG(EXCEPTION) << "EnvGetItem evaluator args[1] should be a SymbolicKeyInstance but: " << key->ToString();
}
if (key->sparse_grad()) {
if (!key->sparse_grad().empty()) {
// Will be fixed once undetermined type ready
auto sparse_shape_types = common::GetEnv("UNDETERMINED_SPARSE_SHAPE_TYPES");
if (sparse_shape_types.empty()) {
sparse_shape_types = "w1:2:Int32:2 1 2:Float32:3 1 2";
sparse_shape_types = "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2";
}
MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString() << ", Undetermined shape is "
<< sparse_shape_types;
InitUndeterminedFromEnv(sparse_shape_types);
auto shape_types = UndeterminedShapeType(sparse_shape_types);
auto shape_types = g_undetermined_configs.find(key->sparse_grad());
if (shape_types == g_undetermined_configs.end()) {
MS_LOG(EXCEPTION) << "Param " << key->ToString()
<< " has sparse_grad, but shape/type is not configured in env UNDETERMINED_SPARSE_SHAPE_TYPES: "
<< sparse_shape_types;
}
MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString();
AbstractBasePtrList sparse_list;
// indices
auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.indices_type());
auto indices = std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types.indices_shape()));
auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.indices_type());
auto indices =
std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types->second.indices_shape()));
sparse_list.emplace_back(indices);
// values
auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.values_type());
auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types.values_shape()));
auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.values_type());
auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types->second.values_shape()));
sparse_list.emplace_back(dout);
// dense_shape
sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types.dense_shape()));
sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types->second.dense_shape()));
return std::make_shared<AbstractTuple>(sparse_list);
}
......
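For reference, the multi-parameter config packs one entry per parameter into UNDETERMINED_SPARSE_SHAPE_TYPES, entries separated by ';' and the six fields (param_name, indices_shape, indices_type, values_shape, values_type, dense_shape) separated by ':'. A minimal Python sketch, not part of the commit, showing how such a string decomposes into per-parameter configs (field meanings taken from the comment in the UndeterminedShapeType constructor above):

```python
import os

def parse_sparse_shape_types(env_str):
    """Split 'name:indices_shape:indices_type:values_shape:values_type:dense_shape' entries."""
    configs = {}
    for entry in env_str.split(";"):
        # Six ':'-separated fields per entry, matching UndeterminedShapeType::fields_num.
        name, indices_shape, indices_type, values_shape, values_type, dense_shape = entry.split(":")
        configs[name] = {
            "indices_shape": [int(x) for x in indices_shape.split()],
            "indices_type": indices_type,
            "values_shape": [int(x) for x in values_shape.split()],
            "values_type": values_type,
            "dense_shape": [int(x) for x in dense_shape.split()],
        }
    return configs

# Default mirrors the fallback string used in InferImplEnvGetItem above.
env = os.environ.get(
    "UNDETERMINED_SPARSE_SHAPE_TYPES",
    "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2")
print(parse_sparse_shape_types(env)["sparse_key_w1"])
```

Like `g_undetermined_configs`, the resulting map is keyed by the parameter's `sparse_grad` string, which is why each parameter can now carry its own shape/type configuration.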
......@@ -229,7 +229,8 @@ bool AbstractSpecializeAction(const ResourcePtr &res) {
if (param_node->has_default()) {
auto param_value = std::dynamic_pointer_cast<ParamValuePy>(param_node->default_param());
AbstractBasePtr ptr = abstract::FromValue(parse::data_converter::PyDataToValue(param_value->value()), true);
auto sparse_grad = py::cast<bool>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
auto sparse_grad =
py::cast<std::string>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
ptr->set_sparse_grad(sparse_grad);
parallel::ParallelParameterContextRestoreInNoTraining(func_graph, param_node, ptr);
......
......@@ -44,7 +44,7 @@ class AbstractBase : public Base {
public:
explicit AbstractBase(const ValuePtr &value = nullptr, const TypePtr &type = kAnyType,
const BaseShapePtr &shape = kNoShape)
: value_(value), type_(type), shape_(shape), sparse_grad_(false) {}
: value_(value), type_(type), shape_(shape), sparse_grad_("") {}
~AbstractBase() override = default;
MS_DECLARE_PARENT(AbstractBase, Base)
......@@ -53,13 +53,13 @@ class AbstractBase : public Base {
virtual bool operator==(const AbstractBase &other) const;
void set_value(const ValuePtr &value) { value_ = value; }
void set_sparse_grad(const bool &sparse_grad) { sparse_grad_ = sparse_grad; }
void set_sparse_grad(const std::string &sparse_grad) { sparse_grad_ = sparse_grad; }
void set_type(const TypePtr &type) { type_ = type; }
void set_shape(const BaseShapePtr &shape) { shape_ = shape; }
void set_value_desc(const std::string &desc) { value_desc_ = desc; }
const std::string &value_desc() const { return value_desc_; }
ValuePtr GetValueTrack() const { return value_; }
bool sparse_grad() const { return sparse_grad_; }
const std::string &sparse_grad() const { return sparse_grad_; }
TypePtr GetTypeTrack() const { return type_; }
BaseShapePtr GetShapeTrack() const { return shape_; }
......@@ -87,7 +87,7 @@ class AbstractBase : public Base {
TypePtr type_;
BaseShapePtr shape_;
std::string value_desc_; // store initial value description for error report
bool sparse_grad_;
std::string sparse_grad_;
};
class AbstractScalar : public AbstractBase {
......
......@@ -51,9 +51,9 @@ class Parameter:
requires_grad (bool): True if the parameter requires gradient. Default: True.
layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in parallel mode,
broadcast and gradients communication would not be applied on parameters. Default: False.
sparse_grad (bool): True if the parameter's gradient is sparse. Default: False.
sparse_grad (str): Set if the parameter's gradient is sparse. Default: empty.
"""
def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=False):
def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=""):
self.set_parameter_data(default_input)
self.name = name
self.requires_grad = requires_grad
......@@ -181,9 +181,9 @@ class Parameter:
return self._sparse_grad
@sparse_grad.setter
def sparse_grad(self, value=True):
if not isinstance(value, bool):
raise TypeError("`sparse_grad` parameter must be bool type")
def sparse_grad(self, value=""):
if not isinstance(value, str):
raise TypeError("`sparse_grad` parameter must be str type")
self._sparse_grad = value
@property
......
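With `sparse_grad` now a string key instead of a bool, a parameter opts into sparse gradients by naming the entry that describes it in UNDETERMINED_SPARSE_SHAPE_TYPES. An illustrative sketch of the intended usage, mirroring the test changes further down; it assumes a MindSpore build containing this commit, and the key name `sparse_key_w1` is only an example:

```python
import numpy as np
from mindspore import Tensor, Parameter

# The string key ties the parameter to its entry in UNDETERMINED_SPARSE_SHAPE_TYPES.
w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
               name="w1", sparse_grad="sparse_key_w1")
# An empty string (the default) keeps the gradient dense.
w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2")
```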
......@@ -156,7 +156,7 @@ class Adam(Optimizer):
To improve parameter groups performance, the customized order of parameters can be supported.
The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
`sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
`sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
behavior is currently performed on the CPU, weight decay is not supported.
Args:
......
......@@ -72,7 +72,7 @@ class FTRL(Optimizer):
Note:
The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
`sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
`sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
behavior is currently performed on the CPU, weight decay is not supported.
Args:
......
......@@ -92,9 +92,10 @@ class LazyAdam(Optimizer):
applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters.
The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
`sparse_grad` of `Parameter` being set as True. The sparse behavior, to be noted, is not equivalent to the
`sparse_grad` of `Parameter` being set. The sparse behavior, to be noted, is not equivalent to the
original Adam algorithm, as only the current indices params will be updated. The sparse feature is under
continuous development. The sparse behavior is currently performed on the CPU, weight decay is not supported.
continuous development. The sparse behavior is currently performed on the CPU, weight decay is
not supported.
Args:
params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated,
......
......@@ -241,6 +241,7 @@ class HyperMap(HyperMap_):
return func(*args_list)
return tuple(map(hypermap, *args_list))
class Map(Map_):
"""
Map will apply the set operation on input sequences.
......@@ -271,37 +272,12 @@ class Map(Map_):
Map_.__init__(self)
def __call__(self, *args):
func = args[0]
count = 0
count_max = 1
args_list = args[1:]
if self.ops is not None:
func = self.ops
args_list = args
for item in args_list:
if isinstance(item, (tuple, list)):
count_max = len(item)
break
def get_item(x):
nonlocal count
if isinstance(x, (tuple, list)):
return x[count]
return x
for i in range(count_max):
true_args = tuple(map(get_item, args_list))
func(*true_args)
count = i + 1
return True
def register(self, *type_names):
"""Register a function for the given type string."""
def deco(fn):
self.register_fn(type_names, fn)
return fn
return deco
func = self.ops
args_list = args
if self.ops is None:
func = args[0]
args_list = args[1:]
return tuple(map(func, *args_list))
class _ListAppend(ListAppend_):
......
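The rewritten `Map.__call__` drops the manual index bookkeeping and simply maps the function element-wise across the input sequences, returning a tuple. A pure-Python sketch of the resulting semantics (illustration only, not the graph-mode implementation):

```python
# Apply func element-wise across the input sequences and collect a tuple,
# matching the new `tuple(map(func, *args_list))` behavior.
def map_call(func, *sequences):
    return tuple(map(func, *sequences))

# e.g. combining two gradient tuples element-wise
print(map_call(lambda x, y: x + y, (1, 2, 3), (10, 20, 30)))  # (11, 22, 33)
```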
......@@ -53,7 +53,8 @@ class NetWithSparseGatherV2(nn.Cell):
""" NetWithSparseGatherV2 definition """
def __init__(self):
super(NetWithSparseGatherV2, self).__init__()
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
name="weight1", sparse_grad="sparse_key_w1")
self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
self.axis = 0
self.gather = P.SparseGatherV2()
......
......@@ -154,8 +154,8 @@ def test_AdamWeightDecaySparse():
class NetWithSparseGatherV2(nn.Cell):
def __init__(self):
super(NetWithSparseGatherV2, self).__init__()
self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad=True)
self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2")
self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad="sparse_key_w1")
self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2", sparse_grad="sparse_key_w2")
self.gatherv2 = P.SparseGatherV2()
self.axis = 0
def construct(self, indices):
......
......@@ -41,7 +41,8 @@ class NetWithSparseGatherV2(nn.Cell):
""" NetWithSparseGatherV2 definition """
def __init__(self):
super(NetWithSparseGatherV2, self).__init__()
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
name="weight1", sparse_grad="sparse_key_w1")
self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
self.axis = 0
self.gather = P.SparseGatherV2()
......
......@@ -43,7 +43,8 @@ class NetWithSparseGatherV2(nn.Cell):
""" NetWithSparseGatherV2 definition """
def __init__(self):
super(NetWithSparseGatherV2, self).__init__()
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
name="weight1", sparse_grad="sparse_key_w1")
self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
self.axis = 0
self.gather = P.SparseGatherV2()
......
......@@ -40,7 +40,8 @@ class NetWithSparseGatherV2(nn.Cell):
""" NetWithSparseGatherV2 definition """
def __init__(self):
super(NetWithSparseGatherV2, self).__init__()
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1",
sparse_grad="sparse_key_w1")
self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="weight2")
self.axis = 0
self.gather = P.SparseGatherV2()
......