Commit 12ff0be5 authored by mindspore-ci-bot, committed by Gitee

!5716 Unify float to int cast and get initial accum for ps ftrl.

Merge pull request !5716 from ZPaC/master-unify-float-to-int-cast
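In short, this PR does two things. First, it unifies how sparse indices travel through the parameter-server `Values` payload: instead of storing them as floats and converting element by element with `static_cast<int>`, both the worker and the server now treat the index region of the buffer as raw `int` data and copy it with a single `memcpy_s` after a `reinterpret_cast`. Second, the FTRL accumulator on the parameter server is now initialized from the `init_accum` attribute pushed from the Python frontend instead of a hard-coded `1.0`. The snippet below is a minimal self-contained sketch of the first pattern only; the buffer and values are illustrative, not MindSpore code.

```cpp
#include <cstring>
#include <iostream>
#include <vector>

int main() {
  // A transport buffer typed as float, standing in for the PS Values payload.
  std::vector<float> values(4);

  // Old scheme: indices were stored as floats and converted per element:
  //   converted[i] = static_cast<int>(float_data[i]);
  // New scheme: the producer writes int payloads directly into the bytes...
  const int src[4] = {3, 1, 4, 1};
  std::memcpy(values.data(), src, sizeof(src));

  // ...and the consumer reinterprets the region instead of converting.
  // Like the patch, this assumes int and float have the same width.
  const int *indices = reinterpret_cast<const int *>(values.data());
  for (std::size_t i = 0; i < values.size(); ++i) {
    std::cout << indices[i] << (i + 1 < values.size() ? ' ' : '\n');
  }
  return 0;
}
```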
@@ -48,6 +48,10 @@ void SparseApplyFtrlPSKernel::InitKernel(
   if (grad_shape[0] != indices_size_) {
     MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices";
   }
+  init_accum_ = AnfAlgo::GetNodeAttr<float>(cnode, "init_accum");
+  if (init_accum_ < 0) {
+    MS_LOG(EXCEPTION) << "init_accum should be a non-negative scalar";
+  }
   lr_ = AnfAlgo::GetNodeAttr<float>(cnode, "lr");
   if (lr_ <= 0) {
     MS_LOG(EXCEPTION) << "lr should be a positive scalar";
......
@@ -28,7 +28,7 @@ using mindspore::kernel::SparseApplyFtrlCPUKernel;

 class SparseApplyFtrlPSKernel : public SparseApplyFtrlCPUKernel, public PServerKernel {
  public:
   SparseApplyFtrlPSKernel(size_t rank_id, size_t pserver_num, size_t worker_num)
-      : PServerKernel(rank_id, pserver_num, worker_num) {}
+      : PServerKernel(rank_id, pserver_num, worker_num), init_accum_(0.1) {}
   ~SparseApplyFtrlPSKernel() override = default;
   void InitKernel(const CNodePtr &cnode,
@@ -41,9 +41,11 @@ class SparseApplyFtrlPSKernel : public SparseApplyFtrlCPUKernel, public PServerK
   const std::vector<size_t> &input_sizes() const override;
   const std::vector<size_t> &output_sizes() const override;
   const std::vector<size_t> &workspace_sizes() const override;
+  const float init_accum() const { return init_accum_; }

  protected:
   void ReInit(const std::vector<AddressPtr> &) override;
+  float init_accum_;
 };
 } // namespace ps
 } // namespace kernel
......
@@ -100,16 +100,11 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
   for (size_t i = 0; i < indices_index; i++) {
     indice_offset += lengths[i];
   }
-  float *incr_indice_data = values.data() + indice_offset;
+  int *incr_indice_data = reinterpret_cast<int *>(values.data()) + indice_offset;
   size_t incr_indice_size = lengths[indices_index];
   size_t incr_indice_data_size = incr_indice_size * sizeof(int);
-  std::vector<int> converted_indices(incr_indice_size);
-  for (size_t i = 0; i < incr_indice_size; i++) {
-    converted_indices[i] = static_cast<int>(incr_indice_data[i]);
-  }
-  auto ret2 = memcpy_s(accum_indices_data + indices_offset_, incr_indice_data_size, converted_indices.data(),
-                       incr_indice_data_size);
+  auto ret2 =
+    memcpy_s(accum_indices_data + indices_offset_, incr_indice_data_size, incr_indice_data, incr_indice_data_size);
   if (ret2 != 0) {
     MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")";
   }
......
@@ -18,14 +18,16 @@
 #include <vector>
 #include <memory>
 #include <functional>
+#include "backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h"

 namespace mindspore {
 namespace parallel {
 namespace ps {
+using mindspore::kernel::ps::SparseApplyFtrlPSKernel;
 OptimizerInfo *OptimizerInfoBuilder::Build(const std::shared_ptr<PServerKernel> &pserver_kernel,
                                            const WeightPtr &weight, const Keys &keys, const Values &values,
                                            const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num) {
-  OptimizerInfo *optim_info = BuildInputs(weight, keys, values, lens, inputs_shape, worker_num);
+  OptimizerInfo *optim_info = BuildInputs(weight, keys, values, lens, inputs_shape, worker_num, pserver_kernel);
   std::vector<size_t> ws_sizes = pserver_kernel->workspace_sizes();
   BuildWorkspaces(optim_info, ws_sizes, worker_num);
   BuildOutputs(optim_info, worker_num);
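Worth noting in the hunk above: `Build` now forwards the `pserver_kernel` it already receives into the virtual `BuildInputs`, so concrete builders can query kernel-specific state (here, the FTRL kernel's initial accumulator). Builders that have no use for it simply leave the new `shared_ptr` parameter unnamed, as the Momentum and SparseAdam overrides below do.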
@@ -45,7 +47,7 @@ void OptimizerInfoBuilder::BuildWorkspaces(OptimizerInfo *info, const std::vecto

 OptimizerInfo *MomentumOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
                                                      const Lengths &lens, const InputsShapePtr &inputs_shape,
-                                                     size_t worker_num) {
+                                                     size_t worker_num, const std::shared_ptr<PServerKernel> &) {
   AddressPtr weight_addr = std::make_shared<kernel::Address>();
   weight_addr->addr = weight->data();
   weight_addr->size = weight->size() * sizeof(float);
@@ -74,7 +76,7 @@ OptimizerInfo *MomentumOptimInfoBuilder::BuildInputs(const WeightPtr &weight, co

 OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
                                                        const Lengths &lens, const InputsShapePtr &inputs_shape,
-                                                       size_t worker_num) {
+                                                       size_t worker_num, const std::shared_ptr<PServerKernel> &) {
   AddressPtr weight_addr = std::make_shared<kernel::Address>();
   weight_addr->addr = weight->data();
   weight_addr->size = weight->size() * sizeof(float);
@@ -140,13 +142,9 @@ OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
     std::accumulate((*indices_shape).begin(), (*indices_shape).end(), sizeof(int), std::multiplies<size_t>());
   AddressPtr indices = std::make_shared<kernel::Address>();
   indices->addr = new int[total_indice_size * worker_num];
-  std::vector<int> converted_indices(lens[7]);
   size_t indices_data_size = lens[7] * sizeof(int);
-  float *indices_data = reinterpret_cast<float *>(epsilon->addr) + lens[5] + lens[6];
-  for (int i = 0; i < lens[7]; i++) {
-    converted_indices[i] = static_cast<int>(indices_data[i]);
-  }
-  ret = memcpy_s(indices->addr, indices_data_size, converted_indices.data(), indices_data_size);
+  int *indices_data = reinterpret_cast<int *>(epsilon->addr) + lens[5] + lens[6];
+  ret = memcpy_s(indices->addr, indices_data_size, indices_data, indices_data_size);
   if (ret != 0) {
     MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
   }
@@ -158,7 +156,8 @@ OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight,

 OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
                                                        const Lengths &lens, const InputsShapePtr &inputs_shape,
-                                                       size_t worker_num) {
+                                                       size_t worker_num,
+                                                       const std::shared_ptr<PServerKernel> &pserver_kernel) {
   AddressPtr weight_addr = std::make_shared<kernel::Address>();
   weight_addr->addr = weight->data();
   weight_addr->size = weight->size() * sizeof(float);
@@ -167,7 +166,7 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
   accum->size = weight->size() * sizeof(float);
   for (size_t i = 0; i < weight->size(); i++) {
     float *tmp = reinterpret_cast<float *>(accum->addr);
-    tmp[i] = 1.0;
+    tmp[i] = std::dynamic_pointer_cast<SparseApplyFtrlPSKernel>(pserver_kernel)->init_accum();
   }
   AddressPtr linear = std::make_shared<kernel::Address>();
   linear->addr = new float[weight->size()];
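A note on the accumulator fill above: `std::dynamic_pointer_cast` returns an empty `shared_ptr` when the runtime type does not match, and the new line dereferences its result unchecked, once per loop iteration. Below is a hedged sketch of the same downcast with an explicit guard; the two structs are simplified stand-ins for the MindSpore classes, not their real definitions.

```cpp
#include <iostream>
#include <memory>

// Simplified stand-ins: the real classes live in mindspore::kernel::ps.
struct PServerKernel {
  virtual ~PServerKernel() = default;  // polymorphic base, so dynamic casts work
};
struct SparseApplyFtrlPSKernel : PServerKernel {
  float init_accum() const { return init_accum_; }
  float init_accum_ = 0.1f;  // mirrors the new constructor default
};

int main() {
  std::shared_ptr<PServerKernel> kernel = std::make_shared<SparseApplyFtrlPSKernel>();
  // Perform the cast once, outside any per-element loop.
  auto ftrl = std::dynamic_pointer_cast<SparseApplyFtrlPSKernel>(kernel);
  if (ftrl == nullptr) {  // empty pointer on a type mismatch
    std::cerr << "kernel is not a SparseApplyFtrlPSKernel" << std::endl;
    return 1;
  }
  std::cout << "init_accum = " << ftrl->init_accum() << std::endl;
  return 0;
}
```

Hoisting the cast out of the `for` loop, as the sketch does, would also avoid repeating the RTTI lookup once per weight element.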
@@ -192,13 +191,9 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
     std::accumulate((*indices_shape).begin(), (*indices_shape).end(), 1, std::multiplies<size_t>());
   AddressPtr indices = std::make_shared<kernel::Address>();
   indices->addr = new int[total_indice_size * worker_num];
-  std::vector<int> converted_indices(lens[1]);
   size_t indices_data_size = lens[1] * sizeof(int);
-  float *indices_data = reinterpret_cast<float *>(values.data()) + lens[0];
-  for (int i = 0; i < lens[1]; i++) {
-    converted_indices[i] = static_cast<int>(indices_data[i]);
-  }
-  ret = memcpy_s(indices->addr, indices_data_size, converted_indices.data(), indices_data_size);
+  int *indices_data = reinterpret_cast<int *>(values.data()) + lens[0];
+  ret = memcpy_s(indices->addr, indices_data_size, indices_data, indices_data_size);
   if (ret != 0) {
     MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
   }
......
@@ -38,7 +38,8 @@ class OptimizerInfoBuilder {
                        size_t worker_num);
   virtual OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
-                                     const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num) = 0;
+                                     const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num,
+                                     const std::shared_ptr<PServerKernel> &pserver_kernel) = 0;
   virtual void BuildWorkspaces(OptimizerInfo *info, const std::vector<size_t> &ws_sizes, size_t worker_num);
   virtual void BuildOutputs(OptimizerInfo *info, size_t worker_num) {}
@@ -47,19 +48,22 @@ class OptimizerInfoBuilder {
 class MomentumOptimInfoBuilder : public OptimizerInfoBuilder {
  public:
   OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
-                             const InputsShapePtr &inputs_shape, size_t worker_num) override;
+                             const InputsShapePtr &inputs_shape, size_t worker_num,
+                             const std::shared_ptr<PServerKernel> &pserver_kernel) override;
 };

 class SparseAdamOptimInfoBuilder : public OptimizerInfoBuilder {
  public:
   OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
-                             const InputsShapePtr &inputs_shpae, size_t worker_num) override;
+                             const InputsShapePtr &inputs_shpae, size_t worker_num,
+                             const std::shared_ptr<PServerKernel> &pserver_kernel) override;
 };

 class SparseFtrlOptimInfoBuilder : public OptimizerInfoBuilder {
  public:
   OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
-                             const InputsShapePtr &inputs_shpae, size_t worker_num) override;
+                             const InputsShapePtr &inputs_shpae, size_t worker_num,
+                             const std::shared_ptr<PServerKernel> &pserver_kernel) override;
 };
 } // namespace ps
 } // namespace parallel
......
@@ -571,11 +571,7 @@ void WorkerProxy<T>::BuildSparseValue(const ::ps::SArray<int> &lengths, const si
   int indice_offset = grad_offset + lengths[grad_index];
   data_size = lengths[indice_index] * sizeof(T);
   T *indice_data = reduced_data->data() + indice_offset;
-  std::vector<T> convert(lengths[indice_index]);
-  for (int i = 0; i < lengths[indice_index]; i++) {
-    convert[i] = static_cast<T>(indices[i]);
-  }
-  ret = memcpy_s(indice_data, data_size, convert.data(), data_size);
+  ret = memcpy_s(indice_data, data_size, indices, data_size);
   if (ret != 0) {
     MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
   }
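Both `SparseOptimInfo::Accumulate` and `WorkerProxy<T>::BuildSparseValue` now bit-copy `int` indices through buffers typed as `float` (or `T`), so the byte counts only line up when the two types have the same width. An illustrative compile-time guard, not part of the patch, would make that assumption explicit:

```cpp
// Illustrative guard, not in the patch: copying int index payloads through a
// float-typed buffer is only size-safe when the widths match.
static_assert(sizeof(int) == sizeof(float), "int index payload must match float width");

int main() { return 0; }
```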
......
@@ -162,6 +162,7 @@ class FTRL(Optimizer):
         self.sparse_opt = P.FusedSparseFtrl(learning_rate, l1, l2, lr_power, use_locking=use_locking)
         self._ps_pull = P.Pull()
         self._ps_push = P.Push("Ftrl", [0, 1, 2])
+        self._ps_push.add_prim_attr("init_accum", initial_accum)
         self._ps_push.add_prim_attr("lr", learning_rate)
         self._ps_push.add_prim_attr("l1", l1)
         self._ps_push.add_prim_attr("l2", l2)
......
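Taken together, the last hunk closes the loop: the `initial_accum` argument of the Python `FTRL` optimizer travels to the parameter server as the `init_accum` primitive attribute, is read and range-checked in `SparseApplyFtrlPSKernel::InitKernel`, and replaces the previously hard-coded `1.0` when `SparseFtrlOptimInfoBuilder` fills the accumulator. The new C++ constructor default of `0.1` matches the Python optimizer's documented default for `initial_accum`, so the server-side accumulator now agrees with what the frontend API promises.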