Commit 12ff0be5 authored by mindspore-ci-bot, committed by Gitee

!5716 Unify float to int cast and get initial accum for ps ftrl.

Merge pull request !5716 from ZPaC/master-unify-float-to-int-cast
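
This PR does two things. On the transport path, sparse indices are now written into the float-typed payload as raw int bit patterns, so each receiver replaces its element-wise `static_cast` loop with a `reinterpret_cast<int *>` view plus a single `memcpy_s`. On the FTRL path, the parameter server reads the accumulator's initial value from a new `init_accum` attribute instead of hard-coding `1.0`. The sketch below is a minimal standalone illustration of the first change (illustrative names, not MindSpore code); note how an index that is not exactly representable as a float still survives the round trip:

```cpp
// Self-contained sketch (not MindSpore code) of the transport pattern this
// PR standardizes: sparse indices ride inside the float payload as raw int
// bits, so no value-level float<->int conversion ever happens.
#include <cstring>
#include <iostream>
#include <vector>

static_assert(sizeof(int) == sizeof(float), "the bit-splicing trick needs equal widths");

int main() {
  // 1000000001 has no exact float representation: the old value-level cast
  // (int -> float on the worker, float -> int on the server) would corrupt it.
  std::vector<int> indices = {7, 42, 1000000001};

  // Worker side: copy the int bit patterns into the float-typed buffer.
  std::vector<float> payload(indices.size());
  std::memcpy(payload.data(), indices.data(), indices.size() * sizeof(int));

  // Server side (mirrors the new code): view the bytes as int and copy them
  // out in one memcpy -- no per-element static_cast loop.
  const int *indice_data = reinterpret_cast<const int *>(payload.data());
  std::vector<int> recovered(indices.size());
  std::memcpy(recovered.data(), indice_data, indices.size() * sizeof(int));

  for (size_t i = 0; i < indices.size(); ++i) {
    std::cout << recovered[i] << (recovered[i] == indices[i] ? " (exact)" : " (lost)") << '\n';
  }
  return 0;
}
```
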
@@ -48,6 +48,10 @@ void SparseApplyFtrlPSKernel::InitKernel(
   if (grad_shape[0] != indices_size_) {
     MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices";
   }
+  init_accum_ = AnfAlgo::GetNodeAttr<float>(cnode, "init_accum");
+  if (init_accum_ < 0) {
+    MS_LOG(EXCEPTION) << "init_accum should be a non-negative scalar";
+  }
   lr_ = AnfAlgo::GetNodeAttr<float>(cnode, "lr");
   if (lr_ <= 0) {
     MS_LOG(EXCEPTION) << "lr should be a positive scalar";
......
@@ -28,7 +28,7 @@ using mindspore::kernel::SparseApplyFtrlCPUKernel;
 class SparseApplyFtrlPSKernel : public SparseApplyFtrlCPUKernel, public PServerKernel {
  public:
   SparseApplyFtrlPSKernel(size_t rank_id, size_t pserver_num, size_t worker_num)
-      : PServerKernel(rank_id, pserver_num, worker_num) {}
+      : PServerKernel(rank_id, pserver_num, worker_num), init_accum_(0.1) {}
   ~SparseApplyFtrlPSKernel() override = default;
   void InitKernel(const CNodePtr &cnode,
@@ -41,9 +41,11 @@ class SparseApplyFtrlPSKernel : public SparseApplyFtrlCPUKernel, public PServerK
   const std::vector<size_t> &input_sizes() const override;
   const std::vector<size_t> &output_sizes() const override;
   const std::vector<size_t> &workspace_sizes() const override;
+  const float init_accum() const { return init_accum_; }
  protected:
   void ReInit(const std::vector<AddressPtr> &) override;
+  float init_accum_;
 };
 }  // namespace ps
 }  // namespace kernel
......
@@ -100,16 +100,11 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
   for (size_t i = 0; i < indices_index; i++) {
     indice_offset += lengths[i];
   }
-  float *incr_indice_data = values.data() + indice_offset;
+  int *incr_indice_data = reinterpret_cast<int *>(values.data()) + indice_offset;
   size_t incr_indice_size = lengths[indices_index];
   size_t incr_indice_data_size = incr_indice_size * sizeof(int);
-  std::vector<int> converted_indices(incr_indice_size);
-  for (size_t i = 0; i < incr_indice_size; i++) {
-    converted_indices[i] = static_cast<int>(incr_indice_data[i]);
-  }
-  auto ret2 = memcpy_s(accum_indices_data + indices_offset_, incr_indice_data_size, converted_indices.data(),
-                       incr_indice_data_size);
+  auto ret2 =
+    memcpy_s(accum_indices_data + indices_offset_, incr_indice_data_size, incr_indice_data, incr_indice_data_size);
   if (ret2 != 0) {
     MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")";
   }
......
@@ -18,14 +18,16 @@
 #include <vector>
 #include <memory>
 #include <functional>
+#include "backend/kernel_compiler/cpu/ps/sparse_apply_ftrl_ps_kernel.h"
 namespace mindspore {
 namespace parallel {
 namespace ps {
+using mindspore::kernel::ps::SparseApplyFtrlPSKernel;
 OptimizerInfo *OptimizerInfoBuilder::Build(const std::shared_ptr<PServerKernel> &pserver_kernel,
                                            const WeightPtr &weight, const Keys &keys, const Values &values,
                                            const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num) {
-  OptimizerInfo *optim_info = BuildInputs(weight, keys, values, lens, inputs_shape, worker_num);
+  OptimizerInfo *optim_info = BuildInputs(weight, keys, values, lens, inputs_shape, worker_num, pserver_kernel);
   std::vector<size_t> ws_sizes = pserver_kernel->workspace_sizes();
   BuildWorkspaces(optim_info, ws_sizes, worker_num);
   BuildOutputs(optim_info, worker_num);
@@ -45,7 +47,7 @@ void OptimizerInfoBuilder::BuildWorkspaces(OptimizerInfo *info, const std::vecto
 OptimizerInfo *MomentumOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
                                                      const Lengths &lens, const InputsShapePtr &inputs_shape,
-                                                     size_t worker_num) {
+                                                     size_t worker_num, const std::shared_ptr<PServerKernel> &) {
   AddressPtr weight_addr = std::make_shared<kernel::Address>();
   weight_addr->addr = weight->data();
   weight_addr->size = weight->size() * sizeof(float);
@@ -74,7 +76,7 @@ OptimizerInfo *MomentumOptimInfoBuilder::BuildInputs(const WeightPtr &weight, co
 OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
                                                        const Lengths &lens, const InputsShapePtr &inputs_shape,
-                                                       size_t worker_num) {
+                                                       size_t worker_num, const std::shared_ptr<PServerKernel> &) {
   AddressPtr weight_addr = std::make_shared<kernel::Address>();
   weight_addr->addr = weight->data();
   weight_addr->size = weight->size() * sizeof(float);
@@ -140,13 +142,9 @@ OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
     std::accumulate((*indices_shape).begin(), (*indices_shape).end(), sizeof(int), std::multiplies<size_t>());
   AddressPtr indices = std::make_shared<kernel::Address>();
   indices->addr = new int[total_indice_size * worker_num];
-  std::vector<int> converted_indices(lens[7]);
   size_t indices_data_size = lens[7] * sizeof(int);
-  float *indices_data = reinterpret_cast<float *>(epsilon->addr) + lens[5] + lens[6];
-  for (int i = 0; i < lens[7]; i++) {
-    converted_indices[i] = static_cast<int>(indices_data[i]);
-  }
-  ret = memcpy_s(indices->addr, indices_data_size, converted_indices.data(), indices_data_size);
+  int *indices_data = reinterpret_cast<int *>(epsilon->addr) + lens[5] + lens[6];
+  ret = memcpy_s(indices->addr, indices_data_size, indices_data, indices_data_size);
   if (ret != 0) {
     MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
   }
@@ -158,7 +156,8 @@ OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
 OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
                                                        const Lengths &lens, const InputsShapePtr &inputs_shape,
-                                                       size_t worker_num) {
+                                                       size_t worker_num,
+                                                       const std::shared_ptr<PServerKernel> &pserver_kernel) {
   AddressPtr weight_addr = std::make_shared<kernel::Address>();
   weight_addr->addr = weight->data();
   weight_addr->size = weight->size() * sizeof(float);
@@ -167,7 +166,7 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
   accum->size = weight->size() * sizeof(float);
   for (size_t i = 0; i < weight->size(); i++) {
     float *tmp = reinterpret_cast<float *>(accum->addr);
-    tmp[i] = 1.0;
+    tmp[i] = std::dynamic_pointer_cast<SparseApplyFtrlPSKernel>(pserver_kernel)->init_accum();
   }
   AddressPtr linear = std::make_shared<kernel::Address>();
   linear->addr = new float[weight->size()];
@@ -192,13 +191,9 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
     std::accumulate((*indices_shape).begin(), (*indices_shape).end(), 1, std::multiplies<size_t>());
   AddressPtr indices = std::make_shared<kernel::Address>();
   indices->addr = new int[total_indice_size * worker_num];
-  std::vector<int> converted_indices(lens[1]);
   size_t indices_data_size = lens[1] * sizeof(int);
-  float *indices_data = reinterpret_cast<float *>(values.data()) + lens[0];
-  for (int i = 0; i < lens[1]; i++) {
-    converted_indices[i] = static_cast<int>(indices_data[i]);
-  }
-  ret = memcpy_s(indices->addr, indices_data_size, converted_indices.data(), indices_data_size);
+  int *indices_data = reinterpret_cast<int *>(values.data()) + lens[0];
+  ret = memcpy_s(indices->addr, indices_data_size, indices_data, indices_data_size);
   if (ret != 0) {
     MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
   }
......
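
A side note on the accum-initialization hunk above: `std::dynamic_pointer_cast<SparseApplyFtrlPSKernel>(pserver_kernel)` returns null if the kernel passed in is not actually the FTRL PS kernel, and the new code dereferences the result unconditionally. A minimal sketch of the lookup, with stand-in types and a defensive fallback that the committed code does not include:

```cpp
// Stand-in sketch (types and defaults are illustrative, not the real headers)
// of how SparseFtrlOptimInfoBuilder obtains init_accum from the PS kernel.
#include <iostream>
#include <memory>

struct PServerKernel {             // polymorphic base, as dynamic_cast requires
  virtual ~PServerKernel() = default;
};

struct SparseApplyFtrlPSKernel : PServerKernel {
  float init_accum() const { return init_accum_; }
  float init_accum_ = 0.1f;        // default, matching the new constructor
};

// The diff dereferences the cast result directly; the null check here is an
// extra safeguard, not part of the committed code.
float GetInitAccum(const std::shared_ptr<PServerKernel> &kernel) {
  auto ftrl = std::dynamic_pointer_cast<SparseApplyFtrlPSKernel>(kernel);
  return ftrl ? ftrl->init_accum() : 0.1f;
}

int main() {
  auto kernel = std::make_shared<SparseApplyFtrlPSKernel>();
  kernel->init_accum_ = 0.5f;      // in MindSpore this comes from the attr
  std::cout << GetInitAccum(kernel) << '\n';  // prints 0.5
}
```
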
@@ -38,7 +38,8 @@ class OptimizerInfoBuilder {
                         size_t worker_num);
   virtual OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
-                                     const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num) = 0;
+                                     const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num,
+                                     const std::shared_ptr<PServerKernel> &pserver_kernel) = 0;
   virtual void BuildWorkspaces(OptimizerInfo *info, const std::vector<size_t> &ws_sizes, size_t worker_num);
   virtual void BuildOutputs(OptimizerInfo *info, size_t worker_num) {}
@@ -47,19 +48,22 @@ class OptimizerInfoBuilder {
 class MomentumOptimInfoBuilder : public OptimizerInfoBuilder {
  public:
   OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
-                             const InputsShapePtr &inputs_shape, size_t worker_num) override;
+                             const InputsShapePtr &inputs_shape, size_t worker_num,
+                             const std::shared_ptr<PServerKernel> &pserver_kernel) override;
 };
 class SparseAdamOptimInfoBuilder : public OptimizerInfoBuilder {
  public:
   OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
-                             const InputsShapePtr &inputs_shpae, size_t worker_num) override;
+                             const InputsShapePtr &inputs_shpae, size_t worker_num,
+                             const std::shared_ptr<PServerKernel> &pserver_kernel) override;
 };
 class SparseFtrlOptimInfoBuilder : public OptimizerInfoBuilder {
  public:
   OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
-                             const InputsShapePtr &inputs_shpae, size_t worker_num) override;
+                             const InputsShapePtr &inputs_shpae, size_t worker_num,
+                             const std::shared_ptr<PServerKernel> &pserver_kernel) override;
 };
 }  // namespace ps
 }  // namespace parallel
......
@@ -571,11 +571,7 @@ void WorkerProxy<T>::BuildSparseValue(const ::ps::SArray<int> &lengths, const si
   int indice_offset = grad_offset + lengths[grad_index];
   data_size = lengths[indice_index] * sizeof(T);
   T *indice_data = reduced_data->data() + indice_offset;
-  std::vector<T> convert(lengths[indice_index]);
-  for (int i = 0; i < lengths[indice_index]; i++) {
-    convert[i] = static_cast<T>(indices[i]);
-  }
-  ret = memcpy_s(indice_data, data_size, convert.data(), data_size);
+  ret = memcpy_s(indice_data, data_size, indices, data_size);
   if (ret != 0) {
     MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
   }
......
@@ -162,6 +162,7 @@ class FTRL(Optimizer):
         self.sparse_opt = P.FusedSparseFtrl(learning_rate, l1, l2, lr_power, use_locking=use_locking)
         self._ps_pull = P.Pull()
         self._ps_push = P.Push("Ftrl", [0, 1, 2])
+        self._ps_push.add_prim_attr("init_accum", initial_accum)
         self._ps_push.add_prim_attr("lr", learning_rate)
         self._ps_push.add_prim_attr("l1", l1)
         self._ps_push.add_prim_attr("l2", l2)
......
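
End to end, the new attribute closes the loop: `FTRL(..., initial_accum=0.5)` on the worker attaches `init_accum` to the `Push` primitive, `SparseApplyFtrlPSKernel::InitKernel` on the server reads it with `AnfAlgo::GetNodeAttr<float>(cnode, "init_accum")` and rejects negative values, and `SparseFtrlOptimInfoBuilder::BuildInputs` seeds every accumulator slot with that value instead of the previously hard-coded `1.0`.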