Add ps ci test cases.

b10d4d6e · ZPaC · 22927dc4 · b10d4d6e · b10d4d6e · b10d4d6e
12 changed file
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_proxy_kernel.cc
@@ -25,9 +25,7 @@ void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) {
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
  auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
-  size_t axis = kShape4dDims - input_shape.size();
-  CPUKernelUtils::ExpandDimsTo4(&input_shape);
-  CPUKernelUtils::ExpandDimsTo4(&output_shape);
+  size_t axis = kShape2dDims - input_shape.size();
  for (auto dim : input_shape) {
    input_dims_ *= dim;
  }
@@ -40,6 +38,8 @@ void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) {
  values.insert(values.end(), input_shape.begin(), input_shape.end());
  values.insert(values.end(), indices_shape.begin(), indices_shape.end());
  values.insert(values.end(), output_shape.begin(), output_shape.end());
+  MS_LOG(INFO) << "Init embedding lookup proxy kernel, input shape:" << input_shape
+               << ", indices_shape:" << indices_shape << ", output_shape:" << output_shape;
  std::vector<int> lens{SizeToInt(input_shape.size()), SizeToInt(indices_shape.size()), SizeToInt(output_shape.size())};
  const char *env_role = getenv(mindspore::parallel::ps::kEnvRole);
  if (env_role != nullptr && strcmp(env_role, mindspore::parallel::ps::kEnvRoleOfWorker) == 0) {

--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/embedding_look_up_ps_kernel.cc
@@ -25,11 +25,15 @@ namespace mindspore {
 namespace kernel {
 namespace ps {
 using mindspore::parallel::ps::Util;
-constexpr int kAxis = 2;
+constexpr int kAxis = 0;
 void EmbeddingLookUpPSKernel::InitKernel(
  const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
  const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
  input_shape_ = *(shape_vec[0]);
+  first_dim_size_ = input_shape_[0];
+  for (size_t i = 1; i < input_shape_.size(); ++i) {
+    outer_dim_size_ *= input_shape_[i];
+  }
  auto indices_shape = *(shape_vec[1]);
  indices_lens_ = 1;
  for (auto shape : indices_shape) {
@@ -49,7 +53,6 @@ void EmbeddingLookUpPSKernel::InitKernel(
  size_t output_size =
    std::accumulate(output_shape.begin(), output_shape.end(), sizeof(float), std::multiplies<size_t>());
  output_size_list_.emplace_back(output_size);
-  CPUKernelUtils::ExpandDimsTo4(&input_shape_);
 }

 void EmbeddingLookUpPSKernel::ReInit(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {

--- a/mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.cc
+++ b/mindspore/ccsrc/frontend/parallel/ps/optimizer_info_builder.cc
@@ -77,7 +77,7 @@ OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
                                                       size_t worker_num) {
  AddressPtr weight_addr = std::make_shared<kernel::Address>();
  weight_addr->addr = weight->data();
-  weight_addr->size = weight->size();
+  weight_addr->size = weight->size() * sizeof(float);
  AddressPtr m = std::make_shared<kernel::Address>();
  m->addr = new float[weight->size()];
  m->size = weight->size() * sizeof(float);
@@ -156,7 +156,7 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
                                                       size_t worker_num) {
  AddressPtr weight_addr = std::make_shared<kernel::Address>();
  weight_addr->addr = weight->data();
-  weight_addr->size = weight->size();
+  weight_addr->size = weight->size() * sizeof(float);
  AddressPtr accum = std::make_shared<kernel::Address>();
  accum->addr = new float[weight->size()];
  accum->size = weight->size() * sizeof(float);
@@ -166,7 +166,7 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
  }
  AddressPtr linear = std::make_shared<kernel::Address>();
  linear->addr = new float[weight->size()];
-  auto ret = memset_s(linear->addr, weight->size() * sizeof(float), 0x00, weight->size() * sizeof(float));
+  int ret = memset_s(linear->addr, weight->size() * sizeof(float), 0x00, weight->size() * sizeof(float));
  if (ret != 0) {
    MS_LOG(EXCEPTION) << "memset_s error, errorno(" << ret << ")";
  }
@@ -176,9 +176,9 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
  size_t total_grad_size = std::accumulate((*grad_shape).begin(), (*grad_shape).end(), 1, std::multiplies<size_t>());
  AddressPtr grad = std::make_shared<kernel::Address>();
  grad->addr = new float[total_grad_size * worker_num];
-  auto ret1 = memcpy_s(grad->addr, lens[0] * sizeof(float), values.data(), lens[0] * sizeof(float));
-  if (ret1 != 0) {
-    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret1 << ")";
+  ret = memcpy_s(grad->addr, lens[0] * sizeof(float), values.data(), lens[0] * sizeof(float));
+  if (ret != 0) {
+    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
  }
  grad->size = lens[0] * sizeof(float);

@@ -187,10 +187,10 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
    std::accumulate((*indices_shape).begin(), (*indices_shape).end(), 1, std::multiplies<size_t>());
  AddressPtr indices = std::make_shared<kernel::Address>();
  indices->addr = new float[total_indice_size * worker_num];
-  auto ret2 = memcpy_s(indices->addr, lens[1] * sizeof(float), reinterpret_cast<float *>(values.data()) + lens[0],
-                       lens[1] * sizeof(float));
-  if (ret2 != 0) {
-    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")";
+  ret = memcpy_s(indices->addr, lens[1] * sizeof(float), reinterpret_cast<float *>(values.data()) + lens[0],
+                 lens[1] * sizeof(float));
+  if (ret != 0) {
+    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
  }
  indices->size = lens[1] * sizeof(int);


--- a/model_zoo/official/recommend/wide_and_deep/script/run_parameter_server_train_cluster.sh
+++ b/model_zoo/official/recommend/wide_and_deep/script/run_parameter_server_train_cluster.sh
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+execute_path=$(pwd)
+script_self=$(readlink -f "$0")
+self_path=$(dirname "${script_self}")
+
+#bash run_parameter_server_train_cluster.sh RANK_SIZE EPOCHS DATASET RANK_TABLE_FILE 
+#                                            LOCAL_WORKER_NUM LOCAL_SERVER_NUM SERVER_NUM 
+#                                            SCHED_HOST SCHED_PORT ROLE
+export RANK_SIZE=$1
+export EPOCH_SIZE=$2
+export DATASET=$3
+export RANK_TABLE_FILE=$4
+
+export MS_COMM_TYPE=zmq
+export MS_SCHED_NUM=1
+export MS_WORKER_NUM=$RANK_SIZE
+export LOCAL_WORKER_NUM=$5
+export LOCAL_SERVER_NUM=$6
+export MS_SERVER_NUM=$7
+export MS_SCHED_HOST=$8
+export MS_SCHED_PORT=$9
+export MS_ROLE=${10}
+echo  "=====Role is $MS_ROLE======"
+
+
+if [ "$MS_ROLE" == "MS_SCHED" ];then
+for((i=0;i<1;i++));
+do
+  rm -rf ${execute_path}/sched_$i/
+  mkdir ${execute_path}/sched_$i/
+  cd ${execute_path}/sched_$i/ || exit
+  export RANK_ID=$i
+  export DEVICE_ID=$i
+  python -s ${self_path}/../train_and_eval_parameter_server.py --data_path=$DATASET --epochs=$EPOCH_SIZE --parameter_server=1 >sched_$i.log 2>&1 &
+done
+fi
+
+if [ "$MS_ROLE" == "MS_PSERVER" ];then
+for((i=0;i<$LOCAL_SERVER_NUM;i++));
+do
+  rm -rf ${execute_path}/server_$i/
+  mkdir ${execute_path}/server_$i/
+  cd ${execute_path}/server_$i/ || exit
+  export RANK_ID=$i
+  export DEVICE_ID=$i
+  python -s ${self_path}/../train_and_eval_parameter_server.py --data_path=$DATASET --epochs=$EPOCH_SIZE --parameter_server=1 >server_$i.log 2>&1 &
+done
+fi
+
+if [ "$MS_ROLE" == "MS_WORKER" ];then
+for((i=0;i<$LOCAL_WORKER_NUM;i++));
+do
+  rm -rf ${execute_path}/worker_$i/
+  mkdir ${execute_path}/worker_$i/
+  cd ${execute_path}/worker_$i/ || exit
+  export RANK_ID=$i
+  export DEVICE_ID=$i
+  python -s ${self_path}/../train_and_eval_parameter_server.py --data_path=$DATASET --epochs=$EPOCH_SIZE --parameter_server=1 >worker_$i.log 2>&1 &
+done
+fi
--- a/tests/st/ps/cmp_sparse_embedding/shell_run_test.sh
+++ b/tests/st/ps/cmp_sparse_embedding/shell_run_test.sh
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+execute_path=$(pwd)
+self_path=$(dirname "${script_self}")
+export MS_COMM_TYPE=zmq
+export MS_SCHED_NUM=1
+DEVICE_TARGET=$1
+export MS_WORKER_NUM=$2
+export MS_SERVER_NUM=$3
+export MS_SCHED_HOST=$4
+export MS_SCHED_PORT=$5
+
+export MS_ROLE=MS_SCHED
+for((i=0;i<1;i++));
+do
+  rm -rf ${execute_path}/sched_$i/
+  mkdir ${execute_path}/sched_$i/
+  cd ${execute_path}/sched_$i/ || exit
+  python ${self_path}/../test_cmp_sparse_embedding.py &
+done
+
+export MS_ROLE=MS_PSERVER
+for((i=0;i<$MS_SERVER_NUM;i++));
+do
+  rm -rf ${execute_path}/server_$i/
+  mkdir ${execute_path}/server_$i/
+  cd ${execute_path}/server_$i/ || exit
+  python ${self_path}/../test_cmp_sparse_embedding.py &
+done
+
+export MS_ROLE=MS_WORKER
+for((i=0;i<$MS_WORKER_NUM;i++));
+do
+  rm -rf ${execute_path}/worker_$i/
+  mkdir ${execute_path}/worker_$i/
+  cd ${execute_path}/worker_$i/ || exit
+  python ${self_path}/../test_cmp_sparse_embedding.py &
+done
+
+wait $!
+exit $?
--- a/tests/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py
+++ b/tests/st/ps/cmp_sparse_embedding/test_cmp_sparse_embedding.py
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import os
+import argparse
+import numpy as np
+
+import mindspore.context as context
+import mindspore.nn as nn
+from mindspore import Tensor
+from mindspore.common import dtype as mstype
+from mindspore.nn import TrainOneStepCell, WithLossCell
+from mindspore.nn.optim import Adam
+from mindspore.ops import operations as P
+from mindspore.common.initializer import TruncatedNormal, initializer
+from mindspore import Parameter
+
+parser = argparse.ArgumentParser(description="test_sparse_embedding")
+parser.add_argument("--device_target", type=str, default="Ascend")
+args, _ = parser.parse_known_args()
+device_target = args.device_target
+context.set_context(
+    mode=context.GRAPH_MODE, device_target=device_target, enable_sparse=True
+)
+
+
+def fc_with_initialize(input_channels, out_channels):
+    """weight initial for fc layer"""
+    weight = weight_variable()
+    bias = weight_variable()
+    return nn.Dense(input_channels, out_channels, weight, bias)
+
+
+def weight_variable():
+    """weight initial"""
+    return TruncatedNormal(0.02)
+
+
+class LeNet5(nn.Cell):
+    def __init__(self, num_class=10):
+        super(LeNet5, self).__init__()
+        self.cast = P.Cast()
+        self.flatten = nn.Flatten()
+        self.embedding_table = Parameter(
+            initializer("normal", (16, 4), mstype.float32), name="embedding_table"
+        )
+        self.embedding = nn.EmbeddingLookup()
+        self.relu = nn.ReLU()
+        self.fc = fc_with_initialize(12, num_class)
+
+    def construct(self, x):
+        x = self.cast(x, mstype.int32)
+        x = self.embedding(self.embedding_table, x)
+        x = self.flatten(x)
+        x = self.fc(x)
+        return x
+
+
+def do_sparse_embedding(ps=False):
+    epoch = 10
+    net = LeNet5(10)
+    if ps:
+        net.embedding_table.set_param_ps()
+
+    optimizer = Adam(filter(lambda x: x.requires_grad, net.get_parameters()))
+    optimizer.sparse_opt.add_prim_attr("primitive_target", "CPU")
+    criterion = nn.SoftmaxCrossEntropyWithLogits(
+        is_grad=False, sparse=True, reduction="mean"
+    )
+    net_with_criterion = WithLossCell(net, criterion)
+    train_network = TrainOneStepCell(net_with_criterion, optimizer)
+    train_network.set_train()
+    losses = []
+    for _ in range(epoch):
+        data = Tensor(np.random.randint(0, 15, (32, 3), np.int32))
+        label = Tensor(np.random.randint(0, 9, (32), np.int32))
+        loss = train_network(data, label).asnumpy()
+        losses.append(loss)
+    print(losses)
+    return losses
+
+
+envs = os.environ
+if __name__ == "__main__":
+    np.random.seed(0)
+    ps_loss = do_sparse_embedding(True)
+
+    if envs.get("MS_ROLE") == "MS_WORKER":
+        envs["MS_ROLE"] = ""
+        np.random.seed(0)
+        no_ps_loss = do_sparse_embedding()
+        envs["MS_ROLE"] = "MS_WORKER"
+
+    assert np.allclose(ps_loss, no_ps_loss, rtol=1.0e-6, atol=1.0e-6)
--- a/tests/st/ps/cmp_sparse_embedding/test_entry_cmp_sparse_embedding.py
+++ b/tests/st/ps/cmp_sparse_embedding/test_entry_cmp_sparse_embedding.py
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import os
+import pytest
+
+
+@pytest.mark.level0
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.env_onecard
+def test_cmp_sparse_embedding():
+    return_code = os.system("bash shell_run_test.sh Ascend 1 1 127.0.0.1 8081")
+    assert return_code == 0
--- a/tests/st/ps/full_ps/shell_run_test.sh
+++ b/tests/st/ps/full_ps/shell_run_test.sh
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+execute_path=$(pwd)
+self_path=$(dirname "${script_self}")
+export MS_COMM_TYPE=zmq
+export MS_SCHED_NUM=1
+DEVICE_TARGET=$1
+DATASET_PATH=$2
+export MS_WORKER_NUM=$3
+export MS_SERVER_NUM=$4
+export MS_SCHED_HOST=$5
+export MS_SCHED_PORT=$6
+
+export MS_ROLE=MS_SCHED
+for((i=0;i<1;i++));
+do
+  rm -rf ${execute_path}/sched_$i/
+  mkdir ${execute_path}/sched_$i/
+  cd ${execute_path}/sched_$i/ || exit
+  python ${self_path}/../test_full_ps_lenet.py --device_target=$DEVICE_TARGET --dataset_path=$DATASET_PATH &
+done
+
+export MS_ROLE=MS_PSERVER
+for((i=0;i<$MS_SERVER_NUM;i++));
+do
+  rm -rf ${execute_path}/server_$i/
+  mkdir ${execute_path}/server_$i/
+  cd ${execute_path}/server_$i/ || exit
+  python ${self_path}/../test_full_ps_lenet.py --device_target=$DEVICE_TARGET --dataset_path=$DATASET_PATH &
+done
+
+export MS_ROLE=MS_WORKER
+for((i=0;i<$MS_WORKER_NUM;i++));
+do
+  rm -rf ${execute_path}/worker_$i/
+  mkdir ${execute_path}/worker_$i/
+  cd ${execute_path}/worker_$i/ || exit
+  python ${self_path}/../test_full_ps_lenet.py --device_target=$DEVICE_TARGET --dataset_path=$DATASET_PATH &
+done
+
+wait $!
+exit $?
--- a/tests/st/ps/full_ps/test_entry_full_ps_lenet.py
+++ b/tests/st/ps/full_ps/test_entry_full_ps_lenet.py
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import os
+import pytest
+
+
+@pytest.mark.level0
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.env_onecard
+def test_full_ps_ascend_lenet():
+    return_code = os.system(
+        "bash shell_run_test.sh Ascend /home/workspace/mindspore_dataset/mnist 1 1 127.0.0.1 8082"
+    )
+    assert return_code == 0
--- a/tests/st/ps/full_ps/test_run.py
+++ b/tests/st/ps/full_ps/test_run.py
@@ -17,14 +17,16 @@ import os
 # @pytest.mark.level0
 # @pytest.mark.platform_arm_ascend_training
 # @pytest.mark.platform_x86_ascend_training
-# @pytest.mark.env_onecard
-def test_full_ps_ascend_lenet():
-    return_code = os.system("bash run_full_ps_lenet.sh Ascend 1 1 127.0.0.1 8088")
+# @pytest.mark.env_single
+def test_multi_worker_full_ps_ascend_lenet():
+    return_code = os.system("bash shell_run_test.sh Ascend 8 1 127.0.0.1 8088")
    assert return_code == 0

+
 # @pytest.mark.level0
-# @pytest.mark.platform_x86_gpu_training
+# @pytest.mark.platform_arm_ascend_training
+# @pytest.mark.platform_x86_ascend_training
 # @pytest.mark.env_onecard
-def test_full_ps_gpu_lenet():
-    return_code = os.system("bash run_full_ps_lenet.sh GPU 1 1 127.0.0.1 8088")
+def test_full_ps_ascend_lenet():
+    return_code = os.system("bash shell_run_test.sh Ascend 1 1 127.0.0.1 8088")
    assert return_code == 0
--- a/tests/st/ps/full_ps/run_full_ps_lenet.sh
+++ b/tests/st/ps/full_ps/run_full_ps_lenet.sh
@@ -32,7 +32,7 @@ do
  cd ${execute_path}/sched_$i/ || exit
  export RANK_ID=$i
  export DEVICE_ID=$i
-  python -s ${self_path}/../test_full_ps_lenet.py --device_target=$DEVICE_TARGET &
+  python ${self_path}/../test_multi_worker_full_ps_lenet.py --device_target=$DEVICE_TARGET &
 done

 export MS_ROLE=MS_PSERVER
@@ -43,7 +43,7 @@ do
  cd ${execute_path}/server_$i/ || exit
  export RANK_ID=$i
  export DEVICE_ID=$i
-  python -s ${self_path}/../test_full_ps_lenet.py --device_target=$DEVICE_TARGET &
+  python ${self_path}/../test_multi_worker_full_ps_lenet.py --device_target=$DEVICE_TARGET &
 done

 export MS_ROLE=MS_WORKER
@@ -54,7 +54,7 @@ do
  cd ${execute_path}/worker_$i/ || exit
  export RANK_ID=$i
  export DEVICE_ID=$i
-  python -s ${self_path}/../test_full_ps_lenet.py --device_target=$DEVICE_TARGET &
+  python ${self_path}/../test_multi_worker_full_ps_lenet.py --device_target=$DEVICE_TARGET &
 done

 wait $!

--- a/tests/st/ps/multi_worker_full_ps/test_multi_worker_full_ps_lenet.py
+++ b/tests/st/ps/multi_worker_full_ps/test_multi_worker_full_ps_lenet.py
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import argparse
+import numpy as np
+
+import mindspore.context as context
+import mindspore.nn as nn
+from mindspore.common.initializer import TruncatedNormal
+from mindspore import Tensor
+from mindspore.nn import TrainOneStepCell, WithLossCell
+
+parser = argparse.ArgumentParser(description="test_ps_lenet")
+parser.add_argument("--device_target", type=str, default="Ascend")
+args, _ = parser.parse_known_args()
+device_target = args.device_target
+context.set_context(mode=context.GRAPH_MODE, device_target=device_target)
+
+
+def conv(in_channels, out_channels, kernel_size, stride=1, padding=0):
+    """weight initial for conv layer"""
+    weight = weight_variable()
+    return nn.Conv2d(
+        in_channels,
+        out_channels,
+        kernel_size=kernel_size,
+        stride=stride,
+        padding=padding,
+        weight_init=weight,
+        has_bias=False,
+        pad_mode="valid",
+    )
+
+
+def fc_with_initialize(input_channels, out_channels):
+    """weight initial for fc layer"""
+    weight = weight_variable()
+    bias = weight_variable()
+    return nn.Dense(input_channels, out_channels, weight, bias)
+
+
+def weight_variable():
+    """weight initial"""
+    return TruncatedNormal(0.02)
+
+
+class LeNet5(nn.Cell):
+    def __init__(self, num_class=10, channel=3):
+        super(LeNet5, self).__init__()
+        self.num_class = num_class
+        self.conv1 = conv(channel, 6, 5)
+        self.conv2 = conv(6, 16, 5)
+        self.fc1 = fc_with_initialize(16 * 5 * 5, 120)
+        self.fc2 = fc_with_initialize(120, 84)
+        self.fc3 = fc_with_initialize(84, self.num_class)
+        self.relu = nn.ReLU()
+        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.flatten = nn.Flatten()
+
+    def construct(self, x):
+        x = self.conv1(x)
+        x = self.relu(x)
+        x = self.max_pool2d(x)
+        x = self.conv2(x)
+        x = self.relu(x)
+        x = self.max_pool2d(x)
+        x = self.flatten(x)
+        x = self.fc1(x)
+        x = self.relu(x)
+        x = self.fc2(x)
+        x = self.relu(x)
+        x = self.fc3(x)
+        return x
+
+
+if __name__ == "__main__":
+    epoch = 5
+    np.random.seed(0)
+    network = LeNet5(10)
+    network.set_param_ps()
+    criterion = nn.SoftmaxCrossEntropyWithLogits(
+        is_grad=False, sparse=True, reduction="mean"
+    )
+    net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9)
+
+    net_with_criterion = WithLossCell(network, criterion)
+    train_network = TrainOneStepCell(net_with_criterion, net_opt)
+    train_network.set_train()
+    losses = []
+    for _ in range(epoch):
+        data = Tensor(np.random.rand(32, 3, 32, 32).astype(np.float32))
+        label = Tensor(np.random.randint(0, 9, (32)).astype(np.int32))
+        loss = train_network(data, label).asnumpy()
+        losses.append(loss)
+    print(losses)