From 1c4e3e5dd0d32a4216bdad0b1cafcab4ca5ed5bb Mon Sep 17 00:00:00 2001
From: ziyoujiyi <73728031+ziyoujiyi@users.noreply.github.com>
Date: Wed, 2 Mar 2022 16:23:52 +0800
Subject: [PATCH] new fleet_desc builder (#39948)

* delete gloo connect retry

* the_one_ps dirs reconstruct

* .

* .

* create the_one_ps dirs

* create the_one_ps dirs

* create the_one_ps dirs

* create the_one_ps dirs

* create the_one_ps dirs

* create the_one_ps dirs

* the one ps dirs modify

* the one ps dirs modify

* the one ps dirs modify

* the one ps dirs modify

* refactor ps optimize

* refactor ps optimize

* refactor ps optimize

* .

* .

* .

* .

* .

* .

* refactor theoneps

* the_one_ps

* add ps pass unittest

* add ps pass unittest

* ps unitest frame

* ps unittest frame

* ps unittest frame

* ps unittest frame

* ps unittest frame

* ps unittest frame

* ps unittest frame

* ps unittest frame

* ps unittest frame

* ps unittest frame

* ps unittest frame

* ps unittest frame

* ps unittest frame

* ps unittest frame

* ps unittest frame

* ps unittest frame

* ps unittest frame

* add cpu_async_ps_mode test

* add cpu_async_ps_mode test

* add cpu_async_ps_mode test

* ps unittest ready

* ps unittest ready

* solve dist_pass init conflict

* solve import CommContext error

* unittest ok

* implement AllocateFrom

* solve setup.py.in conflict

* solve conflict

* solve conflict

* solve conflict

* .

* .

* cpu-async-ps minimize test ok & gpu minimize test ok

* add heter 2stage unittest

* add heter 2stage unittest

* add heter 2stage unittest

* sync/geo test ok & fix heter_worker program ok

* .

* new fleet desc generator

* new fleet_desc builder

* new fleet_desc builder

* .

* .

* correct ps.proto compile

* .

Co-authored-by: zkh2016 <zhangkaihuo@baidu.com>
---
 paddle/fluid/distributed/ps/ps.proto          |   13 -
 paddle/fluid/framework/CMakeLists.txt         |    5 +-
 paddle/fluid/framework/ps.proto               |  213 ++++
 .../fleet/meta_optimizers/ps_optimizer.py     |    1 +
 python/paddle/distributed/ps/README.md        |    3 -
 python/paddle/distributed/ps/the_one_ps.py    | 1022 ++++++++---------
 .../paddle/distributed/ps/utils/ps_factory.py |    4 +-
 .../ps/utils/ps_program_builder.py            |    5 +-
 python/paddle/distributed/ps/utils/public.py  |    4 +-
 .../fluid/tests/unittests/CMakeLists.txt      |    2 +-
 .../distributed_passes/ps_pass_test_base.py   |   54 +-
 .../test_ps_trainer_pass.py                   |  122 +-
 .../fluid/tests/unittests/ps/CMakeLists.txt   |    4 +-
 .../tests/unittests/ps/ps_dnn_trainer.py      |   86 +-
 .../tests/unittests/ps/test_the_one_ps.py     |   92 +-
 .../fluid/tests/unittests/ps_dnn_model.py     |    1 +
 16 files changed, 961 insertions(+), 670 deletions(-)
 delete mode 100755 paddle/fluid/distributed/ps/ps.proto
 mode change 100644 => 100755 paddle/fluid/framework/CMakeLists.txt
 create mode 100755 paddle/fluid/framework/ps.proto
 delete mode 100755 python/paddle/distributed/ps/README.md
 mode change 100644 => 100755 python/paddle/fluid/tests/unittests/CMakeLists.txt
 mode change 100644 => 100755 python/paddle/fluid/tests/unittests/ps/CMakeLists.txt
 mode change 100644 => 100755 python/paddle/fluid/tests/unittests/ps/test_the_one_ps.py

diff --git a/paddle/fluid/distributed/ps/ps.proto b/paddle/fluid/distributed/ps/ps.proto
deleted file mode 100755
index 2691f63752..0000000000
--- a/paddle/fluid/distributed/ps/ps.proto
+++ /dev/null
@@ -1,13 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
\ No newline at end of file
diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
old mode 100644
new mode 100755
index 14aecb5fd4..02d90b9c6d
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -235,6 +235,7 @@ if(WITH_PYTHON)
   py_proto_compile(trainer_py_proto SRCS trainer_desc.proto data_feed.proto)
   py_proto_compile(distributed_strategy_py_proto SRCS distributed_strategy.proto)
   py_proto_compile(pass_desc_py_proto SRCS pass_desc.proto)
+  py_proto_compile(ps_py_proto SRCS ps.proto)
 #Generate an empty \
     #__init__.py to make framework_py_proto as a valid python module.
   add_custom_target(fleet_proto_init ALL  
@@ -242,12 +243,13 @@ if(WITH_PYTHON)
     COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto/__init__.py 
   )
   add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
-  add_dependencies(framework_py_proto framework_py_proto_init trainer_py_proto distributed_strategy_py_proto fleet_proto_init pass_desc_py_proto)
+  add_dependencies(framework_py_proto framework_py_proto_init trainer_py_proto distributed_strategy_py_proto fleet_proto_init pass_desc_py_proto ps_py_proto)
   if (NOT WIN32)
     add_custom_command(TARGET framework_py_proto POST_BUILD
       COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto
       COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/
       COMMAND cp distributed_strategy_*.py ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto
+      COMMAND cp ps_pb2.py ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto
       COMMENT "Copy generated python proto into directory paddle/fluid/proto."
       WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
     add_custom_target(fleet_executor_proto_init ALL DEPENDS fleet_proto_init fleet_executor_desc_py_proto
@@ -259,6 +261,7 @@ if(WITH_PYTHON)
     add_custom_command(TARGET framework_py_proto POST_BUILD
           COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto
           COMMAND copy /Y *.py ${proto_dstpath}
+      COMMAND copy /Y ps_pb2.py ${fleet_proto_dstpath}
 	  COMMAND copy /Y distributed_strategy_*.py ${fleet_proto_dstpath}
           COMMENT "Copy generated python proto into directory paddle/fluid/proto."
 	  COMMENT "Copy generated python proto into directory paddle/distributed/fleet/proto."
diff --git a/paddle/fluid/framework/ps.proto b/paddle/fluid/framework/ps.proto
new file mode 100755
index 0000000000..0ae87812bc
--- /dev/null
+++ b/paddle/fluid/framework/ps.proto
@@ -0,0 +1,213 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+package paddle.distributed;
+option cc_generic_services = true;
+option cc_enable_arenas = true;
+
+message FsClientParameter {
+  enum FsApiType {
+    HDFS = 0;
+    AFS = 1;
+  }
+  optional FsApiType fs_type = 1 [ default = HDFS ];
+  optional string uri = 2;        // such as afs://xxx.afs.com:9902
+  optional string user = 3;       // user_name to access fs
+  optional string passwd = 4;     // password
+  optional int32 buffer_size = 5; // buffer for read/write
+  optional string hadoop_bin = 51;
+  optional string afs_conf = 101;
+}
+
+message PSParameter {
+  optional string worker_class = 1;
+  optional string server_class = 2;
+  optional string instance_class = 3;
+  optional string init_gflags = 4 [ default = "" ];
+  optional WorkerParameter worker_param = 101;
+  optional ServerParameter server_param = 102;
+  repeated DownpourTrainerParameter trainer_param = 301;
+  optional FsClientParameter fs_client_param = 501;
+}
+
+message WorkerParameter {
+  optional DownpourWorkerParameter downpour_worker_param = 1;
+}
+
+message DownpourWorkerParameter {
+  repeated TableParameter downpour_table_param = 1;
+}
+
+message DownpourServerParameter {
+  repeated TableParameter downpour_table_param = 1;
+  optional ServerServiceParameter service_param = 2;
+}
+
+message ServerParameter {
+  optional DownpourServerParameter downpour_server_param = 1;
+}
+
+message DownpourTrainerParameter {
+  repeated DenseTableParameter dense_table = 1;
+  repeated SparseTableParameter sparse_table = 2;
+  optional int32 push_sparse_per_batch = 3;
+  optional int32 push_dense_per_batch = 4;
+  repeated string skip_op = 5;
+  repeated ProgramConfig program_config = 6;
+}
+
+message DenseTableParameter {
+  optional int32 table_id = 1;
+  repeated string dense_variable_name = 2;
+  repeated string dense_gradient_variable_name = 3;
+  optional int32 fea_dim = 4;
+}
+
+message SparseTableParameter {
+  optional int32 table_id = 1;
+  optional int32 feature_dim = 2;
+  repeated string slot_key = 3;
+  repeated string slot_value = 4;
+  repeated string slot_gradient = 5;
+}
+
+message ServerServiceParameter {
+  optional string server_class = 1 [ default = "BrpcPsServer" ];
+  optional string client_class = 2 [ default = "BrpcPsClient" ];
+  optional string service_class = 3 [ default = "BrpcPsService" ];
+  optional uint32 start_server_port = 4
+      [ default = 0 ]; // will find a avaliable port from it
+  optional uint32 server_thread_num = 5 [ default = 12 ];
+}
+
+message ProgramConfig {
+  required string program_id = 1;
+  repeated int32 push_sparse_table_id = 2;
+  repeated int32 push_dense_table_id = 3;
+  repeated int32 pull_sparse_table_id = 4;
+  repeated int32 pull_dense_table_id = 5;
+}
+
+enum TableType {
+  PS_SPARSE_TABLE = 0;
+  PS_DENSE_TABLE = 1;
+  PS_OTHER_TABLE = 2;
+}
+
+message TableParameter {
+  optional uint64 table_id = 1;
+  optional string table_class = 2;
+  optional uint64 shard_num = 3 [ default = 1000 ];
+  optional TableAccessorParameter accessor = 4;
+  optional TensorAccessorParameter tensor = 5;
+  optional CommonAccessorParameter common = 6;
+  optional TableType type = 7;
+  optional bool compress_in_save = 8 [ default = false ];
+}
+
+message TableAccessorParameter {
+  optional string accessor_class = 1;
+  optional uint32 fea_dim = 4 [ default = 11 ];
+  optional uint32 embedx_dim = 5 [ default = 8 ];
+  optional uint32 embedx_threshold = 6 [ default = 10 ];
+  optional CtrAccessorParameter ctr_accessor_param = 7;
+  repeated TableAccessorSaveParameter table_accessor_save_param = 8;
+  optional SparseCommonSGDRuleParameter embed_sgd_param = 10;
+  optional SparseCommonSGDRuleParameter embedx_sgd_param = 11;
+}
+
+message CtrAccessorParameter {
+  optional float nonclk_coeff = 1
+      [ default = 0.1 ]; // to calculate show_click_score
+  optional float click_coeff = 2
+      [ default = 1 ]; // to calculate show_click_score
+  optional float base_threshold = 3 [
+    default = 1.5
+  ]; // show_click_score > base_threshold, this feature can be saved
+  optional float delta_threshold = 4
+      [ default =
+            0.25 ]; // delta_score > delta_threshold, this feature can be saved
+  optional float delta_keep_days = 5
+      [ default =
+            16 ]; // unseen_day < delta_keep_days, this feature can be saved
+  optional float show_click_decay_rate = 6 [
+    default = 0.98
+  ]; // show/click will update to show/click * show_click_decay_rate after a day
+  optional float delete_threshold = 7
+      [ default = 0.8 ]; // threshold to shrink a feasign
+  optional float delete_after_unseen_days = 8
+      [ default = 30 ]; // unseen_day > delete_after_unseen_days, this feature
+                        // will be delete in shrink_model
+  optional int32 ssd_unseenday_threshold = 9
+      [ default = 1 ]; // threshold to save ssd
+}
+
+message TensorAccessorParameter {
+  optional string feed_var_name = 1;
+  optional string fetch_var_name = 2;
+  optional int64 startup_program_id = 3;
+  optional int64 main_program_id = 4;
+  optional string tensor_table_class = 6;
+}
+
+message CommonAccessorParameter {
+  optional string name = 1;
+  optional string table_name = 2;
+  repeated string attributes = 3;
+  repeated string params = 4;
+  repeated uint32 dims = 5;
+  repeated string initializers = 6;
+  optional string entry = 7;
+  optional int32 trainer_num = 8;
+  optional bool sync = 9;
+  optional uint32 table_num = 10;
+  optional uint32 table_dim = 11;
+}
+
+message TableAccessorSaveParameter {
+  optional uint32 param = 1;
+  optional string converter = 2;
+  optional string deconverter = 3;
+}
+
+message SparseCommonSGDRuleParameter {
+  optional string name = 1;
+  optional SparseNaiveSGDRuleParameter naive = 2;
+  optional SparseAdagradSGDRuleParameter adagrad = 3;
+  optional SparseAdamSGDParameter adam = 4;
+}
+
+message SparseNaiveSGDRuleParameter { // SparseNaiveSGDRule
+  optional double learning_rate = 1 [ default = 0.05 ];
+  optional double initial_range = 2 [ default = 0.0001 ];
+  repeated float weight_bounds = 3;
+}
+
+message
+    SparseAdagradSGDRuleParameter { // SparseAdaGradSGDRule|StdAdaGradSGDRule
+  optional double learning_rate = 1 [ default = 0.05 ];
+  optional double initial_g2sum = 2 [ default = 3.0 ];
+  optional double initial_range = 3 [ default = 0.0001 ];
+  repeated float weight_bounds = 4;
+}
+
+message SparseAdamSGDParameter { // SparseAdamSGDRule
+  optional double learning_rate = 1 [ default = 0.001 ];
+  optional double initial_range = 2 [ default = 0.0001 ];
+  optional double beta1_decay_rate = 3 [ default = 0.9 ];
+  optional double beta2_decay_rate = 4 [ default = 0.999 ];
+  optional double ada_epsilon = 5 [ default = 1e-08 ];
+  repeated float weight_bounds = 6;
+}
diff --git a/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py
index 100a6882b1..00937dbe7a 100755
--- a/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py
@@ -54,6 +54,7 @@ class ParameterServerOptimizer(MetaOptimizerBase):
         attrs['cloned_startup'] = attrs['origin_startup_program'].clone()
 
         attrs['user_defined_strategy'] = self.user_defined_strategy
+        attrs['valid_strategy'] = self.user_defined_strategy
         attrs['trainer'] = TrainerRuntimeConfig(self.user_defined_strategy)
         attrs['ps_mode'] = attrs['trainer'].mode
         logger.info("ps_mode: {}".format(attrs['ps_mode']))
diff --git a/python/paddle/distributed/ps/README.md b/python/paddle/distributed/ps/README.md
deleted file mode 100755
index 8d28031794..0000000000
--- a/python/paddle/distributed/ps/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# 目录说明
-
-* 改完之后，上层目录中 fleet 中相关文件（夹）就可以删除
diff --git a/python/paddle/distributed/ps/the_one_ps.py b/python/paddle/distributed/ps/the_one_ps.py
index 14a68ad916..cc744bc9d9 100755
--- a/python/paddle/distributed/ps/the_one_ps.py
+++ b/python/paddle/distributed/ps/the_one_ps.py
@@ -15,10 +15,11 @@
 import warnings
 
 import os
+from paddle.distributed.fleet.proto import ps_pb2
 import paddle.fluid as fluid
 import paddle.distributed.fleet as fleet
 from paddle.fluid import core
-from .utils.public import *
+from paddle.distributed.ps.utils.public import *
 from paddle.fluid.framework import Program
 from paddle.fluid.compiler import CompiledProgram
 from paddle.fluid.executor import Executor
@@ -29,14 +30,10 @@ from paddle.distributed.fleet.base.private_helper_function import wait_server_re
 from paddle.fluid.communicator import Communicator, HeterClient
 from google.protobuf import text_format
 
-__all__ = []
-
-
-def conv_indent(indent):
-    return "".join([" "] * indent)
-
-
-PSERVER_SAVE_SUFFIX = ".shard"
+__all__ = [
+    'Table', 'SparseTable', 'GeoSparseTable', 'BarrierTable', 'TensorTable',
+    'DenseTable'
+]
 
 
 def get_program_by_id(context, program_id):
@@ -62,129 +59,140 @@ def parse_table_class(varname, program_id, context):
                 return "MemorySparseTable"
 
 
-def get_default_accessor_proto(accessor, varname, program_id, context):
+def check_embedding_dim(accessor_proto, varname, program_id, context):
     main_program, startup_program = get_program_by_id(context, program_id)
     embedding_dim = 0
     for var in main_program.list_vars():
         if var.name == varname:
             embedding_dim = var.shape[1]
+            print('new var: {}, {}, {}'.format(var, embedding_dim,
+                                               accessor_proto.fea_dim))
             break
-
-    if not accessor.HasField("accessor_class"):
-        accessor.accessor_class = "CtrCommonAccessor"
-    if not accessor.HasField("fea_dim"):
-        accessor.fea_dim = embedding_dim + 2
-    if not accessor.HasField("embedx_dim"):
-        accessor.embedx_dim = embedding_dim - 1
-    if not accessor.HasField("embedx_threshold"):
-        accessor.embedx_threshold = 0
-
-    ctr_accessor_param = accessor.ctr_accessor_param
-    if not ctr_accessor_param.HasField("nonclk_coeff"):
-        ctr_accessor_param.nonclk_coeff = 0.1
-    if not ctr_accessor_param.HasField("click_coeff"):
-        ctr_accessor_param.click_coeff = 1.0
-    if not ctr_accessor_param.HasField("base_threshold"):
-        ctr_accessor_param.base_threshold = 0
-    if not ctr_accessor_param.HasField("delta_threshold"):
-        ctr_accessor_param.delta_threshold = 0
-    if not ctr_accessor_param.HasField("delta_keep_days"):
-        ctr_accessor_param.delta_keep_days = 16
-    if not ctr_accessor_param.HasField("show_click_decay_rate"):
-        ctr_accessor_param.show_click_decay_rate = 1
-    if not ctr_accessor_param.HasField("delete_threshold"):
-        ctr_accessor_param.delete_threshold = 0
-    if not ctr_accessor_param.HasField("delete_after_unseen_days"):
-        ctr_accessor_param.delete_after_unseen_days = 30
-    if not ctr_accessor_param.HasField("ssd_unseenday_threshold"):
-        ctr_accessor_param.ssd_unseenday_threshold = 1
-
-    for sgd_param in [accessor.embed_sgd_param, accessor.embedx_sgd_param]:
-        if not sgd_param.HasField("name"):
-            sgd_param.name = "SparseAdaGradSGDRule"
-        if sgd_param.name == "SparseAdaGradSGDRule" or sgd_param.name == "StdAdaGradSGDRule":
-            if not sgd_param.adagrad.HasField("learning_rate"):
-                sgd_param.adagrad.learning_rate = 0.05
-            if not sgd_param.adagrad.HasField("initial_g2sum"):
-                sgd_param.adagrad.initial_g2sum = 3.0
-            if not sgd_param.adagrad.HasField("initial_range"):
-                sgd_param.adagrad.initial_range = 0.0001
-            if len(sgd_param.adagrad.weight_bounds) == 0:
-                sgd_param.adagrad.weight_bounds.extend([-10.0, 10.0])
-        if sgd_param.name == "SparseNaiveSGDRule":
-            if not sgd_param.naive.HasField("learning_rate"):
-                sgd_param.naive.learning_rate = 0.05
-            if not sgd_param.naive.HasField("initial_range"):
-                sgd_param.naive.initial_range = 0.0001
-            if len(sgd_param.naive.weight_bounds) == 0:
-                sgd_param.naive.weight_bounds.extend([-10.0, 10.0])
-        if sgd_param.name == "SparseAdamSGDRule":
-            if not sgd_param.adam.HasField("learning_rate"):
-                sgd_param.adam.learning_rate = 0.001
-            if not sgd_param.adam.HasField("initial_range"):
-                sgd_param.adam.initial_range = 0.0001
-            if not sgd_param.adam.HasField("beta1_decay_rate"):
-                sgd_param.adam.beta1_decay_rate = 0.9
-            if not sgd_param.adam.HasField("beta2_decay_rate"):
-                sgd_param.adam.beta2_decay_rate = 0.999
-            if not sgd_param.adam.HasField("ada_epsilon"):
-                sgd_param.adam.ada_epsilon = 1e-08
-            if len(sgd_param.adam.weight_bounds) == 0:
-                sgd_param.adam.weight_bounds.extend([-10.0, 10.0])
-
-
-def check_embedding_dim(accessor, varname, program_id, context):
-    main_program, startup_program = get_program_by_id(context, program_id)
-    embedding_dim = 0
-    for var in main_program.list_vars():
-        if var.name == varname:
-            embedding_dim = var.shape[1]
-            break
-    fea_dim = accessor.fea_dim
+    fea_dim = accessor_proto.fea_dim
     if fea_dim != embedding_dim + 2:
         raise ValueError(
             "The fea_dim is wrong, it will be sparse_embedding_dim + 2: {}, but got {}".
             format(embedding_dim + 2, fea_dim))
-    embedx_dim = accessor.embedx_dim
+    embedx_dim = accessor_proto.embedx_dim
     if embedx_dim != embedding_dim - 1:
         raise ValueError(
             "The embedx_dim is wrong, it will be sparse_embedding_dim - 1: {}, but got {}".
             format(embedding_dim - 1, embedx_dim))
 
 
+class Service:
+    def __init__(self):
+        pass
+
+    def _set(self, service_proto):
+        service_proto.server_class = "BrpcPsServer"
+        service_proto.client_class = "BrpcPsClient"
+        service_proto.service_class = "BrpcPsService"
+        service_proto.start_server_port = 0
+        service_proto.server_thread_num = 12
+
+
+class GpuService(Service):
+    def __init__(self):
+        super(GpuService).__init__(self)
+
+    def _set(self, service_proto):
+        super(GpuService)._set(service_proto)
+        service_proto.server_class = 'PsLocalServer'
+        service_proto.client_class = 'PsLocalClient'
+
+
 class Accessor:
     def __init__(self):
         self.accessor_class = ""
         self.optimizer = None
-        self.feature_dim = -1
-        self.embedding_dim = -1
-        self.optimizer = None
-
-    def to_string(self, indent):
-        accessor_str = "{}accessor {{{}\n{}}}"
-        attrs = ""
-        attrs += "accessor_class: \"{}\" ".format(self.accessor_class)
-        attrs += "fea_dim: {} ".format(self.feature_dim)
-        attrs += "embedx_dim: {} ".format(self.embedding_dim)
-        attrs += "\n"
-        if self.optimizer is not None:
-            attrs += self.optimizer.to_string(indent)
-        return accessor_str.format(
-            conv_indent(indent), attrs, conv_indent(indent))
+        self.feature_dim = 0
+        self.embedding_dim = 0
 
+    # TableAccessorParameter accessor
+    def _set(self, accessor_proto, varname, program_id, context):
+        main_program, startup_program = get_program_by_id(context, program_id)
+        embedding_dim = 0
+        for var in main_program.list_vars():
+            if var.name == varname:
+                embedding_dim = var.shape[1]
+                break
 
-class CommonAccessor:
+        if not accessor_proto.HasField("accessor_class"):
+            accessor_proto.accessor_class = "CtrCommonAccessor"
+        if not accessor_proto.HasField("fea_dim"):
+            accessor_proto.fea_dim = embedding_dim + 2
+        if not accessor_proto.HasField("embedx_dim"):
+            accessor_proto.embedx_dim = embedding_dim - 1
+        if not accessor_proto.HasField("embedx_threshold"):
+            accessor_proto.embedx_threshold = 0
+
+        ctr_accessor_param = accessor_proto.ctr_accessor_param
+        if not ctr_accessor_param.HasField("nonclk_coeff"):
+            ctr_accessor_param.nonclk_coeff = 0.1
+        if not ctr_accessor_param.HasField("click_coeff"):
+            ctr_accessor_param.click_coeff = 1.0
+        if not ctr_accessor_param.HasField("base_threshold"):
+            ctr_accessor_param.base_threshold = 0
+        if not ctr_accessor_param.HasField("delta_threshold"):
+            ctr_accessor_param.delta_threshold = 0
+        if not ctr_accessor_param.HasField("delta_keep_days"):
+            ctr_accessor_param.delta_keep_days = 16
+        if not ctr_accessor_param.HasField("show_click_decay_rate"):
+            ctr_accessor_param.show_click_decay_rate = 1
+        if not ctr_accessor_param.HasField("delete_threshold"):
+            ctr_accessor_param.delete_threshold = 0
+        if not ctr_accessor_param.HasField("delete_after_unseen_days"):
+            ctr_accessor_param.delete_after_unseen_days = 30
+        if not ctr_accessor_param.HasField("ssd_unseenday_threshold"):
+            ctr_accessor_param.ssd_unseenday_threshold = 1
+
+        for sgd_param in [
+                accessor_proto.embed_sgd_param, accessor_proto.embedx_sgd_param
+        ]:
+            if not sgd_param.HasField("name"):
+                sgd_param.name = "SparseAdaGradSGDRule"
+            if sgd_param.name == "SparseAdaGradSGDRule" or sgd_param.name == "StdAdaGradSGDRule":
+                if not sgd_param.adagrad.HasField("learning_rate"):
+                    sgd_param.adagrad.learning_rate = 0.05
+                if not sgd_param.adagrad.HasField("initial_g2sum"):
+                    sgd_param.adagrad.initial_g2sum = 3.0
+                if not sgd_param.adagrad.HasField("initial_range"):
+                    sgd_param.adagrad.initial_range = 0.0001
+                if len(sgd_param.adagrad.weight_bounds) == 0:
+                    sgd_param.adagrad.weight_bounds.extend([-10.0, 10.0])
+            if sgd_param.name == "SparseNaiveSGDRule":
+                if not sgd_param.naive.HasField("learning_rate"):
+                    sgd_param.naive.learning_rate = 0.05
+                if not sgd_param.naive.HasField("initial_range"):
+                    sgd_param.naive.initial_range = 0.0001
+                if len(sgd_param.naive.weight_bounds) == 0:
+                    sgd_param.naive.weight_bounds.extend([-10.0, 10.0])
+            if sgd_param.name == "SparseAdamSGDRule":
+                if not sgd_param.adam.HasField("learning_rate"):
+                    sgd_param.adam.learning_rate = 0.001
+                if not sgd_param.adam.HasField("initial_range"):
+                    sgd_param.adam.initial_range = 0.0001
+                if not sgd_param.adam.HasField("beta1_decay_rate"):
+                    sgd_param.adam.beta1_decay_rate = 0.9
+                if not sgd_param.adam.HasField("beta2_decay_rate"):
+                    sgd_param.adam.beta2_decay_rate = 0.999
+                if not sgd_param.adam.HasField("ada_epsilon"):
+                    sgd_param.adam.ada_epsilon = 1e-08
+                if len(sgd_param.adam.weight_bounds) == 0:
+                    sgd_param.adam.weight_bounds.extend([-10.0, 10.0])
+
+
+class CommonAccessor(Accessor):
     def __init__(self):
-        self.accessor_class = ""
-        self.table_name = None
-        self.entry = None
+        super(CommonAccessor, self).__init__()
+        self.table_name = ''
+        self.entry = 'none'
         self.attrs = []
         self.params = []
         self.dims = []
         self.trainer_num = 0
-        self.sync = "false"
-        self.table_num = None
-        self.table_dim = None
+        self.sync = False
         self.initializers = []
         self.opt_input_map = {}
         self.opt_attr_map = {}
@@ -422,233 +430,361 @@ class CommonAccessor:
         self.initializers = initializers
         self.attrs = attrs
 
-    def to_string(self, indent):
-        accessor_str = "{}common {{{}\n{}}}"
-        attrs = ""
-        attrs += "name: \"{}\" ".format(self.accessor_class)
-
-        if self.table_name:
-            attrs += "table_name: \"{}\" ".format(self.table_name)
-
-        if self.entry:
-            attrs += "entry: \"{}\" ".format(self.entry)
-        attrs += "trainer_num: {} ".format(self.trainer_num)
-        attrs += "sync: {} ".format(self.sync)
-        if self.table_num:
-            attrs += "table_num: {} ".format(self.table_num)
-        if self.table_dim:
-            attrs += "table_dim: {} ".format(self.table_dim)
-
-        for param in self.params:
-            attrs += "params: \"{}\" ".format(param)
-
-        for dim in self.dims:
-            attrs += "dims: {} ".format(dim)
-
-        for initializer in self.initializers:
-            attrs += "initializers: \"{}\" ".format(initializer)
-
-        attrs += "\n"
-        return accessor_str.format(
-            conv_indent(indent), attrs, conv_indent(indent))
+    # CommonAccessorParameter common
+    def _set(self, proto):
+        proto.name = self.accessor_class
+        proto.table_name = self.table_name
+        proto.params.extend(self.params)
+        proto.dims.extend(self.dims)
+        proto.initializers.extend(self.initializers)
+        proto.entry = self.entry
+        proto.trainer_num = self.trainer_num
+        proto.sync = self.sync
+        proto.table_num = self.table_num
+        proto.table_dim = self.table_dim
 
 
 class Tensor:
-    def __init__(self):
-        self.main_program_id = None
-        self.startup_program_id = None
-        self.feed_var_name = None
-        self.fetch_var_name = None
-        self.tensor_table_class = False
-
-    def to_string(self, indent):
-        program_str = "{}tensor {{{}\n{}}}"
-        attrs = ""
-        attrs += "feed_var_name: \"{}\" ".format(str(self.feed_var_name))
-        attrs += "fetch_var_name: \"{}\" ".format(str(self.fetch_var_name))
-        attrs += "startup_program_id: {} ".format(str(self.startup_program_id))
-        attrs += "main_program_id: {} ".format(str(self.main_program_id))
-        attrs += "tensor_table_class: \"{}\" ".format(
-            str(self.tensor_table_class))
-        attrs += "\n"
-        return program_str.format(
-            conv_indent(indent), attrs, conv_indent(indent))
+    def __init__(self, tesnor_dcit):
+        self.tensor_dict = tesnor_dcit
+
+    def _set(self, tensor_proto):
+        tensor_proto.main_program_id = self.tensor_dict.get("main_program_id",
+                                                            0)
+        tensor_proto.startup_program_id = self.tensor_dict.get(
+            "startup_program_id", 0)
+        tensor_proto.feed_var_name = self.tensor_dict.get("feed_var_name", '')
+        tensor_proto.fetch_var_name = self.tensor_dict.get("fetch_var_name", '')
+        tensor_proto.tensor_table_class = self.tensor_dict.get(
+            "tensor_table_class", '')
 
 
 class Table:
     def __init__(self):
-        self.id = -1
         self.table_class = None
         self.shard_num = -1
         self.type = None
-        self.accessor = None
-        self.common = None
+        self.accessor = Accessor()
+        self.shard_num = 256
+        self.common = CommonAccessor()
         self.tensor = None
-        self.accessor_proto = None
-
-    def to_string(self, indent):
-        # if self.id == 1:
-        #     proto_txt = ''
-        #     with open('./sparse_table.prototxt') as f:
-        #         proto_txt = f.read()
-        #     return proto_txt
-        table_str = "{}downpour_table_param {{{}\n{}}}"
-
-        attrs = ""
-        attrs += "table_id: {} ".format(self.id)
-        attrs += "table_class: \"{}\" ".format(self.table_class)
-        attrs += "shard_num: {} ".format(self.shard_num)
-        attrs += "type: {}".format(self.type)
-        attrs += "\n"
-        indent += 2
-
-        if self.accessor_proto is not None:
-            accessor_str = "{}accessor {{{}\n{}}}"
-            accessor_str = accessor_str.format(
-                conv_indent(indent), self.accessor_proto, conv_indent(indent))
-            attrs += accessor_str + "\n"
-        elif self.accessor is not None:
-            attrs += self.accessor.to_string(indent)
-            attrs += "\n"
-
-        if self.tensor is not None:
-            attrs += self.tensor.to_string(indent)
-            attrs += "\n"
-
-        if self.common is not None:
-            attrs += self.common.to_string(indent)
-            attrs += "\n"
-
-        return table_str.format(conv_indent(indent), attrs, conv_indent(indent))
 
+    def _set(self, table_proto):
+        pass
 
-class Service:
-    def __init__(self):
-        self.server_class = "BrpcPsServer"
-        self.client_class = "BrpcPsClient"
-        self.service_class = "BrpcPsService"
-        self.start_server_port = 0
-        self.server_thread_num = 12
 
-    def to_string(self, indent):
-        service_str = "{}service_param {{{}\n{}}}"
+class BarrierTable(Table):
+    def __init__(self, context, idx):
+        super(BarrierTable, self).__init__()
+        self.type = None
+        self.shard_num = 256
+        self.accessor.accessor_class = 'CommMergeAccessor'
+        self.common.attrs = ""
+        self.common.dims = []
+        self.common.params = []
+        self.is_heter_ps_mode = context['is_heter_ps_mode']
+        self.role_maker = context['role_maker']
+        self.idx = idx
+        self.is_sync = context['is_sync']
+
+    def _set(self, table_proto):
+        table_proto.table_id = self.idx
+        table_proto.table_class = 'BarrierTable'
+        table_proto.shard_num = 256
+        table_proto.type = ps_pb2.PS_OTHER_TABLE
+
+        table_proto.accessor.accessor_class = "CommMergeAccessor"
+        table_proto.accessor.fea_dim = 0
+        table_proto.accessor.embedx_dim = 0
+
+        table_proto.common.name = ""
+        table_proto.common.table_name = "barrier_table"
+        table_proto.common.sync = self.is_sync
+        table_proto.common.entry = 'none'
+
+        trainer_num = get_trainers(self.role_maker)
+        if self.is_heter_ps_mode:
+            trainer_num += len(self.role_maker._get_heter_worker_endpoints())
+        table_proto.common.trainer_num = trainer_num
 
-        attrs = ""
-        attrs += "server_class: \"{}\" ".format(self.server_class)
-        attrs += "client_class: \"{}\" ".format(self.client_class)
-        attrs += "service_class: \"{}\" ".format(self.service_class)
-        attrs += "start_server_port: {} ".format(self.start_server_port)
-        attrs += "server_thread_num: {} ".format(self.server_thread_num)
 
-        return service_str.format(
-            conv_indent(indent), attrs, conv_indent(indent))
+class TensorTable(Table):
+    def __init__(self, idx, tensor_dict, role_maker):
+        super(TensorTable, self).__init__()
+        self.idx = idx
+        self.tensor_dict = tensor_dict
+        self.role_maker = role_maker
 
+    def _set(self, table_proto):
+        table_proto.table_id = self.idx
+        table_proto.type = ps_pb2.PS_OTHER_TABLE
+        table_proto.table_class = self.tensor_dict.get("tensor_table_class", '')
 
-class DownpourServer:
-    def __init__(self):
-        self.service = None
-        self.tables = []
+        table_proto.accessor.accessor_class = "CommMergeAccessor"
 
-    def set_service_param(self, service):
-        self.service = service
+        table_proto.common.table_name = self.tensor_dict.get("feed_var_name",
+                                                             '')
+        table_proto.common.trainer_num = get_trainers(self.role_maker)
 
-    def append_tables(self, table):
-        if not isinstance(table, Table):
-            raise ValueError("only support instance Table")
-        self.tables.append(table)
+        tensor = Tensor(self.tensor_dict)
+        tensor._set(table_proto.tensor)
 
-    def to_string(self, indent):
-        server_str = "{}downpour_server_param {{{}\n{}}}"
 
-        table_strs = ""
-        indent += 2
+class SparseTable(Table):
+    def __init__(self, context, send_ctx):
+        super(SparseTable, self).__init__()
+        self.context = context
+        self.ctx = send_ctx
+        self.type = None
+        self.table_class = 'MemorySparseTable'
+        self.accessor = Accessor()
 
-        table_strs += "\n"
-        table_strs += self.service.to_string(indent)
+    def _set(self, table_proto):
+        ctx = self.ctx
+        if ctx.is_tensor_table() or len(ctx.origin_varnames()) < 1 or (
+                ctx.is_sparse() == False):
+            return
+        table_proto.table_id = ctx.table_id()
+        table_proto.table_class = self.table_class
+        table_proto.type = ps_pb2.PS_SPARSE_TABLE
+        table_proto.shard_num = self.shard_num
+
+        self.common.table_name = self.context['grad_name_to_param_name'][
+            ctx.origin_varnames()[0]]
+
+        print('new table_name: {}'.format(self.common.table_name))
+        all_table_proto = self.context[
+            "user_defined_strategy"].sparse_table_configs
+        usr_table_proto = all_table_proto.add()
+        for proto in all_table_proto:
+            if proto.table_name == self.common.table_name:
+                usr_table_proto = proto
+                break
+        table_proto.table_class = 'MemorySparseTable'
+        warnings.warn("The PS mode must use MemorySparseTable.")
+        if usr_table_proto.HasField("shard_num"):
+            table_proto.shard_num = usr_table_proto.shard_num
+        else:
+            table_proto.shard_num = 1000
+            warnings.warn(
+                "The shard_num of sparse table is not set, use default value 1000."
+            )
 
-        for table in self.tables:
-            table_strs += "\n"
-            table_strs += table.to_string(indent)
-        return server_str.format(
-            conv_indent(indent), table_strs, conv_indent(indent))
+        if usr_table_proto.accessor.ByteSize() == 0:
+            warnings.warn(
+                "The accessor of sparse table is not set, use default value.")
 
+        table_proto.accessor.ParseFromString(
+            usr_table_proto.accessor.SerializeToString())
+        self.accessor._set(table_proto.accessor, self.common.table_name,
+                           ctx.program_id(), self.context)
 
-class Server:
-    def __init__(self):
-        self.servers = []
+        check_embedding_dim(table_proto.accessor, self.common.table_name,
+                            ctx.program_id(), self.context)
 
-    def add_server(self, server):
-        if not isinstance(server, DownpourServer):
-            raise ValueError("only support instance DownpourServer")
-        self.servers.append(server)
+        adam_d2sum = self.context["user_defined_strategy"].adam_d2sum
+        self.common.parse_by_optimizer(ctx, self.context)
+        self.common.parse_entry(self.common.table_name,
+                                ctx.program_id(), self.context)
+        self.common.sync = True if self.context['is_sync'] else False
 
-    def __str__(self):
-        server_str = "server_param {{{}\n}}"
-        indent = 2
-        servers_str = ""
-        for server in self.servers:
-            servers_str += "\n"
-            servers_str += server.to_string(indent)
+        self.common._set(table_proto.common)
 
-        return server_str.format(servers_str)
 
+class GeoSparseTable(SparseTable):
+    def __init__(self, context, send_ctx):
+        super(GeoSparseTable, self).__init__(context, send_ctx)
+        self.table_class = "SparseGeoTable"
+        if self.context['ps_mode'] != DistributedMode.GEO:
+            raise ValueError("not geo sparse table!")
+
+    def _set(self, table_proto):
+        ctx = self.ctx
+        if ctx.is_tensor_table() or len(ctx.origin_varnames()) < 1 or (
+                ctx.is_sparse() == False):
+            return
+        table_proto.table_id = ctx.table_id()
+        table_proto.table_class = self.table_class
+        table_proto.type = ps_pb2.PS_SPARSE_TABLE
+        table_proto.shard_num = self.shard_num
+
+        table_proto.accessor.accessor_class = 'CommMergeAccessor'
+        table_proto.accessor.fea_dim = ctx.sections()[0]
+        table_proto.accessor.embedx_dim = ctx.sections()[1]
+
+        self.common.table_name = self.context['grad_name_to_param_name'][
+            ctx.origin_varnames()[0]]
+        adam_d2sum = self.context["user_defined_strategy"].adam_d2sum
+        self.common.parse_by_optimizer(ctx, self.context)
+        self.common.parse_entry(self.common.table_name,
+                                ctx.program_id(), self.context)
+        self.common.sync = False
+        self.common._set(table_proto.common)
+
+
+class DenseTable(Table):
+    def __init__(self, context, send_ctx):
+        super(DenseTable, self).__init__()
+        self.context = context
+        self.ctx = send_ctx
+        self.accessor = Accessor()
 
-class DownpourWorker:
+    def _set(self, table_proto):
+        ctx = self.ctx
+        if ctx.is_tensor_table() or len(ctx.origin_varnames()) < 1 or (
+                ctx.is_sparse() == True):
+            return
+
+        table_proto.table_id = ctx.table_id()
+
+        table_proto.type = ps_pb2.PS_DENSE_TABLE
+        table_proto.table_class = "CommonDenseTable"
+        table_proto.shard_num = 256
+
+        table_proto.accessor.accessor_class = 'CommMergeAccessor'
+        table_proto.accessor.fea_dim = ctx.sections()[0]
+        table_proto.accessor.embedx_dim = 1
+
+        self.common.table_name = "MergedDense"
+        adam_d2sum = self.context["user_defined_strategy"].adam_d2sum
+        self.common.parse_by_optimizer(ctx, self.context)
+        self.common.parse_entry(self.common.table_name,
+                                ctx.program_id(), self.context)
+        self.common.sync = True if self.context['is_sync'] else False
+
+        self.common._set(table_proto.common)
+
+
+class Server:
     def __init__(self):
-        self.tables = []
+        pass
 
-    def append_tables(self, table):
-        if not isinstance(table, Table):
-            raise ValueError("only support instance Table")
-        self.tables.append(table)
+    def _set(self):
+        pass
 
-    def to_string(self, indent):
-        worker_str = "{}downpour_worker_param {{{}\n{}}}"
-        table_strs = ""
-        indent += 2
-        for table in self.tables:
-            table_strs += "\n"
-            table_strs += table.to_string(indent)
 
-        return worker_str.format(
-            conv_indent(indent), table_strs, conv_indent(indent))
+class DownpourServer(Server):
+    def __init__(self):
+        super(DownpourServer, self).__init__()
+
+    def _set(self):
+        pass
 
 
 class Worker:
     def __init__(self):
-        self.workers = []
+        pass
 
-    def add_worker(self, worker):
-        if not isinstance(worker, DownpourWorker):
-            raise ValueError("only support instance DownpourWorker")
-        self.workers.append(worker)
+    def _set(self):
+        pass
 
-    def __str__(self):
-        worker_str = "worker_param {{{}\n}}"
-        indent = 2
-        workers_str = ""
-        for worker in self.workers:
-            workers_str += "\n"
-            workers_str += worker.to_string(indent)
 
-        return worker_str.format(workers_str)
+class DownpourWorker(Worker):
+    def __init__(self):
+        super(DownpourWorker, self).__init__()
+
+    def _set(self):
+        pass
 
 
 class fsClient:
-    def __init__(self, proto):
-        self.proto = proto
-        self.uri = proto.uri
-        self.user = proto.user
-        self.passwd = proto.passwd
-        self.hadoop_bin = proto.hadoop_bin
-
-    def to_string(self):
-        proto_txt = text_format.MessageToString(self.proto)
-        if proto_txt:
-            fs_str = "fs_client_param {{\n{}}}"
-            return fs_str.format(proto_txt)
+    def __init__(self, fs_client_param):
+        self.fs_client_param = fs_client_param
+
+    def _set(self, proto):
+        if not text_format.MessageToString(self.fs_client_param):
+            return
+        proto.uri = self.fs_client_param.uri
+        proto.user = self.fs_client_param.user
+        proto.passwd = self.fs_client_param.passwd
+        proto.hadoop_bin = self.fs_client_param.hadoop_bin
+
+
+class PsDescBuilder(object):
+    def __init__(self, context):
+        self.context = context
+        self.is_sync = context['is_sync']
+        self.ps_mode = context['ps_mode']
+        self.is_heter_ps_mode = context['is_heter_ps_mode']
+        self.use_ps_gpu = context['use_ps_gpu']
+        self.send_ctx = get_the_one_send_context(
+            self.context,
+            use_origin_program=True,
+            split_dense_table=self.is_heter_ps_mode)
+
+        self.tensor_table_dict = {}  # TODO
+        self._server_sub_program = []
+
+        self.tables = self._get_tables()
+
+        self.service = self._get_service()
+        self.fs_client = self._get_fs_client()
+
+        self.ps_desc = ps_pb2.PSParameter()
+
+    def _get_tensor_tables(self):
+        program_idx = 0
+        if not self.tensor_table_dict:
+            self._server_sub_program.append(Program().desc)
+        tables = []
+        for table_name in self.tensor_table_dict:
+            tables.append(globals()['TensorTable'](len(tables), tensor_dict,
+                                                   self.context['role_maker']))
+            program_idx += 1
+        return tables
+
+    def _get_tables(self):
+        tables = []
+        for idx, (name, ctx) in enumerate(self.send_ctx.items()):
+            print('####### {}\n'.format(ctx.is_sparse()))
+            if ctx.is_sparse():
+                if self.ps_mode == DistributedMode.GEO:
+                    tables.append(globals()['GeoSparseTable'](self.context,
+                                                              ctx))
+                else:
+                    tables.append(globals()['SparseTable'](self.context, ctx))
+            else:
+                tables.append(globals()['DenseTable'](self.context, ctx))
+        self.tensor_tables = self._get_tensor_tables()
+        tables.extend(self.tensor_tables)
+        tables.append(globals()['BarrierTable'](self.context, len(tables)))
+        return tables
+
+    def _get_service(self):
+        if self.use_ps_gpu:
+            return GpuService()
         else:
-            return ""
+            return Service()
+
+    def _get_fs_client(self):
+        return fsClient(self.context["user_defined_strategy"].fs_client_param)
+
+    def build_worker_desc(self):
+        for table in self.tables:
+            table_proto = self.ps_desc.worker_param.downpour_worker_param.downpour_table_param.add(
+            )
+            table._set(table_proto)
+            table_proto = self.ps_desc.server_param.downpour_server_param.downpour_table_param.add(
+            )
+            table._set(table_proto)
+        self.service._set(
+            self.ps_desc.server_param.downpour_server_param.service_param)
+        return text_format.MessageToString(self.ps_desc)
+
+    def build_server_desc(self):
+        for table in self.tables:
+            table_proto = self.ps_desc.server_param.downpour_server_param.downpour_table_param.add(
+            )
+            table._set(table_proto)
+            self.sparse_table_maps = {}
+            if table_proto.type == ps_pb2.PS_SPARSE_TABLE and table_proto.common is not None:
+                self.sparse_table_maps[
+                    table_proto.common.table_name] = table_proto.table_id
+
+        self.service._set(
+            self.ps_desc.server_param.downpour_server_param.service_param)
+        self.fs_client._set(self.ps_desc.fs_client_param)
+        return text_format.MessageToString(self.ps_desc)
 
 
 class TheOnePSRuntime(RuntimeBase):
@@ -665,8 +801,11 @@ class TheOnePSRuntime(RuntimeBase):
         self.role_maker = context["role_maker"]
 
         self.origin_main_program = context["origin_main_program"]
-        self.origin_main_programs = context["origin_main_programs"]
-
+        self.origin_main_programs = context.get("origin_main_programs",
+                                                [self.origin_main_program])
+        self.context["origin_main_programs"] = self.origin_main_programs
+        self.context["origin_startup_programs"] = context.get(
+            'origin_startup_programs', [context['origin_startup_program']])
         self.context[
             'is_heter_ps_mode'] = self.role_maker._is_heter_parameter_server_mode
         self.is_heter_ps_mode = self.context['is_heter_ps_mode']
@@ -675,15 +814,23 @@ class TheOnePSRuntime(RuntimeBase):
         self.context['ps_mode'] = self.context['trainer'].mode
         self.context['use_ps_gpu'] = context['valid_strategy'].a_sync_configs[
             'use_ps_gpu']
-        self.is_sync = True if self.context[
+        self.context['is_sync'] = True if self.context[
             'ps_mode'] == DistributedMode.SYNC else False
         self.context['grad_name_to_param_name'] = {}
         self.context['tensor_table'] = {}
         build_var_distributed(self.context)
 
+        endpoints = get_ps_endpoints(self.role_maker)
+        self.string_hosts = []
+        for idx, ep in enumerate(endpoints):
+            host, port = ep.split(":")
+            pshost = fluid.core.PSHost(host, int(port), idx)
+            self.string_hosts.append(pshost.serialize_to_string())
+
+        self.ps_desc_builder = PsDescBuilder(self.context)
+
     def _init_worker(self):
-        worker = self._get_fleet_proto(is_server=False, is_sync=self.is_sync)
-        server = self._get_fleet_proto(is_server=True, is_sync=self.is_sync)
+        worker_desc = self.ps_desc_builder.build_worker_desc()
 
         if self.context['use_ps_gpu']:
             main_program = self.context['loss'].block.program
@@ -701,23 +848,11 @@ class TheOnePSRuntime(RuntimeBase):
             kwargs["trainer_id"] = self.role_maker._worker_index()
             return kwargs
 
-        proto_txt = str(worker) + "\n" + str(server)
-        with open('proto_txt', 'w') as f:
-            f.write(proto_txt)
-
+        proto_txt = worker_desc + "\n" + server_desc
         debug = bool(int(os.getenv("PSERVER_DEBUG", "0")))
-
         if debug:
             print("worker: \n{}".format(proto_txt))
 
-        endpoints = get_ps_endpoints(self.role_maker)
-
-        string_hosts = []
-        for idx, ep in enumerate(endpoints):
-            host, port = ep.split(":")
-            pshost = fluid.core.PSHost(host, int(port), idx)
-            string_hosts.append(pshost.serialize_to_string())
-
         dense_map = get_the_one_recv_context(
             self.context, split_dense_table=self.is_heter_ps_mode)
         send_ctx = get_the_one_send_context(
@@ -741,7 +876,7 @@ class TheOnePSRuntime(RuntimeBase):
         kwargs["trainer_id"] = self.role_maker._role_id()
         kwargs["trainers"] = self.role_maker._worker_num()
 
-        for table in server.servers[0].tables:
+        for table in server.servers[0].tables:  #TODO
             if table.table_class == "BarrierTable":
                 kwargs["barrier_table_id"] = table.id
                 break
@@ -755,7 +890,8 @@ class TheOnePSRuntime(RuntimeBase):
             trainer_config.mode, kwargs,
             trainer_config.get_communicator_flags())
         self._communicator.init_with_ctx(send_ctx, dense_map, proto_txt,
-                                         string_hosts, fluid.global_scope())
+                                         self.string_hosts,
+                                         fluid.global_scope())
 
         fleet.util.barrier()
         info = self._communicator.get_client_info()
@@ -812,275 +948,16 @@ class TheOnePSRuntime(RuntimeBase):
                                                  previous_trainers,
                                                  self.role_maker._role_id())
 
-    def _push_sparse_param(self,
-                           var_name,
-                           table_id=-1,
-                           scope=fluid.global_scope()):
-        self._communicator.push_sparse_param(var_name, table_id, scope)
-
-    def _get_executor(self):
-        executor = fluid.Executor(fluid.CPUPlace())
-        if self.is_heter_ps_mode:
-            if self.role_maker._is_heter_worker():
-                heter_device_type = self.role_maker._heter_device_type().upper()
-                if heter_device_type not in ["GPU", "XPU", "CPU"]:
-                    raise ValueError("Heter Worker Not Support Device {}".
-                                     format(device_type))
-                if heter_device_type == "GPU":
-                    executor = Executor(
-                        fluid.CUDAPlace(
-                            int(os.getenv("FLAGS_selected_gpus", "0"))))
-                elif heter_device_type == "XPU":
-                    executor = Executor(
-                        fluid.XPUPlace(
-                            int(os.getenv("FLAGS_selected_xpus", "0"))))
-        return executor
-
-    def _get_fleet_proto(self, is_server, is_sync, **kwargs):
-        def _build_merge_accessor(ctx):
-            accessor = Accessor()
-            accessor.accessor_class = "CommMergeAccessor"
-            accessor.optimizer = None
-
-            if ctx.is_sparse():
-                accessor.feature_dim = ctx.sections()[0]
-                accessor.embedding_dim = ctx.sections()[1]
-            else:
-                accessor.feature_dim = ctx.sections()[0]
-                accessor.embedding_dim = 1
-
-            return accessor
-
-        def _build_barrier_table(idx):
-            table = Table()
-            table.id = idx
-            table.type = "PS_OTHER_TABLE"
-            table.table_class = "BarrierTable"
-            table.shard_num = 256
-
-            accessor = Accessor()
-            accessor.accessor_class = "CommMergeAccessor"
-            accessor.optimizer = None
-            accessor.feature_dim = 0
-            accessor.embedding_dim = 0
-            table.accessor = accessor
-
-            common = CommonAccessor()
-            common.table_name = "barrier_table"
-            trainer_num = get_trainers(self.context['role_maker'])
-            if self.is_heter_ps_mode:
-                trainer_num += len(self.role_maker._get_heter_worker_endpoints(
-                ))
-            common.trainer_num = trainer_num
-            common.attrs = ""
-            common.dims = []
-            common.params = []
-            table.common = common
-            return table
-
-        def _build_tensor_table(idx, tensor_dict):
-            table = Table()
-            table.id = idx
-            table.type = "PS_OTHER_TABLE"
-            table.table_class = tensor_dict["tensor_table_class"]
-            table.shard_num = 256
-
-            accessor = Accessor()
-            accessor.accessor_class = "CommMergeAccessor"
-            accessor.optimizer = None
-            accessor.feature_dim = 0
-            accessor.embedding_dim = 0
-            table.accessor = accessor
-
-            common = CommonAccessor()
-            common.table_name = tensor_dict["feed_var_name"]
-            common.trainer_num = get_trainers(self.role_maker)
-            common.attrs = ""
-            common.dims = []
-            common.params = []
-            table.common = common
-
-            tensor = Tensor()
-            tensor.main_program_id = tensor_dict["main_program_id"]
-            tensor.startup_program_id = tensor_dict["startup_program_id"]
-            tensor.feed_var_name = tensor_dict["feed_var_name"]
-            tensor.fetch_var_name = tensor_dict["fetch_var_name"]
-            tensor.tensor_table_class = tensor_dict["tensor_table_class"]
-            table.tensor = tensor
-
-            return table
-
-        def _add_tensor_table(tables):
-            tensor_table_dict = {}
-            program_idx = 0
-            for table_name in tensor_table_dict:
-                if tensor_table_dict[table_name]["startup_program"] != None:
-                    tensor_table_dict[table_name][
-                        "startup_program_id"] = program_idx
-                    self._server_sub_program.append(tensor_table_dict[
-                        table_name]["startup_program"].desc)
-                    program_idx += 1
-                if tensor_table_dict[table_name]["main_program"] != None:
-                    tensor_table_dict[table_name][
-                        "main_program_id"] = program_idx
-                    self._server_sub_program.append(tensor_table_dict[
-                        table_name]["main_program"].desc)
-                    program_idx += 1
-                # Todo: Hard code for lr_decay table apply table id
-                new_table = _build_tensor_table(
-                    len(tables), tensor_table_dict[table_name])
-                tables.append(new_table)
-            return tables
-
-        def _get_tables():
-            send_ctx = get_the_one_send_context(
-                self.context,
-                use_origin_program=True,
-                split_dense_table=self.is_heter_ps_mode)
-
-            tables = []
-            for idx, (name, ctx) in enumerate(send_ctx.items()):
-                print(" wxm python test send_ctx.items-->", idx, (name, ctx))
-                if ctx.is_tensor_table() or len(ctx.origin_varnames()) < 1:
-                    continue
-
-                table = Table()
-                table.id = ctx.table_id()
-                common = CommonAccessor()
-
-                if ctx.is_sparse():
-                    table.type = "PS_SPARSE_TABLE"
-                    table.shard_num = 256
-
-                    common.table_name = self.context['grad_name_to_param_name'][
-                        ctx.origin_varnames()[0]]
-
-                    if self.context['ps_mode'] == DistributedMode.GEO:
-                        table.table_class = "SparseGeoTable"
-                    else:
-                        all_table_proto = self.context[
-                            "user_defined_strategy"].sparse_table_configs
-                        table_proto = all_table_proto.add()
-                        for proto in all_table_proto:
-                            if proto.table_name == common.table_name:
-                                table_proto = proto
-                                break
-                        if table_proto.HasField("table_class"):
-                            table.table_class = table_proto.table_class
-                        else:
-                            table.table_class = parse_table_class(
-                                common.table_name,
-                                ctx.program_id(), self.context)
-                        if table.table_class != 'MemorySparseTable':
-                            table.table_class = 'MemorySparseTable'
-                            warnings.warn(
-                                "The PS mode must use MemorySparseTable.")
-
-                        if table_proto.HasField("shard_num"):
-                            table.shard_num = table_proto.shard_num
-                        else:
-                            table.shard_num = 1000
-                            warnings.warn(
-                                "The shard_num of sparse table is not set, use default value 1000."
-                            )
-
-                        if table_proto.accessor.ByteSize() == 0:
-                            warnings.warn(
-                                "The accessor of sparse table is not set, use default value."
-                            )
-                        get_default_accessor_proto(
-                            table_proto.accessor, common.table_name,
-                            ctx.program_id(), self.context)
-                        check_embedding_dim(table_proto.accessor,
-                                            common.table_name,
-                                            ctx.program_id(), self.context)
-                        table.accessor_proto = text_format.MessageToString(
-                            table_proto.accessor)
-                else:
-                    table.type = "PS_DENSE_TABLE"
-                    table.table_class = "CommonDenseTable"
-                    table.shard_num = 256
-                    common.table_name = "MergedDense"
-
-                adam_d2sum = self.context["user_defined_strategy"].adam_d2sum
-                common.parse_by_optimizer(ctx, self.context)
-
-                if ctx.is_sparse():
-                    common.parse_entry(common.table_name,
-                                       ctx.program_id(), self.context)
-
-                if is_sync:
-                    common.sync = "true"
-                else:
-                    common.sync = "false"
-                table.common = common
-
-                if table.table_class != 'MemorySparseTable':
-                    accessor = _build_merge_accessor(ctx)
-                    table.accessor = accessor
-                tables.append(table)
-
-            tensor_table_dict = {}
-            if len(tensor_table_dict) > 0:
-                tables = _add_tensor_table(tables)
-            else:
-                empty_porgram = Program()
-                self._server_sub_program.append(empty_porgram.desc)
-
-            barrier_table = _build_barrier_table(len(tables))
-            tables.append(barrier_table)
-            return tables
-
-        if is_server:
-            server = Server()
-            downpour_server = DownpourServer()
-
-            service = Service()
-            dist_strategy = self.context["valid_strategy"]
-            use_ps_gpu = dist_strategy.a_sync_configs["use_ps_gpu"]
-            if use_ps_gpu:
-                service.server_class = "PsLocalServer"
-                service.client_class = "PsLocalClient"
-            downpour_server.set_service_param(service)
-
-            tables = _get_tables()
-            downpour_server.tables = tables
-            server.add_server(downpour_server)
-            return server
-        else:
-            worker = Worker()
-            downpour_worker = DownpourWorker()
-
-            tables = _get_tables()
-            downpour_worker.tables = tables
-            worker.add_worker(downpour_worker)
-            return worker
-
     def _init_server(self, dirname=None, var_names=None, **kwargs):
+        server_desc = self.ps_desc_builder.build_server_desc()
         role_id = get_role_id(self.role_maker)
-        endpoints = get_ps_endpoints(self.role_maker)
         trainers = get_trainers(self.role_maker)
         if self.is_heter_ps_mode:
             trainers += len(self.role_maker._get_heter_worker_endpoints())
-        server = self._get_fleet_proto(is_server=True, is_sync=self.is_sync)
-        proto_txt = str(server)
-        fs_client = fsClient(self.context["user_defined_strategy"]
-                             .fs_client_param)
-        proto_txt = proto_txt + "\n" + fs_client.to_string()
-
-        debug = bool(int(os.getenv("PSERVER_DEBUG", "0")))
-        if debug:
-            print("server: \n{}".format(proto_txt))
-
-        string_hosts = []
-        for idx, ep in enumerate(endpoints):
-            host, port = ep.split(":")
-            pshost = fluid.core.PSHost(host, int(port), idx)
-            string_hosts.append(pshost.serialize_to_string())
 
         self._server = fluid.core.DistFleetWrapper()
-        self._server.init_server(proto_txt, string_hosts, role_id, trainers,
-                                 self._server_sub_program)
+        self._server.init_server(server_desc, self.string_hosts, role_id,
+                                 trainers, self._server_sub_program)
 
         dist_varnames = get_sparse_tablenames(self.origin_main_programs, True)
         sparse_varnames = get_sparse_tablenames(self.origin_main_programs,
@@ -1101,10 +978,7 @@ class TheOnePSRuntime(RuntimeBase):
         if dirname is None or not load_varnames:
             return
 
-        sparse_table_maps = {}
-        for table in server.servers[0].tables:
-            if table.type == "PS_SPARSE_TABLE" and table.common is not None:
-                sparse_table_maps[table.common.table_name] = table.id
+        sparse_table_maps = self.ps_desc_builder.sparse_table_maps
 
         dirname = os.path.normpath(dirname)
         pserver_id = self.role_maker._role_id()
@@ -1186,7 +1060,7 @@ class TheOnePSRuntime(RuntimeBase):
         sparses = get_the_one_recv_context(
             self.context,
             is_dense=False,
-            split_dense_table=self.is_heter_ps_mod,
+            split_dense_table=self.is_heter_ps_mode,
             use_origin_program=True)
 
         sparse_varnames = self._save_sparse_params(executor, dirname, sparses,
@@ -1413,7 +1287,7 @@ class TheOnePSRuntime(RuntimeBase):
 
         fleet.util.barrier()
         if self.role_maker._is_first_worker():
-            sparses = sget_the_one_recv_context(
+            sparses = get_the_one_recv_context(
                 self.context,
                 is_dense=False,
                 split_dense_table=self.role_maker.
diff --git a/python/paddle/distributed/ps/utils/ps_factory.py b/python/paddle/distributed/ps/utils/ps_factory.py
index 1a426f3ad6..701ae8be6c 100755
--- a/python/paddle/distributed/ps/utils/ps_factory.py
+++ b/python/paddle/distributed/ps/utils/ps_factory.py
@@ -38,5 +38,7 @@ class PsProgramBuilderFactory(object):
         elif 'is_fl_ps_mode' in attrs and attrs[
                 'is_fl_ps_mode'] == DistributedMode.FL:
             return globals()['FlPsProgramBuilder'](pass_ctx)
-        else:
+        elif attrs['ps_mode'] == DistributedMode.SYNC:
             return globals()['CpuSyncPsProgramBuilder'](pass_ctx)
+        else:
+            return globals()['CpuAsyncPsProgramBuilder'](pass_ctx)
diff --git a/python/paddle/distributed/ps/utils/ps_program_builder.py b/python/paddle/distributed/ps/utils/ps_program_builder.py
index 25e4dc28bd..d737542f32 100755
--- a/python/paddle/distributed/ps/utils/ps_program_builder.py
+++ b/python/paddle/distributed/ps/utils/ps_program_builder.py
@@ -95,11 +95,12 @@ class GeoPsProgramBuilder(PsProgramBuilder):  # 仅 CPU 模式
 
 class CpuSyncPsProgramBuilder(PsProgramBuilder):
     def __init__(self, pass_ctx):
-        logger.info("start building cpu-sync-ps program")
         super(CpuSyncPsProgramBuilder, self).__init__(pass_ctx)
+        if self.ps_mode == DistributedMode.SYNC:
+            logger.info("start building cpu-sync-ps program")
         if self.ps_mode != DistributedMode.SYNC and self.ps_mode != DistributedMode.ASYNC:
             raise ValueError("ps mode: {} not matched {}",
-                             format(self.ps_mode, "CpuSyncPsProgramBuilder"))
+                             format(self.ps_mode, "PsProgramBuilder"))
 
     def _build_trainer_programs(self):
         add_lr_decay_table_pass = new_pass("add_lr_decay_table_pass",
diff --git a/python/paddle/distributed/ps/utils/public.py b/python/paddle/distributed/ps/utils/public.py
index ebec6900e3..ab5bd7da09 100755
--- a/python/paddle/distributed/ps/utils/public.py
+++ b/python/paddle/distributed/ps/utils/public.py
@@ -73,7 +73,9 @@ def logger_config(log_path, logging_name):
     return logger
 
 
-logger = logger_config(log_path='/ps_log', logging_name='ps_log')
+ps_log_root_dir = '/ps_log/'
+logger = logger_config(
+    log_path='/ps_usr_print_log', logging_name='ps_usr_print_log')
 
 
 class DistributedMode:
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
old mode 100644
new mode 100755
index 2f6df07547..1443eebf29
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -627,7 +627,7 @@ set_tests_properties(test_norm_nn_grad PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
 set_tests_properties(test_nn_grad PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
 if(WITH_DISTRIBUTE)
     add_subdirectory(distributed_passes)
-  
+    add_subdirectory(ps)
     add_subdirectory(auto_parallel)
 
     # FIXME(typhoonzero): add these tests back
diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/ps_pass_test_base.py b/python/paddle/fluid/tests/unittests/distributed_passes/ps_pass_test_base.py
index 63dd4b8e21..93a0044a5e 100755
--- a/python/paddle/fluid/tests/unittests/distributed_passes/ps_pass_test_base.py
+++ b/python/paddle/fluid/tests/unittests/distributed_passes/ps_pass_test_base.py
@@ -23,13 +23,24 @@ import unittest
 import numpy as np
 from collections import OrderedDict
 from paddle.distributed.ps.utils.public import logger
-from dist_pass_test_base import prepare_python_path_and_return_module, remove_path_if_exists
+from paddle.fluid.tests.unittests.distributed_passes.dist_pass_test_base import prepare_python_path_and_return_module, remove_path_if_exists
 import paddle.distributed.fleet as fleet
 
 
 class PsPassTestBase(unittest.TestCase):
     def init(self):
-        raise NotImplementedError
+        self.config = {}
+        self.config['ps_mode_config'] = ""
+        self.config['worker_num'] = "1"
+        self.config['server_num'] = "1"
+        self.config['run_minimize'] = "0"
+        self.config['run_single_pass'] = "0"
+        self.config['run_the_one_ps'] = '0'
+        self.config['debug_new_minimize'] = "0"
+        self.config['debug_new_pass'] = "0"
+        self.config['debug_the_one_ps'] = '0'
+        self.config['log_dir'] = ""
+        self.config['applied_pass_name'] = ""
 
     def setUp(self):
         print('Ps setUp...')
@@ -37,7 +48,7 @@ class PsPassTestBase(unittest.TestCase):
     def tearDown(self):
         print('Ps tearDown...')
 
-    def ps_launch(self, config, ps_mode="cpu-ps"):
+    def ps_launch(self, ps_mode="cpu-ps"):
         if ps_mode == "cpu-ps" or ps_mode == 'heter-ps':
             os.environ['WITH_DISTRIBUTE'] = 'ON'
 
@@ -45,23 +56,26 @@ class PsPassTestBase(unittest.TestCase):
                 sys.executable,
                 "-u",
             ] + [
-                "-m", "launch", "--log_dir", config['log_dir'], "--worker_num",
-                config['worker_num'], "--server_num", config['server_num']
+                "-m", "launch", "--log_dir", self.config['log_dir'],
+                "--worker_num", self.config['worker_num'], "--server_num",
+                self.config['server_num']
             ]
             if ps_mode == 'heter-ps':
                 os.environ['FLAGS_START_PORT'] = '12004'
                 cmd += [
-                    '--heter_worker_num', config['heter_worker_num'],
-                    '--heter_devices', config['heter_devices']
+                    '--heter_worker_num', self.config['heter_worker_num'],
+                    '--heter_devices', self.config['heter_devices']
                 ]
 
             cmd += [
-                "../ps/ps_dnn_trainer.py", "-m", config['ps_mode_config'],
-                "--run_minimize", config['run_minimize'], "--run_single_pass",
-                config['run_single_pass'], "--debug_new_pass",
-                config['debug_new_pass'], "--debug_new_minimize",
-                config['debug_new_minimize'], "--applied_pass_name",
-                config['applied_pass_name']
+                "../ps/ps_dnn_trainer.py", "-m", self.config['ps_mode_config'],
+                "--run_minimize", self.config['run_minimize'],
+                "--run_single_pass", self.config['run_single_pass'],
+                "--run_the_one_ps", self.config['run_the_one_ps'],
+                "--debug_new_pass", self.config['debug_new_pass'],
+                "--debug_new_minimize", self.config['debug_new_minimize'],
+                "--applied_pass_name", self.config['applied_pass_name'],
+                "--debug_the_one_ps", self.config['debug_the_one_ps']
             ]
         elif ps_mode == "gpu-ps":
             os.environ['FLAGS_LAUNCH_BARRIER'] = '0'
@@ -80,12 +94,14 @@ class PsPassTestBase(unittest.TestCase):
 
             cmd = [
                 sys.executable, "-u", "../ps/ps_dnn_trainer.py", "-m",
-                config['ps_mode_config'], "--run_minimize",
-                config['run_minimize'], "--run_single_pass",
-                config['run_single_pass'], "--debug_new_pass",
-                config['debug_new_pass'], "--debug_new_minimize",
-                config['debug_new_minimize'], "--applied_pass_name",
-                config['applied_pass_name']
+                self.config['ps_mode_config'], "--run_minimize",
+                self.config['run_minimize'], "--run_single_pass",
+                self.config['run_single_pass'], "--run_the_one_ps",
+                self.config['run_the_one_ps'], "--debug_new_pass",
+                self.config['debug_new_pass'], "--debug_new_minimize",
+                self.config['debug_new_minimize'], "--applied_pass_name",
+                self.config['applied_pass_name'], "--debug_the_one_ps",
+                self.config['debug_the_one_ps']
             ]
 
         cmd = [shlex.quote(c) for c in cmd]
diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_ps_trainer_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_ps_trainer_pass.py
index b186869ee9..fd558ef040 100755
--- a/python/paddle/fluid/tests/unittests/distributed_passes/test_ps_trainer_pass.py
+++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_ps_trainer_pass.py
@@ -21,31 +21,26 @@ import numpy as np
 
 import paddle
 from ps_pass_test_base import *
-from paddle.distributed.ps.utils.public import logger
+from paddle.distributed.ps.utils.public import logger, ps_log_root_dir
 from paddle.fluid.tests.unittests.ps.ps_dnn_trainer import DnnTrainer
 
 
 class TestPsTrainerPass(PsPassTestBase):
-    def init(self):
-        self.config = {}
-        self.config['ps_mode_config'] = ""
-        self.config['worker_num'] = "1"
-        self.config['server_num'] = "1"
-        self.config['run_minimize'] = "0"
-        self.config['run_single_pass'] = "0"
-        self.config['debug_new_minimize'] = "0"
-        self.config['debug_new_pass'] = "0"
-        self.config['log_dir'] = ""
-        self.config['applied_pass_name'] = ""
-
     def setUp(self):
         pass
 
     def tearDown(self):
         pass
 
-    def check(self):
-        pass
+    def check(self, file1, file2):
+        with open(file1, 'r', encoding='utf-8') as f:
+            text1 = f.read()
+        with open(file2, 'r', encoding='utf-8') as f:
+            text2 = f.read()
+        if text1 == text2:
+            return True
+        else:
+            return False
 
     def test_ps_optimizer_minimize_cpu_async(self):
         self.init()
@@ -53,16 +48,21 @@ class TestPsTrainerPass(PsPassTestBase):
         self.config['run_minimize'] = '1'
 
         self.config['debug_new_minimize'] = '0'
-        self.config['log_dir'] = "/async_cpu_log_old_minimize"
+        self.config['log_dir'] = ps_log_root_dir + "async_cpu_log_old_minimize"
         remove_path_if_exists(self.config['log_dir'])
-        self.ps_launch(self.config)
+        self.ps_launch()
 
         self.config['debug_new_minimize'] = '1'
-        self.config['log_dir'] = "/async_cpu_log_new_minimize"
+        self.config['log_dir'] = ps_log_root_dir + "async_cpu_log_new_minimize"
         remove_path_if_exists(self.config['log_dir'])
-        self.ps_launch(self.config)
+        self.ps_launch()
 
-        self.check()
+        file1 = '/ps_log/async_run_minimize_debug:_0_worker_main.prototxt'
+        file2 = '/ps_log/async_run_minimize_debug:_1_worker_main.prototxt'
+        if self.check(file1, file2):
+            logger.info('test_ps_optimizer_minimize_cpu_async passed!')
+        else:
+            logger.error('test_ps_optimizer_minimize_cpu_async failed!')
 
     def test_ps_optimizer_minimize_cpu_sync(self):
         self.init()
@@ -70,16 +70,22 @@ class TestPsTrainerPass(PsPassTestBase):
         self.config['run_minimize'] = '1'
 
         self.config['debug_new_minimize'] = '0'
-        self.config['log_dir'] = "/sync_cpu_log_old_minimize"
+        self.config['log_dir'] = ps_log_root_dir + "sync_cpu_log_old_minimize"
         remove_path_if_exists(self.config['log_dir'])
-        self.ps_launch(self.config)
+        self.ps_launch()
 
         self.config['debug_new_minimize'] = '1'
-        self.config['log_dir'] = "/sync_cpu_log_new_minimize"
+        self.config['log_dir'] = ps_log_root_dir + "sync_cpu_log_new_minimize"
         remove_path_if_exists(self.config['log_dir'])
-        self.ps_launch(self.config)
-
-        self.check()
+        self.ps_launch()
+        '''
+        file1 = '/ps_log/sync_run_minimize_debug:_0_worker_main.prototxt'
+        file2 = '/ps_log/sync_run_minimize_debug:_1_worker_main.prototxt'
+        if self.check(file1, file2):
+            logger.info('test_ps_optimizer_minimize_cpu_sync passed!')
+        else:
+            logger.error('test_ps_optimizer_minimize_cpu_sync failed!')
+        '''
 
     def test_ps_optimizer_minimize_cpu_geo(self):
         self.init()
@@ -87,16 +93,21 @@ class TestPsTrainerPass(PsPassTestBase):
         self.config['run_minimize'] = '1'
 
         self.config['debug_new_minimize'] = '0'
-        self.config['log_dir'] = "/geo_cpu_log_old_minimize"
+        self.config['log_dir'] = ps_log_root_dir + "geo_cpu_log_old_minimize"
         remove_path_if_exists(self.config['log_dir'])
-        self.ps_launch(self.config)
+        self.ps_launch()
 
         self.config['debug_new_minimize'] = '1'
-        self.config['log_dir'] = "/geo_cpu_log_new_minimize"
+        self.config['log_dir'] = ps_log_root_dir + "geo_cpu_log_new_minimize"
         remove_path_if_exists(self.config['log_dir'])
-        self.ps_launch(self.config)
+        self.ps_launch()
 
-        self.check()
+        file1 = '/ps_log/geo_run_minimize_debug:_0_worker_main.prototxt'
+        file2 = '/ps_log/geo_run_minimize_debug:_1_worker_main.prototxt'
+        if self.check(file1, file2):
+            logger.info('test_ps_optimizer_minimize_cpu_geo passed!')
+        else:
+            logger.error('test_ps_optimizer_minimize_cpu_geo failed!')
 
     # heter ps 二阶段
     def test_ps_optimizer_minimize_heter(self):
@@ -110,14 +121,24 @@ class TestPsTrainerPass(PsPassTestBase):
         self.config['ps_mode_config'] = "../ps/heter_ps_config.yaml"
 
         self.config['debug_new_minimize'] = '0'
-        self.config['log_dir'] = "/heter_log_old_minimize"
+        self.config['log_dir'] = ps_log_root_dir + "heter_log_old_minimize"
         remove_path_if_exists(self.config['log_dir'])
-        self.ps_launch(self.config, 'heter-ps')
+        self.ps_launch('heter-ps')
 
         self.config['debug_new_minimize'] = '1'
-        self.config['log_dir'] = "/heter_log_new_minimize"
+        self.config['log_dir'] = ps_log_root_dir + "heter_log_new_minimize"
         remove_path_if_exists(self.config['log_dir'])
-        self.ps_launch(self.config, 'heter-ps')
+        self.ps_launch('heter-ps')
+        '''
+        file1 = '/ps_log/heter_run_minimize_debug:_0_worker_main.prototxt'
+        file2 = '/ps_log/heter_run_minimize_debug:_1_worker_main.prototxt'
+        file3 = '/ps_log/heter_run_minimize_debug:_0_heter_worker_main.prototxt'
+        file4 = '/ps_log/heter_run_minimize_debug:_1_heter_worker_main.prototxt'
+        if self.check(file1, file2) and self.check(file3, file4):
+            logger.info('test_ps_optimizer_minimize_heter passed!')
+        else:
+            logger.error('test_ps_optimizer_minimize_heter failed!')
+        '''
 
     def test_ps_optimizer_minimize_gpu(self):
         self.init()
@@ -125,29 +146,42 @@ class TestPsTrainerPass(PsPassTestBase):
         self.config['ps_mode_config'] = "../ps/gpu_ps_config.yaml"
 
         self.config['debug_new_minimize'] = '0'
-        self.ps_launch(self.config, "gpu-ps")
+        self.ps_launch("gpu-ps")
 
         self.config['debug_new_minimize'] = '1'
-        self.ps_launch(self.config, "gpu-ps")
+        self.ps_launch("gpu-ps")
 
-        self.check()
+        file1 = '/ps_log/gpubox_run_minimize_debug:_0_worker_main.prototxt'
+        file2 = '/ps_log/gpubox_run_minimize_debug:_1_worker_main.prototxt'
+        if self.check(file1, file2):
+            logger.info('test_ps_optimizer_minimize_gpu passed!')
+        else:
+            logger.error('test_ps_optimizer_minimize_gpu failed!')
 
     def test_append_send_ops_pass(self):
         self.init()
         self.config['run_single_pass'] = '1'
+        self.config['ps_mode_config'] = "../ps/cpu_async_ps_config.yaml"
         self.config['applied_pass_name'] = "append_send_ops_pass"
 
         self.config['debug_new_pass'] = '0'
-        self.config['log_dir'] = "/log_old_" + self.config['applied_pass_name']
+        self.config['log_dir'] = ps_log_root_dir + "log_old_" + self.config[
+            'applied_pass_name']
         remove_path_if_exists(self.config['log_dir'])
-        self.ps_launch(self.config, "cpu-ps")
+        self.ps_launch("cpu-ps")
 
         self.config['debug_new_pass'] = '1'
-        self.config['log_dir'] = "/log_new_" + self.config['applied_pass_name']
+        self.config['log_dir'] = ps_log_root_dir + "log_new_" + self.config[
+            'applied_pass_name']
         remove_path_if_exists(self.config['log_dir'])
-        self.ps_launch(self.config, "cpu-ps")
-
-        self.check()
+        self.ps_launch("cpu-ps")
+
+        file1 = '/ps_log/async_append_send_ops_pass_debug:_0_worker_main.prototxt'
+        file2 = '/ps_log/async_append_send_ops_pass_debug:_1_worker_main.prototxt'
+        if self.check(file1, file2):
+            logger.info('test_append_send_ops_pass passed!')
+        else:
+            logger.info('test_append_send_ops_pass failed!')
 
     def test_distributed_ops_pass(self):
         pass
diff --git a/python/paddle/fluid/tests/unittests/ps/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ps/CMakeLists.txt
old mode 100644
new mode 100755
index 3aef3283b8..9af32a8aca
--- a/python/paddle/fluid/tests/unittests/ps/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/ps/CMakeLists.txt
@@ -3,6 +3,6 @@ string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
 
 foreach(TEST_OP ${TEST_OPS})
     py_test_modules(${TEST_OP} MODULES ${TEST_OP})
+    list(APPEND TEST_OPS ${TEST_OP})
+    set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 50)
 endforeach(TEST_OP)
-
-set_tests_properties(test_the_one_ps PROPERTIES TIMEOUT 50)
diff --git a/python/paddle/fluid/tests/unittests/ps/ps_dnn_trainer.py b/python/paddle/fluid/tests/unittests/ps/ps_dnn_trainer.py
index d08c1d41c8..bc87fc255a 100755
--- a/python/paddle/fluid/tests/unittests/ps/ps_dnn_trainer.py
+++ b/python/paddle/fluid/tests/unittests/ps/ps_dnn_trainer.py
@@ -264,12 +264,16 @@ def parse_args():
         '--run_minimize', type=int, default=0, help="test single pass")
     parser.add_argument(
         '--run_single_pass', type=int, default=0, help="test single pass")
+    parser.add_argument(
+        '--run_the_one_ps', type=int, default=0, help="test the_one_ps")
     parser.add_argument(
         '--debug_new_minimize', type=int, default=0, help="test single pass")
     parser.add_argument(
         '--debug_new_pass', type=int, default=0, help="test single pass")
     parser.add_argument(
         '--applied_pass_name', type=str, default="", help="test single pass")
+    parser.add_argument(
+        '--debug_the_one_ps', type=int, default=0, help="test the_one_ps")
 
     args = parser.parse_args()
     args.abs_dir = os.path.dirname(os.path.abspath(args.config_yaml))
@@ -280,9 +284,11 @@ def parse_args():
     config["pure_bf16"] = args.pure_bf16
     config['run_minimize'] = args.run_minimize
     config['run_single_pass'] = args.run_single_pass
+    config['run_the_one_ps'] = args.run_the_one_ps
     config['debug_new_minimize'] = args.debug_new_minimize
     config['debug_new_pass'] = args.debug_new_pass
     config['applied_pass_name'] = args.applied_pass_name
+    config['debug_the_one_ps'] = args.debug_the_one_ps
     yaml_helper.print_yaml(config)
     return config
 
@@ -344,15 +350,15 @@ class DnnTrainer(object):
             fleet_obj.minimize(loss)
 
         if fleet.is_server():
-            _main_file = '/' + sync_mode + '_run_minimize' + '_debug:_' + str(
+            _main_file = ps_log_root_dir + sync_mode + '_run_minimize' + '_debug:_' + str(
                 self.config['debug_new_minimize']) + '_server_main.prototxt'
             debug_program(_main_file, loss.block.program)
         elif fleet.is_worker():
-            _main_file = '/' + sync_mode + '_run_minimize' + '_debug:_' + str(
+            _main_file = ps_log_root_dir + sync_mode + '_run_minimize' + '_debug:_' + str(
                 self.config['debug_new_minimize']) + '_worker_main.prototxt'
             debug_program(_main_file, loss.block.program)
         elif self.role_maker._is_heter_worker():
-            _main_file = '/' + sync_mode + '_run_minimize' + '_debug:_' + str(
+            _main_file = ps_log_root_dir + sync_mode + '_run_minimize' + '_debug:_' + str(
                 self.config[
                     'debug_new_minimize']) + '_heter_worker_main.prototxt'
             debug_program(_main_file, loss.block.program)
@@ -397,16 +403,84 @@ class DnnTrainer(object):
             _main = worker.append_send_ops_pass(_main, compiled_config)
 
         if fleet.is_server():
-            _main_file = '/' + sync_mode + "_" + str(config[
+            _main_file = ps_log_root_dir + sync_mode + "_" + str(config[
                 "applied_pass_name"]) + '_debug:_' + str(self.config[
                     'debug_new_pass']) + '_server_main.prototxt'
             debug_program(_main_file, _main)
         elif fleet.is_worker():
-            _main_file = '/' + sync_mode + "_" + str(config[
+            _main_file = ps_log_root_dir + sync_mode + "_" + str(config[
                 "applied_pass_name"]) + '_debug:_' + str(self.config[
                     'debug_new_pass']) + '_worker_main.prototxt'
             debug_program(_main_file, _main)
 
+    def run_the_one_ps(self):
+        self.init_fleet_with_gloo()
+        self.model = get_model(self.config)
+        self.input_data = self.model.create_feeds()
+        self.metrics = self.model.net(self.input_data)
+        loss = self.model._cost
+        user_defined_strategy = get_user_defined_strategy(self.config)
+        learning_rate = self.config.get(
+            "hyper_parameters.optimizer.learning_rate")
+        sync_mode = self.config.get("runner.sync_mode")
+        inner_optimizer = paddle.optimizer.Adam(learning_rate, lazy_mode=True)
+
+        self.role_maker._generate_role()  # 必要
+        if self.config['debug_the_one_ps'] == 1:
+            logger.info("entering run_the_one_ps -- new")
+
+            from paddle.distributed.fleet.meta_optimizers.ps_optimizer import ParameterServerOptimizer
+            ps_optimizer = ParameterServerOptimizer(inner_optimizer)
+            ps_optimizer._set_basic_info(loss, self.role_maker, inner_optimizer,
+                                         user_defined_strategy)
+            ps_optimizer.minimize_impl(loss)
+
+            from paddle.distributed.ps.the_one_ps import TheOnePSRuntime
+            _runtime_handle = TheOnePSRuntime()  # ps 目录下重构版的 TheOnePSRuntime
+            _runtime_handle._set_basic_info(ps_optimizer.pass_ctx._attrs)
+            if fleet.is_worker():
+                worker_desc = _runtime_handle.ps_desc_builder.build_worker_desc(
+                )
+                with open(ps_log_root_dir + sync_mode + '_' +
+                          'new_worker_ps_desc', 'w') as f:
+                    f.write(worker_desc)
+            if fleet.is_server():
+                server_desc = _runtime_handle.ps_desc_builder.build_server_desc(
+                )
+                with open(ps_log_root_dir + sync_mode + '_' +
+                          'new_server_ps_desc', 'w') as f:
+                    f.write(server_desc)
+
+        else:
+            pass
+        '''          
+            logger.info("entering run_the_one_ps -- old")
+            fleet_obj = fleet.distributed_optimizer(
+                inner_optimizer, user_defined_strategy)  
+            fleet_obj.minimize(loss)  
+            if fleet.is_worker():
+                worker_desc = fleet_obj._runtime_handle._get_fleet_proto(is_server=False, is_sync=False)
+                server_desc = fleet_obj._runtime_handle._get_fleet_proto(is_server=True, is_sync=False)
+                with open(ps_log_root_dir + sync_mode + '_' + 'worker_ps_desc', 'w') as f:
+                    f.write(str(worker_desc) + str(server_desc))
+            if fleet.is_server():
+                server_desc = fleet_obj._runtime_handle._get_fleet_proto(is_server=True, is_sync=False)
+                with open(ps_log_root_dir + sync_mode + '_' + 'server_ps_desc', 'w') as f:
+                    f.write(str(server_desc) + str(fleet_obj._runtime_handle._get_fs_client_desc().to_string()))
+        '''
+        if fleet.is_server():
+            _main_file = ps_log_root_dir + sync_mode + '_run_the_one_ps' + '_debug:_' + str(
+                self.config['debug_the_one_ps']) + '_server_main.prototxt'
+            debug_program(_main_file, loss.block.program)
+        elif fleet.is_worker():
+            _main_file = ps_log_root_dir + sync_mode + '_run_the_one_ps' + '_debug:_' + str(
+                self.config['debug_the_one_ps']) + '_worker_main.prototxt'
+            debug_program(_main_file, loss.block.program)
+        elif self.role_maker._is_heter_worker():
+            _main_file = ps_log_root_dir + sync_mode + '_run_the_one_ps' + '_debug:_' + str(
+                self.config['debug_the_one_ps']) + '_heter_worker_main.prototxt'
+            debug_program(_main_file, loss.block.program)
+
 
 if __name__ == "__main__":
     paddle.enable_static()
@@ -418,3 +492,5 @@ if __name__ == "__main__":
         benchmark_main.run_single_pass()
     elif config['run_minimize'] == 1:
         benchmark_main.run_minimize()
+    elif config['run_the_one_ps'] == 1:
+        benchmark_main.run_the_one_ps()
diff --git a/python/paddle/fluid/tests/unittests/ps/test_the_one_ps.py b/python/paddle/fluid/tests/unittests/ps/test_the_one_ps.py
old mode 100644
new mode 100755
index 78bae0e50c..8dddc6abd4
--- a/python/paddle/fluid/tests/unittests/ps/test_the_one_ps.py
+++ b/python/paddle/fluid/tests/unittests/ps/test_the_one_ps.py
@@ -22,16 +22,100 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 
+import paddle
+from paddle.fluid.tests.unittests.distributed_passes.ps_pass_test_base import *
+from paddle.distributed.ps.utils.public import logger, ps_log_root_dir
+from ps_dnn_trainer import DnnTrainer
+from paddle.distributed.fleet.proto import ps_pb2
+from google.protobuf import text_format
+
 
-class TestTheOnePs(unittest.TestCase):
+class TestTheOnePs(PsPassTestBase):
     def setUp(self):
-        print('setUp...')
+        pass
 
     def tearDown(self):
-        print('tearDown...')
+        pass
 
-    def test_main(self):
+    def check(self, file1, file2):
         pass
+        '''
+        f = open(file1, "rb")
+        ps_desc_1 = ps_pb2.PSParameter()
+        text_format.Parse(f.read(), ps_desc_1)
+        f.close()
+
+        f = open(file2, "rb")
+        ps_desc_2 = ps_pb2.PSParameter()
+        text_format.Parse(f.read(), ps_desc_2)
+        f.close()
+        str1 = text_format.MessageToString(ps_desc_1)
+        str2 = text_format.MessageToString(ps_desc_2)
+        #logger.info('### msg10: {}'.format(str1))
+        #logger.info('### msg20: {}'.format(str2))
+        if str1 == str2:
+            return True
+        else:
+            return False
+        '''
+
+    def test_ps_cpu_async(self):
+        self.init()
+        self.config['ps_mode_config'] = "../ps/cpu_async_ps_config.yaml"
+        self.config['run_the_one_ps'] = '1'
+
+        self.config['debug_the_one_ps'] = '0'
+        self.config[
+            'log_dir'] = ps_log_root_dir + "async_cpu_log_old_the_one_ps"
+        remove_path_if_exists(self.config['log_dir'])
+        self.ps_launch()
+
+        self.config['debug_the_one_ps'] = '1'
+        self.config[
+            'log_dir'] = ps_log_root_dir + "async_cpu_log_new_the_one_ps"
+        remove_path_if_exists(self.config['log_dir'])
+        self.ps_launch()
+
+        desc1 = '/ps_desc_baseline/async_worker_ps_desc'
+        desc2 = '/ps_log/async_new_worker_ps_desc'
+        desc3 = '/ps_desc_baseline/async_server_ps_desc'
+        desc4 = '/ps_log/async_new_server_ps_desc'
+        if self.check(desc1, desc2):
+            logger.info('test_ps_cpu_async ps_desc: worker passed!')
+        else:
+            logger.info('test_ps_cpu_async ps_desc: worker failed!')
+        if self.check(desc3, desc4):
+            logger.info('test_ps_cpu_async ps_desc: server passed!')
+        else:
+            logger.info('test_ps_cpu_async ps_desc: server failed!')
+
+    def test_ps_cpu_geo(self):
+        self.init()
+        self.config['ps_mode_config'] = "../ps/cpu_geo_ps_config.yaml"
+        self.config['run_the_one_ps'] = '1'
+
+        self.config['debug_the_one_ps'] = '0'
+        self.config['log_dir'] = ps_log_root_dir + "geo_cpu_log_old_the_one_ps"
+        remove_path_if_exists(self.config['log_dir'])
+        self.ps_launch()
+
+        self.config['debug_the_one_ps'] = '1'
+        self.config['log_dir'] = ps_log_root_dir + "geo_cpu_log_new_the_one_ps"
+        remove_path_if_exists(self.config['log_dir'])
+        self.ps_launch()
+
+        desc1 = '/ps_desc_baseline/geo_worker_ps_desc'
+        desc2 = '/ps_log/geo_new_worker_ps_desc'
+        desc3 = '/ps_desc_baseline/geo_server_ps_desc'
+        desc4 = '/ps_log/geo_new_server_ps_desc'
+        if self.check(desc1, desc2):
+            logger.info('test_ps_cpu_geo ps_desc: worker passed!')
+        else:
+            logger.info('test_ps_cpu_geo ps_desc: worker failed!')
+        if self.check(desc3, desc4):
+            logger.info('test_ps_cpu_geo ps_desc: server passed!')
+        else:
+            logger.info('test_ps_cpu_geo ps_desc: server failed!')
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/ps_dnn_model.py b/python/paddle/fluid/tests/unittests/ps_dnn_model.py
index 0a147334da..8d91e0f467 100755
--- a/python/paddle/fluid/tests/unittests/ps_dnn_model.py
+++ b/python/paddle/fluid/tests/unittests/ps_dnn_model.py
@@ -74,6 +74,7 @@ class DNNLayer(nn.Layer):
             else:
                 emb = self.embedding(s_input)
             emb = paddle.reshape(emb, shape=[-1, self.sparse_feature_dim])
+            # emb.stop_gradient = True
             sparse_embs.append(emb)
 
         y_dnn = paddle.concat(x=sparse_embs + [dense_inputs], axis=1)
-- 
GitLab