# dijob-cartpole.yaml
# DIJob custom resource for the CartPole DQN example. The spec below declares
# one pod template each for the coordinator, collector and learner roles.
apiVersion: diengine.opendilab.org/v1alpha1
kind: DIJob
metadata:
  name: cartpole-dqn
spec:
  # NOTE(review): "xxx" looks like a placeholder value — confirm the real group.
  group: xxx
  priorityClassName: ""
  cleanPodPolicy: "Running"
  volumes:
  # Memory-backed emptyDir capped at 128Mi; the learner container mounts it
  # at /dev/shm.
  - name: cache-volume
    emptyDir:
      medium: Memory
      sizeLimit: 128Mi
  # Host directory mounted at /ding by the coordinator, collector and learner
  # containers.
  - name: work-dir
    hostPath:
      path: /data/nfs/ding/cartpole
  # Coordinator role. Its container script writes cartpole_dqn_config_k8s.py,
  # runs the `ding` config module on it, and then starts the coordinator
  # module against cartpole_dqn_config_k8s.py.pkl.
  coordinator:
    template:
      spec:
        containers:
        - name: di-container
          image: diorchestrator/ding:v0.1.0-df39b81c
          imagePullPolicy: Always
          env:
          # Python standard env var: disables stdout/stderr buffering.
          - name: PYTHONUNBUFFERED
            value: "1"
          resources:
            requests:
              cpu: 3
              memory: "10Gi"
            limits:
              cpu: 3
              memory: "10Gi"
          command: ["/bin/bash", "-c"]
          # The script is a single bash string:
          #   1. A here-document writes the Python config. The delimiter is
          #      quoted ('EOF') so bash never expands anything inside it.
          #   2. The two `ding` invocations are chained with `&&` so the
          #      coordinator is not started if the config step fails
          #      (presumably the config step produces the .py.pkl file —
          #      TODO confirm).
          # $COORDINATOR_PORT is not defined in this manifest; presumably it is
          # injected into the pod by the operator — verify.
          # NOTE(review): files are written relative to the working directory;
          # this assumes the image's working directory is the shared /ding
          # mount — confirm.
          args:
          - |
            cat <<'EOF' > cartpole_dqn_config_k8s.py
            from easydict import EasyDict

            cartpole_dqn_config = dict(
                env=dict(
                    collector_env_num=8,
                    collector_episode_num=2,
                    evaluator_env_num=5,
                    evaluator_episode_num=1,
                    stop_value=195,
                ),
                policy=dict(
                    cuda=False,
                    model=dict(
                        obs_shape=4,
                        action_shape=2,
                        encoder_hidden_size_list=[128, 128, 64],
                        dueling=True,
                    ),
                    nstep=3,
                    discount_factor=0.97,
                    learn=dict(
                        batch_size=32,
                        learning_rate=0.001,
                        learner=dict(
                            learner_num=1,
                            send_policy_freq=1,
                        ),
                    ),
                    collect=dict(
                        n_sample=16,
                        collector=dict(
                            collector_num=2,
                            update_policy_second=3,
                        ),
                    ),
                    eval=dict(evaluator=dict(eval_freq=50, )),
                    other=dict(
                        eps=dict(
                            type='exp',
                            start=0.95,
                            end=0.1,
                            decay=100000,
                        ),
                        replay_buffer=dict(
                            replay_buffer_size=100000,
                            enable_track_used_data=False,
                        ),
                        commander=dict(
                            # increase collector task space when get rs from server
                            collector_task_space=0,
                            learner_task_space=1,
                            eval_interval=5,
                        ),
                    ),
                ),
            )
            cartpole_dqn_config = EasyDict(cartpole_dqn_config)
            main_config = cartpole_dqn_config

            cartpole_dqn_create_config = dict(
                env=dict(
                    type='cartpole',
                    import_names=['dizoo.classic_control.cartpole.envs.cartpole_env'],
                ),
                env_manager=dict(type='base'),
                policy=dict(type='dqn_command'),
                learner=dict(type='base', import_names=['ding.worker.learner.base_learner']),
                collector=dict(
                    type='zergling',
                    import_names=['ding.worker.collector.zergling_collector'],
                ),
                commander=dict(
                    type='solo',
                    import_names=['ding.worker.coordinator.solo_parallel_commander'],
                ),
                comm_learner=dict(
                    type='flask_fs',
                    import_names=['ding.worker.learner.comm.flask_fs_learner'],
                ),
                comm_collector=dict(
                    type='flask_fs',
                    import_names=['ding.worker.collector.comm.flask_fs_collector'],
                ),
            )
            cartpole_dqn_create_config = EasyDict(cartpole_dqn_create_config)
            create_config = cartpole_dqn_create_config

            cartpole_dqn_system_config = dict(
                coordinator=dict(
                    operator_server=dict(
                        system_addr='di-server.di-system:8080',
                        api_version='/v1alpha1',
                        init_replicas_request=dict(
                            collectors={
                                "replicas": 2,
                            },
                            learners={
                                "gpus": "0",
                                "replicas": 1,
                            },
                        ),
                        collector_target_num=2,
                        learner_target_num=1,
                    ),
                ),
                path_data='./data',
                path_policy='./policy',
                communication_mode='auto',
                learner_gpu_num=1,
            )
            cartpole_dqn_system_config = EasyDict(cartpole_dqn_system_config)
            system_config = cartpole_dqn_system_config
            EOF

            ding -m dist --module config -P k8s -c ./cartpole_dqn_config_k8s.py -s 0 &&
            ding -m dist --module coordinator -c ./cartpole_dqn_config_k8s.py.pkl -s 0 -cdp $COORDINATOR_PORT
          ports:
          - name: di-port
            containerPort: 22270
          volumeMounts:
          - name: work-dir
            mountPath: /ding
  # Collector role: runs the `ding` collector module against
  # cartpole_dqn_config_k8s.py.pkl.
  collector:
    template:
      spec:
        containers:
        - name: di-container
          image: diorchestrator/ding:v0.1.0-df39b81c
          imagePullPolicy: Always
          env:
          # Python standard env var: disables stdout/stderr buffering.
          - name: PYTHONUNBUFFERED
            value: "1"
          resources:
            requests:
              cpu: 8
              memory: "10Gi"
            limits:
              cpu: 8
              memory: "10Gi"
          command: ["/bin/bash", "-c"]
          # The .pkl path is relative to the working directory; this assumes
          # the image's working directory is the shared /ding mount, where the
          # coordinator script writes its files — TODO confirm.
          # $COLLECTOR_PORT is not defined in this manifest; presumably it is
          # injected into the pod by the operator — verify.
          args:
          - |
            ding -m dist --module collector -c ./cartpole_dqn_config_k8s.py.pkl -s 0 -clp $COLLECTOR_PORT
          ports:
          - name: di-port
            containerPort: 22270
          volumeMounts:
          - name: work-dir
            mountPath: /ding
  # Learner role: runs the `ding` learner module against
  # cartpole_dqn_config_k8s.py.pkl.
  learner:
    template:
      spec:
        containers:
        - name: di-container
          image: diorchestrator/ding:v0.1.0-df39b81c
          imagePullPolicy: Always
          env:
          # Python standard env var: disables stdout/stderr buffering.
          - name: PYTHONUNBUFFERED
            value: "1"
          resources:
            requests:
              cpu: 8
              memory: "20Gi"
            limits:
              cpu: 8
              memory: "20Gi"
          command: ["/bin/bash", "-c"]
          # The .pkl path is relative to the working directory; this assumes
          # the image's working directory is the shared /ding mount, where the
          # coordinator script writes its files — TODO confirm.
          # $LEARNER_PORT is not defined in this manifest; presumably it is
          # injected into the pod by the operator — verify.
          args:
          - |
            ding -m dist --module learner -c ./cartpole_dqn_config_k8s.py.pkl -s 0 -lp $LEARNER_PORT
          ports:
          - name: di-port
            containerPort: 22270
          volumeMounts:
          # Memory-backed emptyDir (cache-volume) mounted as /dev/shm.
          - name: cache-volume
            mountPath: /dev/shm
          - name: work-dir
            mountPath: /ding