# dijob-qbert.yaml
# DIJob custom resource describing a DQN training job on Atari Qbert.
apiVersion: diengine.opendilab.org/v1alpha1
kind: DIJob
metadata:
  name: qbert-dqn
spec:
  # NOTE(review): "xxx" looks like a placeholder group name — confirm before use.
  group: xxx
  priorityClassName: ''
  cleanPodPolicy: 'Running'
  volumes:
  # Memory-backed scratch volume, capped at 128Mi.
  - name: cache-volume
    emptyDir:
      medium: Memory
      sizeLimit: 128Mi
  # Host directory made available to the job's containers.
  - name: work-dir
    hostPath:
      path: /data/nfs/ding/qbert
  # Coordinator replica. Its container writes the Python config module
  # qbert_dqn_config_k8s.py, runs `ding --module config` on it, and then starts
  # the coordinator from qbert_dqn_config_k8s.py.pkl.
  # NOTE(review): the .pkl file is presumably produced by the config step — confirm.
  coordinator:
    template:
      spec:
        containers:
        - name: di-container
          image: diorchestrator/ding:v0.1.0-df39b81c
          imagePullPolicy: Always
          env:
          - name: PYTHONUNBUFFERED
            value: "1"
          resources:
            requests:
              cpu: 3
              memory: "10Gi"
            limits:
              cpu: 3
              memory: "10Gi"
          command: ["/bin/bash", "-c"]
          args:
          - |
            # Stop at the first failing command so the coordinator is never
            # launched after a failed config generation.
            set -e

            # The quoted delimiter keeps the shell from expanding anything
            # inside the Python source below.
            cat <<'EOF' > qbert_dqn_config_k8s.py
            from easydict import EasyDict

            qbert_dqn_config = dict(
                env=dict(
                    collector_env_num=16,
                    collector_episode_num=2,
                    evaluator_env_num=8,
                    evaluator_episode_num=1,
                    stop_value=30000,
                    env_id='QbertNoFrameskip-v4',
                    frame_stack=4,
                    manager=dict(
                        shared_memory=False,
                    ),
                ),
                policy=dict(
                    cuda=True,
                    priority=True,
                    model=dict(
                        obs_shape=[4, 84, 84],
                        action_shape=6,
                        encoder_hidden_size_list=[128, 128, 512],
                    ),
                    nstep=3,
                    discount_factor=0.99,
                    learn=dict(
                        multi_gpu=True,
                        batch_size=32,
                        learning_rate=0.0001,
                        learner=dict(
                            learner_num=1,
                            send_policy_freq=1,
                        ),
                    ),
                    collect=dict(
                        n_sample=16,
                        collector=dict(
                            collector_num=2,
                            update_policy_second=3,
                        ),
                    ),
                    eval=dict(evaluator=dict(eval_freq=500, )),
                    other=dict(
                        eps=dict(
                            type='exp',
                            start=1.,
                            end=0.05,
                            decay=250000,
                        ),
                        replay_buffer=dict(
                            replay_buffer_size=400000,
                            enable_track_used_data=True,
                        ),
                        commander=dict(
                            collector_task_space=0,
                            learner_task_space=1,
                            eval_interval=30,
                        ),
                    ),
                ),
            )
            qbert_dqn_config = EasyDict(qbert_dqn_config)
            main_config = qbert_dqn_config

            qbert_dqn_create_config = dict(
                env=dict(
                    type='atari',
                    import_names=['dizoo.atari.envs.atari_env'],
                ),
                env_manager=dict(type='subprocess'),
                policy=dict(type='dqn_command'),
                learner=dict(type='base', import_names=['ding.worker.learner.base_learner']),
                collector=dict(
                    type='zergling',
                    import_names=['ding.worker.collector.zergling_collector'],
                ),
                commander=dict(
                    type='solo',
                    import_names=['ding.worker.coordinator.solo_parallel_commander'],
                ),
                comm_learner=dict(
                    type='flask_fs',
                    import_names=['ding.worker.learner.comm.flask_fs_learner'],
                ),
                comm_collector=dict(
                    type='flask_fs',
                    import_names=['ding.worker.collector.comm.flask_fs_collector'],
                ),
            )
            qbert_dqn_create_config = EasyDict(qbert_dqn_create_config)
            create_config = qbert_dqn_create_config

            qbert_dqn_system_config = dict(
                coordinator=dict(
                    operator_server=dict(
                        system_addr='di-server.di-system:8080',
                        api_version='/v1alpha1',
                        init_replicas_request=dict(
                            collectors={
                                "replicas": 2,
                            },
                            learners={
                                "gpus": "2",
                                "replicas": 1,
                            },
                        ),
                        collector_target_num=2,
                        learner_target_num=1,
                    ),
                ),
                path_data='./data',
                path_policy='./policy',
                communication_mode='auto',
                learner_gpu_num=2,
            )
            qbert_dqn_system_config = EasyDict(qbert_dqn_system_config)
            system_config = qbert_dqn_system_config
            EOF

            # if code has been changed in the mount path, we have to reinstall ding cli
            # pip install --no-cache-dir -e .;

            ding -m dist --module config -P k8s -c qbert_dqn_config_k8s.py -s 0;
            # Fail with a clear message if the port variable is missing.
            ding -m dist --module coordinator -c qbert_dqn_config_k8s.py.pkl -s 0 --disable-flask-log 0 -cdp "${COORDINATOR_PORT:?COORDINATOR_PORT is not set}"
          ports:
          # NOTE(review): presumably COORDINATOR_PORT is injected by the operator
          # and matches this containerPort — confirm.
          - name: di-port
            containerPort: 22273
          volumeMounts:
          - name: work-dir
            mountPath: /ding
  # Collector replica template. Its container starts the `ding` collector
  # module from qbert_dqn_config_k8s.py.pkl, which this block does not create.
  # NOTE(review): presumably the file is shared through the /ding mount and
  # written by the coordinator before the collector starts — confirm.
  collector:
    template:
      spec:
        containers:
        - name: di-container
          image: diorchestrator/ding:v0.1.0-df39b81c
          imagePullPolicy: Always
          env:
          - name: PYTHONUNBUFFERED
            value: "1"
          resources:
            requests:
              cpu: 16
              memory: "10Gi"
            limits:
              cpu: 16
              memory: "10Gi"
          command: ["/bin/bash", "-c"]
          args:
          - |
            # if code has been changed in the mount path, we have to reinstall ding cli
            # pip install --no-cache-dir -e .;

            # Fail with a clear message if the port variable is missing, instead
            # of letting the next flag be consumed as the port value.
            ding -m dist --module collector -c qbert_dqn_config_k8s.py.pkl -s 0 -clp "${COLLECTOR_PORT:?COLLECTOR_PORT is not set}" --disable-flask-log 0
          ports:
          # NOTE(review): presumably COLLECTOR_PORT is injected by the operator
          # and matches this containerPort — confirm.
          - name: di-port
            containerPort: 22270
          volumeMounts:
          - name: work-dir
            mountPath: /ding
  # Learner replica template. Its container starts the `ding` spawn_learner
  # module from qbert_dqn_config_k8s.py.pkl, which this block does not create.
  # NOTE(review): presumably the file is shared through the /ding mount and
  # written by the coordinator before the learner starts — confirm.
  learner:
    template:
      spec:
        containers:
        - name: di-container
          image: diorchestrator/ding:v0.1.0-df39b81c
          imagePullPolicy: Always
          env:
          - name: PYTHONUNBUFFERED
            value: "1"
          resources:
            requests:
              cpu: 16
              memory: "20Gi"
            limits:
              cpu: 16
              memory: "20Gi"
          command: ["/bin/bash", "-c"]
          args:
          - |
            # if code has been changed in the mount path, we have to reinstall ding cli
            # pip install --no-cache-dir -e .;

            # Fail with a clear message if the port variable is missing, instead
            # of letting the next flag be consumed as the port value.
            ding -m dist --module spawn_learner -c qbert_dqn_config_k8s.py.pkl -s 0 -lp "${LEARNER_PORT:?LEARNER_PORT is not set}" --disable-flask-log 0
          ports:
          # NOTE(review): presumably LEARNER_PORT is injected by the operator
          # and matches this containerPort — confirm.
          - name: di-port
            containerPort: 22271
          volumeMounts:
          # The memory-backed cache-volume is mounted as the container's /dev/shm.
          - name: cache-volume
            mountPath: /dev/shm
          - name: work-dir
            mountPath: /ding