diff --git a/doc/resource/ctr.yaml b/doc/resource/ctr.yaml index 40a3b9a1f6dc8a973432c519017be7e49c734407..2fce73b441ab8d0e4cd6a1f706c99fb0cb8982a0 100755 --- a/doc/resource/ctr.yaml +++ b/doc/resource/ctr.yaml @@ -26,7 +26,7 @@ spec: type: "" name: seqdata containers: - - image: wangjiawei1993/edldemo:v19 + - image: hub.baidubce.com/ctr/edldemo:latest command: - paddle_k8s - start_fluid @@ -116,7 +116,7 @@ spec: type: "" name: seqdata containers: - - image: wangjiawei1993/edldemo:v19 + - image: hub.baidubce.com/ctr/edldemo:latest command: - paddle_k8s - start_fluid @@ -206,7 +206,7 @@ spec: type: "" name: seqdata containers: - - image: wangjiawei1993/edldemo:v19 + - image: hub.baidubce.com/ctr/edldemo:latest command: - paddle_k8s - start_fluid diff --git a/doc/resource/cube.yaml b/doc/resource/cube.yaml index 08d10bcd0f3e573d4c9d4d1dfd5d78e91975e89a..efe732489d67bcde78bd898dd73b05cd6b188237 100755 --- a/doc/resource/cube.yaml +++ b/doc/resource/cube.yaml @@ -7,7 +7,7 @@ metadata: spec: containers: - name: cube-0 - image: wangjiawei1993/cube:v11 + image: hub.baidubce.com/ctr/cube:latest workingDir: /cube command: ['/bin/bash'] args: ['start.sh'] @@ -28,7 +28,7 @@ metadata: spec: containers: - name: cube-1 - image: wangjiawei1993/cube:v11 + image: hub.baidubce.com/ctr/cube:latest workingDir: /cube command: ['/bin/bash'] args: ['start.sh'] diff --git a/doc/resource/ftp.yaml b/doc/resource/ftp.yaml index 7576be2ddfea7b3421fd2498e951670df1a290bd..9ab309fcf669d8a78eecb560e7d2ce19964cbf54 100755 --- a/doc/resource/ftp.yaml +++ b/doc/resource/ftp.yaml @@ -12,7 +12,7 @@ spec: name: file-home containers: - name: file-server - image: halverneus/static-file-server + image: hub.baidubce.com/ctr/file-server:latest ports: - containerPort: 8080 volumeMounts: diff --git a/doc/resource/paddle-suite.yaml b/doc/resource/paddle-suite.yaml old mode 100755 new mode 100644 index 107988a2dd990b961bb14cc8263c55d061faf624..1e440fd7acc9cacbfbc644f929d9e80d039cc253 --- a/doc/resource/paddle-suite.yaml +++ b/doc/resource/paddle-suite.yaml @@ -12,7 +12,7 @@ spec: name: file-home containers: - name: file-server - image: halverneus/static-file-server + image: hub.baidubce.com/ctr/file-server:latest ports: - containerPort: 8080 volumeMounts: @@ -37,7 +37,6 @@ spec: --- - apiVersion: v1 kind: Pod metadata: @@ -47,7 +46,7 @@ metadata: spec: containers: - name: cube-0 - image: wangjiawei1993/cube:v11 + image: hub.baidubce.com/ctr/cube:latest workingDir: /cube command: ['/bin/bash'] args: ['start.sh'] @@ -68,7 +67,7 @@ metadata: spec: containers: - name: cube-1 - image: wangjiawei1993/cube:v11 + image: hub.baidubce.com/ctr/cube:latest workingDir: /cube command: ['/bin/bash'] args: ['start.sh'] @@ -123,7 +122,7 @@ metadata: spec: containers: - name: cube-transfer - image: wangjiawei1993/cube-transfer:v18 + image: hub.baidubce.com/ctr/cube-transfer:latest workingDir: / env: - name: POD_IP @@ -150,7 +149,7 @@ metadata: spec: containers: - name: paddleserving - image: wangjiawei1993/paddleserving:v7-debug + image: hub.baidubce.com/ctr/paddleserving:latest workingDir: /serving command: ['/bin/bash'] args: ['run.sh'] @@ -183,7 +182,7 @@ metadata: spec: containers: - name: pdservingclient - image: wangjiawei1993/pdservingclient:v4 + image: hub.baidubce.com/ctr/pdservingclient:latest workingDir: / command: ['bash'] args: ['nonstop.sh'] @@ -196,7 +195,7 @@ kind: Job metadata: name: edl-demo spec: - minAvailable: 6 + minAvailable: 4 schedulerName: volcano policies: - event: PodEvicted @@ -204,7 +203,7 @@ spec: - event: PodFailed action: RestartJob tasks: - - replicas: 3 + - replicas: 2 name: pserver template: metadata: @@ -219,7 +218,7 @@ spec: type: "" name: seqdata containers: - - image: wangjiawei1993/edldemo:v19 + - image: hub.baidubce.com/ctr/edldemo:latest command: - paddle_k8s - start_fluid @@ -273,9 +272,9 @@ spec: - name: PADDLE_IS_LOCAL value: "0" - name: PADDLE_TRAINERS_NUM - value: "3" + value: "2" - name: PADDLE_PSERVERS_NUM - value: "3" + value: "2" - name: FLAGS_rpc_deadline value: "36000000" - name: ENTRY @@ -309,7 +308,7 @@ spec: type: "" name: seqdata containers: - - image: wangjiawei1993/edldemo:v19 + - image: hub.baidubce.com/ctr/edldemo:latest command: - paddle_k8s - start_fluid @@ -368,9 +367,9 @@ spec: - name: PADDLE_PORT value: "30240" - name: PADDLE_PSERVERS_NUM - value: "3" + value: "2" - name: PADDLE_TRAINERS_NUM - value: "3" + value: "2" - name: PADDLE_TRAINING_ROLE value: TRAINER - name: TRAINING_ROLE @@ -381,7 +380,7 @@ spec: value: (/postprocess &) && cd /workspace/ctr && python train.py --is_local 0 --cloud_train 1 restartPolicy: OnFailure - - replicas: 2 + - replicas: 1 policies: - event: TaskCompleted action: CompleteJob @@ -399,7 +398,7 @@ spec: type: "" name: seqdata containers: - - image: wangjiawei1993/edldemo:v19 + - image: hub.baidubce.com/ctr/edldemo:latest command: - paddle_k8s - start_fluid @@ -458,9 +457,9 @@ spec: - name: PADDLE_PORT value: "30240" - name: PADDLE_PSERVERS_NUM - value: "3" + value: "2" - name: PADDLE_TRAINERS_NUM - value: "3" + value: "2" - name: PADDLE_TRAINING_ROLE value: TRAINER - name: TRAINING_ROLE @@ -470,3 +469,4 @@ spec: - name: ENTRY value: (/postprocess &) && cd /workspace/ctr && python train.py --is_local 0 --cloud_train 1 restartPolicy: OnFailure + diff --git a/doc/resource/pdclient.yaml b/doc/resource/pdclient.yaml index 302c1611107133438402646d5b1d144539c53b68..29961875550891e13352ae39f32705f81b1e8114 100755 --- a/doc/resource/pdclient.yaml +++ b/doc/resource/pdclient.yaml @@ -7,7 +7,7 @@ metadata: spec: containers: - name: pdservingclient - image: wangjiawei1993/pdservingclient:v4 + image: hub.baidubce.com/ctr/pdservingclient:latest workingDir: / command: ['bash'] args: ['nonstop.sh'] diff --git a/doc/resource/pdserving.yaml b/doc/resource/pdserving.yaml index 42474bc6f10cf0e4f894bffbde44ecb27fa3a400..5e70a894a17673b267bc774adddb3af8230f93fc 100755 --- a/doc/resource/pdserving.yaml +++ b/doc/resource/pdserving.yaml @@ -7,7 +7,7 @@ metadata: spec: containers: - name: paddleserving - image: wangjiawei1993/paddleserving:v7-debug + image: hub.baidubce.com/ctr/paddleserving:latest workingDir: /serving command: ['/bin/bash'] args: ['run.sh'] diff --git a/doc/resource/transfer.yaml b/doc/resource/transfer.yaml index 6da4a7e57b5a4a1b591c33de1563fb0627397124..717e95bb91aa75af3d84f6ee95d4f862dea4b82a 100755 --- a/doc/resource/transfer.yaml +++ b/doc/resource/transfer.yaml @@ -7,7 +7,7 @@ metadata: spec: containers: - name: cube-transfer - image: wangjiawei1993/cube-transfer:v18 + image: hub.baidubce.com/ctr/cube-transfer:latest workingDir: / env: - name: POD_IP diff --git a/doc/resource/volcano-ctr-demo-baiduyun.yaml b/doc/resource/volcano-ctr-demo-baiduyun.yaml deleted file mode 100755 index 6b3bbbaa883100ab0f56f3b6a18cafce1ebd40ae..0000000000000000000000000000000000000000 --- a/doc/resource/volcano-ctr-demo-baiduyun.yaml +++ /dev/null @@ -1,189 +0,0 @@ -apiVersion: batch.volcano.sh/v1alpha1 -kind: Job -metadata: - name: edl-demo -spec: - minAvailable: 4 - schedulerName: volcano - policies: - - event: PodEvicted - action: RestartJob - - event: PodFailed - action: RestartJob - tasks: - - replicas: 2 - name: pserver - template: - metadata: - labels: - paddle-job-pserver: fluid-ctr - spec: - imagePullSecrets: - - name: default-secret - volumes: - - hostPath: - path: /home/work/ - type: "" - name: seqdata - containers: - - image: wangjiawei1993/edldemo:v19 - command: - - paddle_k8s - - start_fluid - imagePullPolicy: IfNotPresent - name: pserver - volumeMounts: - - mountPath: /data - name: seqdata - resources: - limits: - cpu: 10 - memory: 30Gi - ephemeral-storage: 10Gi - requests: - cpu: 1 - memory: 100M - ephemeral-storage: 1Gi - env: - - name: GLOG_v - value: "0" - - name: GLOG_logtostderr - value: "1" - - name: TOPOLOGY - value: "" - - name: TRAINER_PACKAGE - value: /workspace - - name: PADDLE_INIT_NICS - value: eth2 - - name: NAMESPACE - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: metadata.namespace - - name: POD_IP - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: status.podIP - - name: POD_NAME - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: metadata.name - - name: PADDLE_CURRENT_IP - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: status.podIP - - name: PADDLE_JOB_NAME - value: fluid-ctr - - name: PADDLE_IS_LOCAL - value: "0" - - name: PADDLE_TRAINERS_NUM - value: "2" - - name: PADDLE_PSERVERS_NUM - value: "2" - - name: FLAGS_rpc_deadline - value: "36000000" - - name: ENTRY - value: cd /workspace/ctr && python train.py --is_local 0 --cloud_train 1 - - name: PADDLE_PORT - value: "30240" - - name: LD_LIBRARY_PATH - value: /usr/local/lib:/usr/local/nvidia/lib64:/usr/local/rdma/lib64:/usr/lib64/mlnx_ofed/valgrind - - name: PADDLE_TRAINING_ROLE - value: PSERVER - - name: TRAINING_ROLE - value: PSERVER - restartPolicy: OnFailure - - replicas: 2 - policies: - - event: TaskCompleted - action: CompleteJob - name: trainer - template: - metadata: - labels: - paddle-job: fluid-ctr - spec: - nodeSelector: - nodeType: model - imagePullSecrets: - - name: default-secret - volumes: - - hostPath: - path: /home/work/data - type: "" - name: seqdata - containers: - - image: wangjiawei1993/edldemo:v19 - command: - - paddle_k8s - - start_fluid - imagePullPolicy: IfNotPresent - name: trainer - volumeMounts: - - mountPath: /data - name: seqdata - resources: - limits: - cpu: 10 - memory: 30Gi - ephemeral-storage: 10Gi - requests: - cpu: 1 - memory: 100M - ephemeral-storage: 10Gi - env: - - name: GLOG_v - value: "0" - - name: GLOG_logtostderr - value: "1" - - name: TOPOLOGY - - name: TRAINER_PACKAGE - value: /workspace - - name: PADDLE_INIT_NICS - value: eth2 - - name: CPU_NUM - value: "2" - - name: NAMESPACE - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: metadata.namespace - - name: POD_IP - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: status.podIP - - name: POD_NAME - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: metadata.name - - name: PADDLE_CURRENT_IP - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: status.podIP - - name: PADDLE_JOB_NAME - value: fluid-ctr - - name: PADDLE_IS_LOCAL - value: "0" - - name: FLAGS_rpc_deadline - value: "36000000" - - name: PADDLE_PORT - value: "30240" - - name: PADDLE_PSERVERS_NUM - value: "2" - - name: PADDLE_TRAINERS_NUM - value: "2" - - name: PADDLE_TRAINING_ROLE - value: TRAINER - - name: TRAINING_ROLE - value: TRAINER - - name: LD_LIBRARY_PATH - value: /usr/local/lib:/usr/local/nvidia/lib64:/usr/local/rdma/lib64:/usr/lib64/mlnx_ofed/valgrind - - name: ENTRY - value: (/postprocess &) && cd /workspace/ctr && python train.py --is_local 0 --cloud_train 1 - restartPolicy: OnFailure