# ReplicaSet that runs the parameter-server ("pserver") pods of the vgg16job
# PaddlePaddle job. Each pod runs `paddle_k8s start_fluid` with
# TRAINING_ROLE=PSERVER.
#
# NOTE(review): the extensions/v1beta1 ReplicaSet API was removed in
# Kubernetes 1.16. On newer clusters this manifest needs `apps/v1` plus an
# explicit `spec.selector` — confirm the target cluster version before changing.
apiVersion: extensions/v1beta1
kind: ReplicaSet
metadata:
  name: vgg16job-pserver
spec:
  # Keep in sync with the PSERVERS env var below.
  replicas: 10
  template:
    metadata:
      labels:
        paddle-job-pserver: vgg16job
    spec:
      # Pods share the node's network namespace, so containerPort 30236 is
      # bound directly on the host.
      hostNetwork: true
      imagePullSecrets:
        - name: job-registry-secret
      containers:
        - name: pserver
          image: "registry.baidu.com/paddlepaddle/rawjob:vgg16_fluid"
          imagePullPolicy: Always
          ports:
            # Same port number as PADDLE_INIT_PORT below.
            - name: jobport-30236
              containerPort: 30236
          env:
            - name: PADDLE_JOB_NAME
              value: vgg16job
            - name: MKL_NUM_THREADS
              value: "1"
            - name: TRAINING_ROLE
              value: "PSERVER"
            # Same value as PADDLE_INIT_NUM_GRADIENT_SERVERS below.
            - name: TRAINERS
              value: "20"
            # Same value as spec.replicas above.
            - name: PSERVERS
              value: "10"
            - name: TOPOLOGY
              value: ""
            # NOTE(review): presumably the command line launched by
            # paddle_k8s — confirm against the image's entrypoint script.
            - name: ENTRY
              value: "MKL_NUM_THREADS=1 python /workspace/vgg16.py --local 0"
            - name: TRAINER_PACKAGE
              value: "/workspace"
            - name: PADDLE_INIT_PORT
              value: "30236"
            - name: PADDLE_INIT_NICS
              value: "xgbe0"
            - name: PADDLE_INIT_TRAINER_COUNT
              value: "1"
            - name: PADDLE_INIT_PORTS_NUM
              value: "1"
            - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE
              value: "1"
            - name: PADDLE_INIT_NUM_GRADIENT_SERVERS
              value: "20"
            - name: PADDLE_INIT_NUM_PASSES
              value: "1"
            - name: PADDLE_INIT_USE_GPU
              value: "0"
            # NOTE(review): an NVIDIA library path is set although
            # PADDLE_INIT_USE_GPU is "0" — confirm whether it is still needed.
            - name: LD_LIBRARY_PATH
              value: "/usr/local/nvidia/lib64"
            # Downward API: expose the pod's own namespace and IP to the
            # container.
            - name: NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: "metadata.namespace"
            - name: POD_IP
              valueFrom:
                fieldRef:
                  fieldPath: "status.podIP"
          command: ["paddle_k8s", "start_fluid"]
          resources:
            # Requests and limits are deliberately identical.
            requests:
              memory: 10Gi
              cpu: 4
            limits:
              memory: 10Gi
              cpu: 4