# job.yaml
---
# Kubernetes batch Job that runs the "trainer" container from the
# paddle:mypaddle image as three parallel pods.
apiVersion: batch/v1
kind: Job
metadata:
  name: paddle-cluster-job
spec:
  # Up to 3 pods run at once; the Job is complete after 3 successful pods.
  parallelism: 3
  completions: 3
  template:
    metadata:
      name: paddle-cluster-job
    spec:
      volumes:
      # Directory on the node's filesystem, mounted into the container by the
      # volumeMounts entry of the same name below.
      - name: jobpath
        hostPath:
          path: /home/work/paddle_output
      containers:
      - name: trainer
        image: registry.baidu.com/public/paddle:mypaddle
        # Absolute interpreter path: a relative "bin/bash" resolves only when
        # the container's working directory happens to be "/".
        command: ["/bin/bash", "-c", "/root/start.sh"]
        # Environment of the trainer container (presumably consumed by
        # /root/start.sh -- confirm). Numeric values are quoted because
        # Kubernetes env values must be strings.
        env:
        # Same value as metadata.name above.
        - name: JOB_NAME
          value: paddle-cluster-job
        # Same path as the jobpath volume's mountPath below.
        - name: JOB_PATH
          value: /home/jobpath
        - name: JOB_NAMESPACE
          value: default
        - name: TRAIN_CONFIG_DIR
          value: recommendation
        - name: CONF_PADDLE_NIC
          value: eth0
        - name: CONF_PADDLE_PORT
          value: "7164"
        - name: CONF_PADDLE_PORTS_NUM
          value: "2"
        - name: CONF_PADDLE_PORTS_NUM_SPARSE
          value: "2"
        # NOTE(review): equals spec.parallelism (3); presumably the two must
        # stay in sync -- confirm against the start script.
        - name: CONF_PADDLE_GRADIENT_NUM
          value: "3"
        volumeMounts:
        - name: jobpath
          mountPath: /home/jobpath
      # Containers are never restarted in place after they exit.
      restartPolicy: Never