diff --git a/Makefile b/Makefile index 2b203b1bff48de3a5a42aa1577e9080488a64602..72f7f019cefc3055b60a4d0ba01d628e846ace3f 100644 --- a/Makefile +++ b/Makefile @@ -89,11 +89,6 @@ test: ginkgo ## Run tests. go tool cover -func=./server/http/coverage.out go tool cover -func=./common/gpuallocator/coverage.out -.PHONY: e2e-test -e2e-test: ## Run e2e tests. - go test -timeout 20m -v ./e2e -shared-volumes-dir /data/nfs/ding/cartpole --kubeconfig ~/.kube/config - go tool cover -func=./e2e/coverage.out - ##@ Build build: generate ## Build di-operator binary. diff --git a/config/samples/dijob-cartpole.yaml b/config/samples/dijob-cartpole.yaml index 3ac79a9b8264f861461f55e8ee9a8e016fab3af6..ac016b151806ea9e561eb52b5e16c9deb669fa4c 100644 --- a/config/samples/dijob-cartpole.yaml +++ b/config/samples/dijob-cartpole.yaml @@ -169,11 +169,11 @@ spec: value: "1" resources: requests: - cpu: 8 - memory: "10Gi" + cpu: 16 + memory: "5Gi" limits: - cpu: 8 - memory: "10Gi" + cpu: 16 + memory: "5Gi" command: ["/bin/bash", "-c",] args: - | @@ -196,11 +196,11 @@ spec: value: "1" resources: requests: - cpu: 8 - memory: "20Gi" + cpu: 16 + memory: "5Gi" limits: - cpu: 8 - memory: "20Gi" + cpu: 16 + memory: "5Gi" command: ["/bin/bash", "-c",] args: - | diff --git a/config/samples/dijob-qbert.yaml b/config/samples/dijob-qbert.yaml index a1124c0a27fbeb700cef2133d1bc871910ca8131..a3eb7119ef1ae6a2048aeccc709cbc0c3f005204 100644 --- a/config/samples/dijob-qbert.yaml +++ b/config/samples/dijob-qbert.yaml @@ -177,10 +177,10 @@ spec: value: "1" resources: requests: - cpu: 8 + cpu: 16 memory: "10Gi" limits: - cpu: 8 + cpu: 16 memory: "10Gi" command: ["/bin/bash", "-c",] args: @@ -207,13 +207,11 @@ spec: value: "1" resources: requests: - cpu: 3 - nvidia.com/gpu: 2 - memory: "30Gi" + cpu: 16 + memory: "20Gi" limits: - cpu: 3 - nvidia.com/gpu: 2 - memory: "30Gi" + cpu: 16 + memory: "20Gi" command: ["/bin/bash", "-c",] args: - | diff --git a/e2e/README.md b/e2e/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..40a25ccc6f443bc1945fcfd34c4bf6e4e5e5467e --- /dev/null +++ b/e2e/README.md @@ -0,0 +1,10 @@ +# E2E Tests +Through the e2e test, we can test the robustness of DI-engine, ensuring that DIJobs can tolerate common exceptions. + +## Run +```bash +go test -timeout 20m -cover -v ./e2e --ginkgo.v --shared-volumes-dir /data/nfs/ding --kubeconfig ~/.kube/config +``` +- `shared-volumes-dir` represents the shared volumes directory for DI-engine modules (coordinator, collector, etc.) to exchange data and models. Different jobs' shared volumes are placed under this directory. Defaults to `/data/nfs/ding`. +- `kubeconfig` represents the path to the kubeconfig file used to access the Kubernetes cluster. Defaults to `$HOME/.kube/config`. +- `timeout` can be set according to how long the test will last. diff --git a/e2e/config/dijob.yaml b/e2e/config/dijob.yaml index d27c7da3bc8e7e93c85fb16883a3b51d15dab8df..4b71046baa0ab9bb9a4009c444cef79af5f67e58 100644 --- a/e2e/config/dijob.yaml +++ b/e2e/config/dijob.yaml @@ -26,11 +26,11 @@ spec: value: "1" resources: requests: - cpu: 3 - memory: "10Gi" + cpu: 2 + memory: "5Gi" limits: - cpu: 3 - memory: "10Gi" + cpu: 2 + memory: "5Gi" command: ["/bin/bash", "-c",] args: - | @@ -169,10 +169,10 @@ spec: value: "1" resources: requests: - cpu: 16 + cpu: 8 memory: "5Gi" limits: - cpu: 16 + cpu: 8 memory: "5Gi" command: ["/bin/bash", "-c",] args: @@ -196,10 +196,10 @@ spec: value: "1" resources: requests: - cpu: 16 + cpu: 5 memory: "5Gi" limits: - cpu: 16 + cpu: 5 memory: "5Gi" command: ["/bin/bash", "-c",] args: