Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenDILab开源决策智能平台
DI-orchestrator
提交
70468e89
D
DI-orchestrator
项目概览
OpenDILab开源决策智能平台
/
DI-orchestrator
上一次同步 大约 2 年
通知
1
Star
78
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DI-orchestrator
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
70468e89
编写于
8月 04, 2021
作者:
L
liqingping
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
test: add unit test for ddp learner services
上级
024e3dab
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
65 additions
and
0 deletions
+65
-0
controllers/dijob_controller_test.go
controllers/dijob_controller_test.go
+64
-0
utils/util.go
utils/util.go
+1
-0
未找到文件。
controllers/dijob_controller_test.go
浏览文件 @
70468e89
...
...
@@ -7,12 +7,14 @@ import (
.
"github.com/onsi/ginkgo"
.
"github.com/onsi/gomega"
corev1
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
div1alpha1
"opendilab.org/di-orchestrator/api/v1alpha1"
dicommon
"opendilab.org/di-orchestrator/common"
commontypes
"opendilab.org/di-orchestrator/common/types"
diutil
"opendilab.org/di-orchestrator/utils"
testutil
"opendilab.org/di-orchestrator/utils/testutils"
)
...
...
@@ -217,6 +219,58 @@ var _ = Describe("DIJob Controller", func() {
}
checkReplicasStatuses
(
ctx
,
k8sClient
,
jobKey
,
replicasStatuses
)
err
=
testutil
.
CleanUpJob
(
ctx
,
k8sClient
,
&
dijob
)
Expect
(
err
)
.
NotTo
(
HaveOccurred
())
}
})
It
(
"Should build right gpu ports and master port when the pod is ddp learner"
,
func
()
{
type
replica
struct
{
name
string
ddpLearnerType
string
gpus
int
expectedPorts
int
}
testCases
:=
[]
replica
{
{
name
:
"job-ddp-learner-sdf"
,
ddpLearnerType
:
dicommon
.
DDPLearnerTypeMaster
,
gpus
:
4
,
expectedPorts
:
5
},
{
name
:
"job-ddp-learner-sdf"
,
ddpLearnerType
:
dicommon
.
DDPLearnerTypeWorker
,
gpus
:
6
,
expectedPorts
:
6
},
{
name
:
"job-ddp-learner-sdf"
,
ddpLearnerType
:
dicommon
.
DDPLearnerTypeMaster
,
gpus
:
1
,
expectedPorts
:
2
},
{
name
:
"job-ddp-learner-sdf"
,
ddpLearnerType
:
dicommon
.
DDPLearnerTypeMaster
,
gpus
:
0
,
expectedPorts
:
2
},
{
name
:
"job-ddp-learner-sdf"
,
ddpLearnerType
:
dicommon
.
DDPLearnerTypeWorker
,
gpus
:
0
,
expectedPorts
:
1
},
}
for
i
:=
range
testCases
{
c
:=
testCases
[
i
]
By
(
fmt
.
Sprintf
(
"Create %dth DIJob"
,
i
+
1
))
var
err
error
ctx
:=
context
.
Background
()
jobTmpl
:=
testutil
.
NewDIJob
()
dijob
,
_
:=
createDIJob
(
ctx
,
k8sClient
,
jobTmpl
)
// build owner reference
ownRefer
:=
diutil
.
NewOwnerReference
(
div1alpha1
.
GroupVersion
.
String
(),
div1alpha1
.
KindDIJob
,
dijob
.
Name
,
dijob
.
UID
,
true
)
By
(
fmt
.
Sprintf
(
"Create replicas for DIJob %s"
,
dijob
.
Name
))
pod
:=
buildPod
(
c
.
name
,
dijob
.
Name
,
dicommon
.
DDPLearnerName
,
ownRefer
)
pod
.
Labels
[
dicommon
.
DDPLearnerTypeLabel
]
=
c
.
ddpLearnerType
resources
:=
commontypes
.
ResourceQuantity
{
GPU
:
resource
.
MustParse
(
fmt
.
Sprint
(
c
.
gpus
))}
diutil
.
SetPodResources
(
pod
,
resources
)
err
=
k8sClient
.
Create
(
ctx
,
pod
,
&
client
.
CreateOptions
{})
Expect
(
err
)
.
NotTo
(
HaveOccurred
())
By
(
"Checking the # of service's ports are as expected"
)
Eventually
(
func
()
int
{
svcs
,
err
:=
diutil
.
ListServices
(
ctx
,
k8sClient
,
&
dijob
)
Expect
(
err
)
.
NotTo
(
HaveOccurred
())
_
,
_
,
_
,
_
,
DDPLearners
,
err
:=
diutil
.
ClassifyServices
(
svcs
)
Expect
(
err
)
.
NotTo
(
HaveOccurred
())
if
len
(
DDPLearners
)
==
0
{
return
-
1
}
return
len
(
DDPLearners
[
0
]
.
Spec
.
Ports
)
},
timeout
,
interval
)
.
Should
(
Equal
(
c
.
expectedPorts
))
err
=
testutil
.
CleanUpJob
(
ctx
,
k8sClient
,
&
dijob
)
Expect
(
err
)
.
NotTo
(
HaveOccurred
())
}
...
...
@@ -256,13 +310,23 @@ func createAndUpdatePodPhase(
ctx
context
.
Context
,
k8sClient
client
.
Client
,
name
,
jobName
string
,
status
corev1
.
PodPhase
,
replicaType
string
,
ownRefer
metav1
.
OwnerReference
,
statuses
[]
int
)
{
pod
:=
buildPod
(
name
,
jobName
,
replicaType
,
ownRefer
)
createPodAndUpdatePhase
(
ctx
,
k8sClient
,
pod
,
status
,
statuses
)
}
func
buildPod
(
name
,
jobName
string
,
replicaType
string
,
ownRefer
metav1
.
OwnerReference
)
*
corev1
.
Pod
{
pod
:=
testutil
.
NewPod
(
name
,
jobName
,
ownRefer
)
labs
:=
diutil
.
GenLabels
(
jobName
)
labs
[
dicommon
.
ReplicaTypeLabel
]
=
replicaType
labs
[
dicommon
.
PodNameLabel
]
=
pod
.
Name
pod
.
SetLabels
(
labs
)
return
pod
}
func
createPodAndUpdatePhase
(
ctx
context
.
Context
,
k8sClient
client
.
Client
,
pod
*
corev1
.
Pod
,
status
corev1
.
PodPhase
,
statuses
[]
int
)
{
err
:=
k8sClient
.
Create
(
ctx
,
pod
,
&
client
.
CreateOptions
{})
Expect
(
err
)
.
NotTo
(
HaveOccurred
())
...
...
utils/util.go
浏览文件 @
70468e89
...
...
@@ -261,6 +261,7 @@ func AddGPUPortsToPod(pod *corev1.Pod, total int, startPort int32) {
}
func
AddGPUPortsToService
(
service
*
corev1
.
Service
,
total
int
,
startPort
int32
)
{
// gpu 0's port has already been created
for
i
:=
1
;
i
<
total
;
i
++
{
pname
:=
fmt
.
Sprintf
(
"%s-%d"
,
dicommon
.
DDPLearnerPortPrefix
,
i
)
pport
:=
startPort
+
int32
(
i
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录