Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
6a0b3657
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6a0b3657
编写于
7月 28, 2017
作者:
H
helinwang
提交者:
GitHub
7月 28, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3062 from helinwang/grace
gracefully shutdown master, pserver, fix gometalinter errors
上级
35b6415f
6fab04f4
变更
7
显示空白变更内容
内联
并排
Showing
7 changed files
with
134 additions
and
60 deletions
+134
-60
go/cmd/master/master.go
go/cmd/master/master.go
+24
-4
go/cmd/pserver/pserver.go
go/cmd/pserver/pserver.go
+26
-5
go/master/etcd_client.go
go/master/etcd_client.go
+19
-6
go/master/inmem_store.go
go/master/inmem_store.go
+5
-0
go/master/service.go
go/master/service.go
+1
-0
go/pserver/client/c/cclient.go
go/pserver/client/c/cclient.go
+3
-3
go/pserver/etcd_client.go
go/pserver/etcd_client.go
+56
-42
未找到文件。
go/cmd/master/master.go
浏览文件 @
6a0b3657
...
@@ -19,6 +19,8 @@ import (
...
@@ -19,6 +19,8 @@ import (
"net"
"net"
"net/http"
"net/http"
"net/rpc"
"net/rpc"
"os"
"os/signal"
"strconv"
"strconv"
"strings"
"strings"
"time"
"time"
...
@@ -68,6 +70,20 @@ func main() {
...
@@ -68,6 +70,20 @@ func main() {
store
=
&
master
.
InMemStore
{}
store
=
&
master
.
InMemStore
{}
}
}
shutdown
:=
func
()
{
log
.
Infoln
(
"shutting down gracefully"
)
err
:=
store
.
Shutdown
()
if
err
!=
nil
{
log
.
Errorln
(
err
)
}
}
// Guaranteed to run even if a panic happens.
defer
shutdown
()
c
:=
make
(
chan
os
.
Signal
,
1
)
signal
.
Notify
(
c
,
os
.
Interrupt
)
s
,
err
:=
master
.
NewService
(
store
,
*
chunkPerTask
,
*
taskTimeoutDur
,
*
taskTimeoutMax
)
s
,
err
:=
master
.
NewService
(
store
,
*
chunkPerTask
,
*
taskTimeoutDur
,
*
taskTimeoutMax
)
if
err
!=
nil
{
if
err
!=
nil
{
log
.
Fatal
(
err
)
log
.
Fatal
(
err
)
...
@@ -84,8 +100,12 @@ func main() {
...
@@ -84,8 +100,12 @@ func main() {
log
.
Fatal
(
err
)
log
.
Fatal
(
err
)
}
}
go
func
()
{
err
=
http
.
Serve
(
l
,
nil
)
err
=
http
.
Serve
(
l
,
nil
)
if
err
!=
nil
{
if
err
!=
nil
{
log
.
Fatal
(
err
)
log
.
Fatal
(
err
)
}
}
}()
<-
c
}
}
go/cmd/pserver/pserver.go
浏览文件 @
6a0b3657
...
@@ -18,6 +18,8 @@ import (
...
@@ -18,6 +18,8 @@ import (
"net"
"net"
"net/http"
"net/http"
"net/rpc"
"net/rpc"
"os"
"os/signal"
"strconv"
"strconv"
"time"
"time"
...
@@ -33,7 +35,8 @@ func main() {
...
@@ -33,7 +35,8 @@ func main() {
index
:=
flag
.
Int
(
"index"
,
-
1
,
"index of this pserver, should be larger or equal than 0"
)
index
:=
flag
.
Int
(
"index"
,
-
1
,
"index of this pserver, should be larger or equal than 0"
)
etcdEndpoint
:=
flag
.
String
(
"etcd-endpoint"
,
"http://127.0.0.1:2379"
,
etcdEndpoint
:=
flag
.
String
(
"etcd-endpoint"
,
"http://127.0.0.1:2379"
,
"comma separated endpoint string for pserver to connect to etcd"
)
"comma separated endpoint string for pserver to connect to etcd"
)
etcdTimeout
:=
flag
.
Duration
(
"etcd-timeout"
,
5
*
time
.
Second
,
"timeout for etcd calls"
)
dialTimeout
:=
flag
.
Duration
(
"dial-timeout"
,
5
*
time
.
Second
,
"dial timeout"
)
etcdTTL
:=
flag
.
Int
(
"etcd-ttl"
,
5
,
"etcd time to live in seconds"
)
numPservers
:=
flag
.
Int
(
"num-pservers"
,
1
,
"total pserver count in a training job"
)
numPservers
:=
flag
.
Int
(
"num-pservers"
,
1
,
"total pserver count in a training job"
)
checkpointPath
:=
flag
.
String
(
"checkpoint-path"
,
"/checkpoints/"
,
"save checkpoint path"
)
checkpointPath
:=
flag
.
String
(
"checkpoint-path"
,
"/checkpoints/"
,
"save checkpoint path"
)
checkpointInterval
:=
flag
.
Duration
(
"checkpoint-interval"
,
600
*
time
.
Second
,
"save checkpoint per interval seconds"
)
checkpointInterval
:=
flag
.
Duration
(
"checkpoint-interval"
,
600
*
time
.
Second
,
"save checkpoint per interval seconds"
)
...
@@ -53,7 +56,7 @@ func main() {
...
@@ -53,7 +56,7 @@ func main() {
if
*
index
>=
0
{
if
*
index
>=
0
{
idx
=
*
index
idx
=
*
index
}
else
{
}
else
{
e
=
pserver
.
NewEtcdClient
(
*
etcdEndpoint
,
*
numPservers
,
*
etcdTimeout
)
e
=
pserver
.
NewEtcdClient
(
*
etcdEndpoint
,
*
numPservers
,
*
dialTimeout
,
*
etcdTTL
)
idx
,
err
=
e
.
Register
(
*
port
)
idx
,
err
=
e
.
Register
(
*
port
)
candy
.
Must
(
err
)
candy
.
Must
(
err
)
...
@@ -67,6 +70,20 @@ func main() {
...
@@ -67,6 +70,20 @@ func main() {
}
}
}
}
shutdown
:=
func
()
{
log
.
Infoln
(
"shutting down gracefully"
)
sErr
:=
e
.
Shutdown
()
if
sErr
!=
nil
{
log
.
Errorln
(
sErr
)
}
}
// Guaranteed to run even if a panic happens.
defer
shutdown
()
c
:=
make
(
chan
os
.
Signal
,
1
)
signal
.
Notify
(
c
,
os
.
Interrupt
)
s
,
err
:=
pserver
.
NewService
(
idx
,
*
checkpointInterval
,
*
checkpointPath
,
e
,
cp
)
s
,
err
:=
pserver
.
NewService
(
idx
,
*
checkpointInterval
,
*
checkpointPath
,
e
,
cp
)
candy
.
Must
(
err
)
candy
.
Must
(
err
)
...
@@ -77,7 +94,11 @@ func main() {
...
@@ -77,7 +94,11 @@ func main() {
l
,
err
:=
net
.
Listen
(
"tcp"
,
":"
+
strconv
.
Itoa
(
*
port
))
l
,
err
:=
net
.
Listen
(
"tcp"
,
":"
+
strconv
.
Itoa
(
*
port
))
candy
.
Must
(
err
)
candy
.
Must
(
err
)
go
func
()
{
log
.
Infof
(
"start pserver at port %d"
,
*
port
)
log
.
Infof
(
"start pserver at port %d"
,
*
port
)
err
=
http
.
Serve
(
l
,
nil
)
err
=
http
.
Serve
(
l
,
nil
)
candy
.
Must
(
err
)
candy
.
Must
(
err
)
}()
<-
c
}
}
go/master/etcd_client.go
浏览文件 @
6a0b3657
...
@@ -39,15 +39,12 @@ type EtcdClient struct {
...
@@ -39,15 +39,12 @@ type EtcdClient struct {
statePath
string
statePath
string
client
*
clientv3
.
Client
client
*
clientv3
.
Client
lock
*
concurrency
.
Mutex
lock
*
concurrency
.
Mutex
sess
*
concurrency
.
Session
}
}
// NewEtcdClient creates a new EtcdClient.
// NewEtcdClient creates a new EtcdClient.
func
NewEtcdClient
(
endpoints
[]
string
,
addr
string
,
lockPath
,
addrPath
,
statePath
string
,
ttlSec
int
)
(
*
EtcdClient
,
error
)
{
func
NewEtcdClient
(
endpoints
[]
string
,
addr
string
,
lockPath
,
addrPath
,
statePath
string
,
ttlSec
int
)
(
*
EtcdClient
,
error
)
{
log
.
Debugf
(
"Connecting to etcd at %v"
,
endpoints
)
log
.
Debugf
(
"Connecting to etcd at %v"
,
endpoints
)
// TODO(helin): gracefully shutdown etcd store. Because etcd
// store holds a etcd lock, even though the lock will expire
// when the lease timeout, we need to implement graceful
// shutdown to release the lock.
cli
,
err
:=
clientv3
.
New
(
clientv3
.
Config
{
cli
,
err
:=
clientv3
.
New
(
clientv3
.
Config
{
Endpoints
:
endpoints
,
Endpoints
:
endpoints
,
DialTimeout
:
dialTimeout
,
DialTimeout
:
dialTimeout
,
...
@@ -67,12 +64,12 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat
...
@@ -67,12 +64,12 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat
// one master running, but split-brain problem may cause
// one master running, but split-brain problem may cause
// multiple master servers running), and the cluster management
// multiple master servers running), and the cluster management
// software will kill one of them.
// software will kill one of them.
log
.
Debug
f
(
"Trying to acquire lock at %s."
,
lockPath
)
log
.
Info
f
(
"Trying to acquire lock at %s."
,
lockPath
)
err
=
lock
.
Lock
(
context
.
TODO
())
err
=
lock
.
Lock
(
context
.
TODO
())
if
err
!=
nil
{
if
err
!=
nil
{
return
nil
,
err
return
nil
,
err
}
}
log
.
Debug
f
(
"Successfully acquired lock at %s."
,
lockPath
)
log
.
Info
f
(
"Successfully acquired lock at %s."
,
lockPath
)
put
:=
clientv3
.
OpPut
(
addrPath
,
addr
)
put
:=
clientv3
.
OpPut
(
addrPath
,
addr
)
resp
,
err
:=
cli
.
Txn
(
context
.
Background
())
.
If
(
lock
.
IsOwner
())
.
Then
(
put
)
.
Commit
()
resp
,
err
:=
cli
.
Txn
(
context
.
Background
())
.
If
(
lock
.
IsOwner
())
.
Then
(
put
)
.
Commit
()
...
@@ -89,6 +86,7 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat
...
@@ -89,6 +86,7 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat
statePath
:
statePath
,
statePath
:
statePath
,
client
:
cli
,
client
:
cli
,
lock
:
lock
,
lock
:
lock
,
sess
:
sess
,
}
}
return
e
,
nil
return
e
,
nil
...
@@ -157,6 +155,21 @@ func (e *EtcdClient) Load() ([]byte, error) {
...
@@ -157,6 +155,21 @@ func (e *EtcdClient) Load() ([]byte, error) {
return
state
,
nil
return
state
,
nil
}
}
// Shutdown shuts down the etcd client gracefully.
func
(
e
*
EtcdClient
)
Shutdown
()
error
{
err
:=
e
.
sess
.
Close
()
newErr
:=
e
.
client
.
Close
()
if
newErr
!=
nil
{
if
err
==
nil
{
err
=
newErr
}
else
{
log
.
Errorln
(
newErr
)
}
}
return
err
}
// GetKey gets the value by the specified key.
// GetKey gets the value by the specified key.
func
GetKey
(
c
*
clientv3
.
Client
,
key
string
,
timeout
time
.
Duration
)
(
string
,
error
)
{
func
GetKey
(
c
*
clientv3
.
Client
,
key
string
,
timeout
time
.
Duration
)
(
string
,
error
)
{
ctx
,
cancel
:=
context
.
WithTimeout
(
context
.
Background
(),
timeout
)
ctx
,
cancel
:=
context
.
WithTimeout
(
context
.
Background
(),
timeout
)
...
...
go/master/inmem_store.go
浏览文件 @
6a0b3657
...
@@ -40,3 +40,8 @@ func (m *InMemStore) Load() ([]byte, error) {
...
@@ -40,3 +40,8 @@ func (m *InMemStore) Load() ([]byte, error) {
return
m
.
buf
,
nil
return
m
.
buf
,
nil
}
}
// Shutdown shuts down the in mem store.
func
(
m
*
InMemStore
)
Shutdown
()
error
{
return
nil
}
go/master/service.go
浏览文件 @
6a0b3657
...
@@ -50,6 +50,7 @@ var ErrPassAfter = errors.New("pass number larger than master")
...
@@ -50,6 +50,7 @@ var ErrPassAfter = errors.New("pass number larger than master")
type
Store
interface
{
type
Store
interface
{
Save
([]
byte
)
error
Save
([]
byte
)
error
Load
()
([]
byte
,
error
)
Load
()
([]
byte
,
error
)
Shutdown
()
error
}
}
// Chunk is a chunk of data consisting of several data instances.
// Chunk is a chunk of data consisting of several data instances.
...
...
go/pserver/client/c/cclient.go
浏览文件 @
6a0b3657
...
@@ -55,10 +55,10 @@ var curHandle C.paddle_pserver_client
...
@@ -55,10 +55,10 @@ var curHandle C.paddle_pserver_client
func
add
(
c
*
client
.
Client
)
C
.
paddle_pserver_client
{
func
add
(
c
*
client
.
Client
)
C
.
paddle_pserver_client
{
mu
.
Lock
()
mu
.
Lock
()
defer
mu
.
Unlock
()
defer
mu
.
Unlock
()
cli
ent
:=
curHandle
cli
:=
curHandle
curHandle
++
curHandle
++
handleMap
[
cli
ent
]
=
c
handleMap
[
cli
]
=
c
return
cli
ent
return
cli
}
}
func
get
(
client
C
.
paddle_pserver_client
)
*
client
.
Client
{
func
get
(
client
C
.
paddle_pserver_client
)
*
client
.
Client
{
...
...
go/pserver/etcd_client.go
浏览文件 @
6a0b3657
...
@@ -34,16 +34,19 @@ const (
...
@@ -34,16 +34,19 @@ const (
PsPath
=
"/ps/"
PsPath
=
"/ps/"
// PsCheckpoint is the etcd path for storing checkpoint information
// PsCheckpoint is the etcd path for storing checkpoint information
PsCheckpoint
=
"/checkpoints/"
PsCheckpoint
=
"/checkpoints/"
retryTimeout
=
5
*
time
.
Second
)
)
// EtcdClient is the etcd client that the pserver uses for fault
// EtcdClient is the etcd client that the pserver uses for fault
// tolerance, service registry and coordination.
// tolerance, service registry and coordination.
type
EtcdClient
struct
{
type
EtcdClient
struct
{
numPservers
int
numPservers
int
etcdEndpoints
string
endpoints
string
etcdClient
*
clientv3
.
Client
client
*
clientv3
.
Client
// etcdTimeout is also used as retry intervals.
sess
*
concurrency
.
Session
etcdTimeout
time
.
Duration
dialTimeout
time
.
Duration
ttlSec
int
// FIXME: ensure GetExternalIP gets the correct ip for trainers to connect.
// FIXME: ensure GetExternalIP gets the correct ip for trainers to connect.
externalIP
string
externalIP
string
// desired number of pservers in the job.
// desired number of pservers in the job.
...
@@ -52,11 +55,12 @@ type EtcdClient struct {
...
@@ -52,11 +55,12 @@ type EtcdClient struct {
}
}
// NewEtcdClient creates an EtcdClient
// NewEtcdClient creates an EtcdClient
func
NewEtcdClient
(
endpoints
string
,
numPservers
int
,
timeout
time
.
Duration
)
*
EtcdClient
{
func
NewEtcdClient
(
endpoints
string
,
numPservers
int
,
dialtimeout
time
.
Duration
,
ttlSec
int
)
*
EtcdClient
{
return
&
EtcdClient
{
return
&
EtcdClient
{
etcdTimeout
:
timeout
,
dialTimeout
:
dialtimeout
,
ttlSec
:
ttlSec
,
numPservers
:
numPservers
,
numPservers
:
numPservers
,
e
tcdEndpoints
:
endpoints
,
e
ndpoints
:
endpoints
,
}
}
}
}
...
@@ -64,7 +68,6 @@ func NewEtcdClient(endpoints string, numPservers int, timeout time.Duration) *Et
...
@@ -64,7 +68,6 @@ func NewEtcdClient(endpoints string, numPservers int, timeout time.Duration) *Et
//
//
// Register returns the index of the current pserver.
// Register returns the index of the current pserver.
func
(
e
*
EtcdClient
)
Register
(
port
int
)
(
int
,
error
)
{
func
(
e
*
EtcdClient
)
Register
(
port
int
)
(
int
,
error
)
{
var
err
error
var
err
error
e
.
externalIP
,
err
=
networkhelper
.
GetExternalIP
()
e
.
externalIP
,
err
=
networkhelper
.
GetExternalIP
()
if
err
!=
nil
{
if
err
!=
nil
{
...
@@ -72,19 +75,26 @@ func (e *EtcdClient) Register(port int) (int, error) {
...
@@ -72,19 +75,26 @@ func (e *EtcdClient) Register(port int) (int, error) {
}
}
// initialize connection to etcd.
// initialize connection to etcd.
ep
:=
strings
.
Split
(
e
.
e
tcdE
ndpoints
,
","
)
ep
:=
strings
.
Split
(
e
.
endpoints
,
","
)
for
{
for
{
cli
,
err
:=
clientv3
.
New
(
clientv3
.
Config
{
cli
,
err
:=
clientv3
.
New
(
clientv3
.
Config
{
Endpoints
:
ep
,
Endpoints
:
ep
,
DialTimeout
:
e
.
etcd
Timeout
,
DialTimeout
:
e
.
dial
Timeout
,
})
})
if
err
!=
nil
{
if
err
!=
nil
{
log
.
Errorf
(
"connect to etcd error: %v"
,
err
)
log
.
Errorf
(
"connect to etcd error: %v"
,
err
)
time
.
Sleep
(
e
.
etcdTimeout
)
time
.
Sleep
(
retryTimeout
)
continue
}
e
.
client
=
cli
sess
,
err
:=
concurrency
.
NewSession
(
cli
,
concurrency
.
WithTTL
(
e
.
ttlSec
))
if
err
!=
nil
{
log
.
Errorf
(
"create etcd session error: %v"
,
err
)
time
.
Sleep
(
retryTimeout
)
continue
continue
}
}
e
.
etcdClient
=
cli
e
.
sess
=
sess
log
.
Debugf
(
"inited client to %s"
,
e
.
e
tcdE
ndpoints
)
log
.
Debugf
(
"inited client to %s"
,
e
.
endpoints
)
break
break
}
}
// init /ps_desired using transaction, for multiple pservers may want to write
// init /ps_desired using transaction, for multiple pservers may want to write
...
@@ -95,7 +105,7 @@ func (e *EtcdClient) Register(port int) (int, error) {
...
@@ -95,7 +105,7 @@ func (e *EtcdClient) Register(port int) (int, error) {
cancel
()
cancel
()
if
err
!=
nil
{
if
err
!=
nil
{
log
.
Warn
(
err
)
log
.
Warn
(
err
)
time
.
Sleep
(
e
.
etcd
Timeout
)
time
.
Sleep
(
retry
Timeout
)
continue
continue
}
}
break
break
...
@@ -106,18 +116,18 @@ func (e *EtcdClient) Register(port int) (int, error) {
...
@@ -106,18 +116,18 @@ func (e *EtcdClient) Register(port int) (int, error) {
// wait and set s.desired init value
// wait and set s.desired init value
for
{
for
{
ctx
,
cancel
:=
context
.
WithTimeout
(
context
.
Background
(),
time
.
Second
)
ctx
,
cancel
:=
context
.
WithTimeout
(
context
.
Background
(),
time
.
Second
)
resp
,
err
:=
e
.
etcdC
lient
.
Get
(
ctx
,
PsDesired
)
resp
,
err
:=
e
.
c
lient
.
Get
(
ctx
,
PsDesired
)
cancel
()
cancel
()
if
err
!=
nil
{
if
err
!=
nil
{
log
.
Errorf
(
"getting %s error: %v"
,
PsDesired
,
err
)
log
.
Errorf
(
"getting %s error: %v"
,
PsDesired
,
err
)
time
.
Sleep
(
e
.
etcd
Timeout
)
time
.
Sleep
(
retry
Timeout
)
continue
continue
}
}
if
len
(
resp
.
Kvs
)
!=
0
{
if
len
(
resp
.
Kvs
)
!=
0
{
e
.
desired
,
err
=
strconv
.
Atoi
(
string
(
resp
.
Kvs
[
0
]
.
Value
))
e
.
desired
,
err
=
strconv
.
Atoi
(
string
(
resp
.
Kvs
[
0
]
.
Value
))
if
err
!=
nil
{
if
err
!=
nil
{
log
.
Errorf
(
"value of %s invalid %v
\n
"
,
PsDesired
,
err
)
log
.
Errorf
(
"value of %s invalid %v
\n
"
,
PsDesired
,
err
)
time
.
Sleep
(
e
.
etcd
Timeout
)
time
.
Sleep
(
retry
Timeout
)
// NOTE: wait until the ps_desired value changes
// NOTE: wait until the ps_desired value changes
continue
continue
}
}
...
@@ -134,7 +144,7 @@ func (e *EtcdClient) Register(port int) (int, error) {
...
@@ -134,7 +144,7 @@ func (e *EtcdClient) Register(port int) (int, error) {
cancel
()
cancel
()
if
err
!=
nil
{
if
err
!=
nil
{
log
.
Warn
(
err
)
log
.
Warn
(
err
)
time
.
Sleep
(
e
.
etcd
Timeout
)
time
.
Sleep
(
retry
Timeout
)
continue
continue
}
}
break
break
...
@@ -144,10 +154,10 @@ func (e *EtcdClient) Register(port int) (int, error) {
...
@@ -144,10 +154,10 @@ func (e *EtcdClient) Register(port int) (int, error) {
}
}
func
(
e
*
EtcdClient
)
initDesiredPservers
(
ctx
context
.
Context
,
numPservers
int
)
(
*
clientv3
.
TxnResponse
,
error
)
{
func
(
e
*
EtcdClient
)
initDesiredPservers
(
ctx
context
.
Context
,
numPservers
int
)
(
*
clientv3
.
TxnResponse
,
error
)
{
return
concurrency
.
NewSTM
(
e
.
etcdC
lient
,
func
(
c
concurrency
.
STM
)
error
{
return
concurrency
.
NewSTM
(
e
.
c
lient
,
func
(
c
concurrency
.
STM
)
error
{
dsStr
:=
c
.
Get
(
PsDesired
)
dsStr
:=
c
.
Get
(
PsDesired
)
if
dsStr
==
""
{
if
dsStr
==
""
{
c
.
Put
(
PsDesired
,
strconv
.
Itoa
(
numPservers
))
c
.
Put
(
PsDesired
,
strconv
.
Itoa
(
numPservers
)
,
clientv3
.
WithLease
(
e
.
sess
.
Lease
())
)
}
}
return
nil
return
nil
},
concurrency
.
WithAbortContext
(
ctx
),
concurrency
.
WithIsolation
(
concurrency
.
RepeatableReads
))
},
concurrency
.
WithAbortContext
(
ctx
),
concurrency
.
WithIsolation
(
concurrency
.
RepeatableReads
))
...
@@ -156,7 +166,7 @@ func (e *EtcdClient) initDesiredPservers(ctx context.Context, numPservers int) (
...
@@ -156,7 +166,7 @@ func (e *EtcdClient) initDesiredPservers(ctx context.Context, numPservers int) (
// registerPserverEtcd registers pserver node on etcd using transaction.
// registerPserverEtcd registers pserver node on etcd using transaction.
func
(
e
*
EtcdClient
)
registerPserverEtcd
(
ctx
context
.
Context
,
port
int
)
(
int
,
error
)
{
func
(
e
*
EtcdClient
)
registerPserverEtcd
(
ctx
context
.
Context
,
port
int
)
(
int
,
error
)
{
var
idx
int
var
idx
int
_
,
err
:=
concurrency
.
NewSTM
(
e
.
etcdC
lient
,
func
(
c
concurrency
.
STM
)
error
{
_
,
err
:=
concurrency
.
NewSTM
(
e
.
c
lient
,
func
(
c
concurrency
.
STM
)
error
{
registered
:=
false
registered
:=
false
for
i
:=
0
;
i
<
e
.
desired
;
i
++
{
for
i
:=
0
;
i
<
e
.
desired
;
i
++
{
psKey
:=
PsPath
+
strconv
.
Itoa
(
i
)
psKey
:=
PsPath
+
strconv
.
Itoa
(
i
)
...
@@ -165,26 +175,10 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context, port int) (int, er
...
@@ -165,26 +175,10 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context, port int) (int, er
log
.
Debugf
(
"got value (%s) for key: %s"
,
ps
,
psKey
)
log
.
Debugf
(
"got value (%s) for key: %s"
,
ps
,
psKey
)
if
ps
==
""
{
if
ps
==
""
{
resp
,
err
:=
e
.
etcdClient
.
Grant
(
context
.
TODO
(),
5
)
if
err
!=
nil
{
log
.
Fatal
(
err
)
}
// find the first id and write info
// find the first id and write info
pserverAddr
:=
e
.
externalIP
+
":"
+
strconv
.
Itoa
(
port
)
pserverAddr
:=
e
.
externalIP
+
":"
+
strconv
.
Itoa
(
port
)
c
.
Put
(
psKey
,
pserverAddr
,
clientv3
.
WithLease
(
resp
.
ID
))
c
.
Put
(
psKey
,
pserverAddr
,
clientv3
.
WithLease
(
e
.
sess
.
Lease
()
))
log
.
Debugf
(
"set pserver node %s with value %s"
,
psKey
,
pserverAddr
)
log
.
Debugf
(
"set pserver node %s with value %s"
,
psKey
,
pserverAddr
)
ch
,
kaerr
:=
e
.
etcdClient
.
KeepAlive
(
context
.
TODO
(),
resp
.
ID
)
if
kaerr
!=
nil
{
log
.
Errorf
(
"keepalive etcd node error: %v"
,
kaerr
)
return
kaerr
}
// Eat the keep alive message so etcd
// will not expire the lease.
go
func
(
ch
<-
chan
*
clientv3
.
LeaseKeepAliveResponse
)
{
ka
:=
<-
ch
log
.
Debugf
(
"keepalive: %d
\n
"
,
ka
.
TTL
)
}(
ch
)
log
.
Debug
(
"register finished"
)
log
.
Debug
(
"register finished"
)
idx
=
i
idx
=
i
registered
=
true
registered
=
true
...
@@ -207,7 +201,7 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context, port int) (int, er
...
@@ -207,7 +201,7 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context, port int) (int, er
// GetKey gets the value by the specified key
// GetKey gets the value by the specified key
func
(
e
*
EtcdClient
)
GetKey
(
key
string
,
timeout
time
.
Duration
)
([]
byte
,
error
)
{
func
(
e
*
EtcdClient
)
GetKey
(
key
string
,
timeout
time
.
Duration
)
([]
byte
,
error
)
{
ctx
,
cancel
:=
context
.
WithTimeout
(
context
.
Background
(),
timeout
)
ctx
,
cancel
:=
context
.
WithTimeout
(
context
.
Background
(),
timeout
)
resp
,
err
:=
e
.
etcdC
lient
.
Get
(
ctx
,
key
)
resp
,
err
:=
e
.
c
lient
.
Get
(
ctx
,
key
)
cancel
()
cancel
()
if
err
!=
nil
{
if
err
!=
nil
{
return
[]
byte
{},
err
return
[]
byte
{},
err
...
@@ -223,7 +217,27 @@ func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) {
...
@@ -223,7 +217,27 @@ func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) {
// PutKey puts the value into etcd under the specified key
// PutKey puts the value into etcd under the specified key
func
(
e
*
EtcdClient
)
PutKey
(
key
string
,
value
[]
byte
,
timeout
time
.
Duration
)
error
{
func
(
e
*
EtcdClient
)
PutKey
(
key
string
,
value
[]
byte
,
timeout
time
.
Duration
)
error
{
ctx
,
cancel
:=
context
.
WithTimeout
(
context
.
Background
(),
timeout
)
ctx
,
cancel
:=
context
.
WithTimeout
(
context
.
Background
(),
timeout
)
_
,
err
:=
e
.
etcdClient
.
Put
(
ctx
,
key
,
string
(
value
))
_
,
err
:=
e
.
client
.
Put
(
ctx
,
key
,
string
(
value
),
clientv3
.
WithLease
(
e
.
sess
.
Lease
()
))
cancel
()
cancel
()
return
err
return
err
}
}
// Shutdown shuts down the etcd client gracefully.
func
(
e
*
EtcdClient
)
Shutdown
()
error
{
var
err
error
if
e
.
sess
!=
nil
{
err
=
e
.
sess
.
Close
()
}
if
e
.
client
!=
nil
{
newErr
:=
e
.
client
.
Close
()
if
newErr
!=
nil
{
if
err
!=
nil
{
log
.
Errorln
(
newErr
)
}
else
{
err
=
newErr
}
}
}
return
err
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录