Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
72a73ab6
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
72a73ab6
编写于
6月 08, 2017
作者:
H
Helin Wang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
implement master server client, RPC part.
上级
f05649af
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
190 addition
and
9 deletion
+190
-9
go/cmd/master/master.go
go/cmd/master/master.go
+0
-2
go/connection/conn.go
go/connection/conn.go
+15
-0
go/master/client.go
go/master/client.go
+74
-0
go/master/client_test.go
go/master/client_test.go
+78
-0
go/master/service.go
go/master/service.go
+9
-2
go/pserver/client.go
go/pserver/client.go
+14
-5
未找到文件。
go/cmd/master/master.go
浏览文件 @
72a73ab6
...
...
@@ -50,7 +50,6 @@ func main() {
panic
(
"no valid datset specified."
)
}
idx
:=
0
for
_
,
path
:=
range
paths
{
f
,
err
:=
os
.
Open
(
path
)
if
err
!=
nil
{
...
...
@@ -66,7 +65,6 @@ func main() {
count
:=
index
.
NumChunks
()
for
i
:=
0
;
i
<
count
;
i
++
{
chunk
:=
master
.
Chunk
{
Idx
:
idx
,
Path
:
path
,
Index
:
*
index
.
ChunkIndex
(
i
),
}
...
...
go/connection/conn.go
浏览文件 @
72a73ab6
...
...
@@ -21,6 +21,18 @@ func New() *Conn {
return
c
}
// Close closes the connection.
func
(
c
*
Conn
)
Close
()
error
{
c
.
mu
.
Lock
()
defer
c
.
mu
.
Unlock
()
if
c
.
client
==
nil
{
return
nil
}
return
c
.
client
.
Close
()
}
// Connect connects the connection to an address.
func
(
c
*
Conn
)
Connect
(
addr
string
)
error
{
c
.
mu
.
Lock
()
...
...
@@ -56,6 +68,9 @@ func (c *Conn) Connect(addr string) error {
return
nil
}
// TODO(helin): refactor Call to be able to perform given retry
// policy.
// Call makes an RPC call.
//
// Call will be blocked until the connection to remote RPC service
...
...
go/master/client.go
0 → 100644
浏览文件 @
72a73ab6
package
master
import
(
"log"
"time"
"github.com/PaddlePaddle/Paddle/go/connection"
)
// Addresser provides the address of the master server.
//
// Address returns the current master address as a string. Client treats
// an empty string as "no master": it closes its connection instead of
// connecting.
type Addresser interface {
	Address() string
}
// Client is the client of the master server.
type
Client
struct
{
conn
*
connection
.
Conn
}
// NewClient creates a new Client.
func
NewClient
(
addr
Addresser
)
*
Client
{
c
:=
&
Client
{}
c
.
conn
=
connection
.
New
()
go
c
.
monitorMaster
(
addr
)
return
c
}
func
(
c
*
Client
)
monitorMaster
(
addr
Addresser
)
{
lastMaster
:=
""
monitor
:=
func
()
{
curMaster
:=
addr
.
Address
()
if
curMaster
!=
lastMaster
{
if
curMaster
==
""
{
err
:=
c
.
conn
.
Close
()
if
err
!=
nil
{
log
.
Println
(
err
)
}
}
else
{
err
:=
c
.
conn
.
Connect
(
curMaster
)
if
err
!=
nil
{
log
.
Println
(
err
)
// connect to addr failed, set
// to last known addr in order
// to retry next time.
curMaster
=
lastMaster
}
}
}
lastMaster
=
curMaster
}
monitor
()
ticker
:=
time
.
NewTicker
(
10
*
time
.
Second
)
for
_
=
range
ticker
.
C
{
monitor
()
}
}
// GetTask gets a new task from the master server.
func
(
c
*
Client
)
GetTask
()
(
Task
,
error
)
{
var
dummy
int
var
t
Task
err
:=
c
.
conn
.
Call
(
"Service.GetTask"
,
dummy
,
&
t
)
return
t
,
err
}
// TaskFinished tells the master server a task is finished.
func
(
c
*
Client
)
TaskFinished
(
taskID
int
)
error
{
var
dummy
int
return
c
.
conn
.
Call
(
"Service.TaskFinished"
,
taskID
,
&
dummy
)
}
go/master/client_test.go
0 → 100644
浏览文件 @
72a73ab6
package
master_test
import
(
"fmt"
"net"
"net/http"
"net/rpc"
"strconv"
"strings"
"testing"
"time"
"github.com/PaddlePaddle/Paddle/go/master"
)
// totalTask is the number of chunks handed to the test master service.
const totalTask = 20

// chunkPerTask is the number of chunks the service groups into one task.
const chunkPerTask = 10

// port is the TCP port the test RPC server listens on; init assigns it.
var port int
func
init
()
{
l
,
err
:=
net
.
Listen
(
"tcp"
,
":0"
)
if
err
!=
nil
{
panic
(
err
)
}
ss
:=
strings
.
Split
(
l
.
Addr
()
.
String
(),
":"
)
p
,
err
:=
strconv
.
Atoi
(
ss
[
len
(
ss
)
-
1
])
if
err
!=
nil
{
panic
(
err
)
}
port
=
p
go
func
(
l
net
.
Listener
)
{
chunks
:=
make
([]
master
.
Chunk
,
totalTask
)
s
:=
master
.
NewService
(
chunks
,
chunkPerTask
,
time
.
Second
,
1
)
server
:=
rpc
.
NewServer
()
err
:=
server
.
Register
(
s
)
if
err
!=
nil
{
panic
(
err
)
}
mux
:=
http
.
NewServeMux
()
mux
.
Handle
(
rpc
.
DefaultRPCPath
,
server
)
err
=
http
.
Serve
(
l
,
mux
)
if
err
!=
nil
{
panic
(
err
)
}
}(
l
)
}
// addresser is a fixed address string with an Address method, used by
// the tests as the address source for a client.
type addresser string

// Address returns the address the value was created from.
func (addr addresser) Address() string {
	return string(addr)
}
func
TestClientFull
(
t
*
testing
.
T
)
{
c
:=
master
.
NewClient
(
addresser
(
fmt
.
Sprintf
(
":%d"
,
port
)))
for
i
:=
0
;
i
<
5
*
totalTask
/
chunkPerTask
;
i
++
{
task
,
err
:=
c
.
GetTask
()
if
err
!=
nil
{
panic
(
err
)
}
if
len
(
task
.
Chunks
)
!=
chunkPerTask
{
t
.
Fatal
(
"wrong number of chunk per task"
,
len
(
task
.
Chunks
))
}
err
=
c
.
TaskFinished
(
task
.
ID
)
if
err
!=
nil
{
panic
(
err
)
}
}
}
go/master/service.go
浏览文件 @
72a73ab6
...
...
@@ -75,9 +75,8 @@ func NewService(chunks []Chunk, chunksPerTask int, timeoutDur time.Duration, tim
// Chunk is a chunk of data consisting of several data instances.
type
Chunk
struct
{
Idx
int
// index of the chunk within the file
Path
string
Index
recordio
.
Index
//
bloc
k index
Index
recordio
.
Index
//
chun
k index
}
// Task is the basic unit of data instances assigned to trainers.
...
...
@@ -123,6 +122,8 @@ func (s *Service) GetTask(dummy int, task *Task) error {
return
err
}
*
task
=
t
.
Task
time
.
AfterFunc
(
s
.
timeoutDur
,
func
(
taskID
int
,
epoch
int
)
func
()
{
return
func
()
{
s
.
mu
.
Lock
()
...
...
@@ -174,5 +175,11 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error {
t
.
NumTimeout
=
0
s
.
taskQueues
.
Done
=
append
(
s
.
taskQueues
.
Done
,
t
)
delete
(
s
.
taskQueues
.
Pending
,
taskID
)
if
len
(
s
.
taskQueues
.
Todo
)
==
0
{
s
.
taskQueues
.
Todo
=
s
.
taskQueues
.
Done
s
.
taskQueues
.
Done
=
nil
}
return
s
.
snapshot
()
}
go/pserver/client.go
浏览文件 @
72a73ab6
...
...
@@ -47,7 +47,7 @@ func NewClient(l Lister, pserverNum int, sel Selector) *Client {
// monitorPservers monitors pserver addresses, and updates connection
// when the address changes.
func
(
c
*
Client
)
monitorPservers
(
l
Lister
,
pserverNum
int
)
{
known
Servers
:=
make
([]
Server
,
pserverNum
)
last
Servers
:=
make
([]
Server
,
pserverNum
)
ticker
:=
time
.
NewTicker
(
10
*
time
.
Second
)
monitor
:=
func
()
{
curServers
:=
make
([]
Server
,
pserverNum
)
...
...
@@ -56,8 +56,17 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) {
curServers
[
l
.
Index
]
=
l
}
for
i
:=
range
knownServers
{
if
knownServers
[
i
]
.
Addr
!=
curServers
[
i
]
.
Addr
{
for
i
:=
range
lastServers
{
if
lastServers
[
i
]
.
Addr
!=
curServers
[
i
]
.
Addr
{
if
curServers
[
i
]
.
Addr
==
""
{
err
:=
c
.
pservers
[
i
]
.
Close
()
if
err
!=
nil
{
log
.
Println
(
err
)
}
continue
}
err
:=
c
.
pservers
[
i
]
.
Connect
(
curServers
[
i
]
.
Addr
)
if
err
!=
nil
{
log
.
Println
(
err
)
...
...
@@ -65,12 +74,12 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) {
// connect to addr failed, set
// to last known addr in order
// to retry next time.
curServers
[
i
]
.
Addr
=
known
Servers
[
i
]
.
Addr
curServers
[
i
]
.
Addr
=
last
Servers
[
i
]
.
Addr
}
}
}
known
Servers
=
curServers
last
Servers
=
curServers
}
monitor
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录