Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
72a73ab6
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
72a73ab6
编写于
6月 08, 2017
作者:
H
Helin Wang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
implement master server client, RPC part.
上级
f05649af
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
190 additions
and
9 deletions
+190
-9
go/cmd/master/master.go
go/cmd/master/master.go
+0
-2
go/connection/conn.go
go/connection/conn.go
+15
-0
go/master/client.go
go/master/client.go
+74
-0
go/master/client_test.go
go/master/client_test.go
+78
-0
go/master/service.go
go/master/service.go
+9
-2
go/pserver/client.go
go/pserver/client.go
+14
-5
未找到文件。
go/cmd/master/master.go
浏览文件 @
72a73ab6
...
...
@@ -50,7 +50,6 @@ func main() {
panic
(
"no valid datset specified."
)
}
idx
:=
0
for
_
,
path
:=
range
paths
{
f
,
err
:=
os
.
Open
(
path
)
if
err
!=
nil
{
...
...
@@ -66,7 +65,6 @@ func main() {
count
:=
index
.
NumChunks
()
for
i
:=
0
;
i
<
count
;
i
++
{
chunk
:=
master
.
Chunk
{
Idx
:
idx
,
Path
:
path
,
Index
:
*
index
.
ChunkIndex
(
i
),
}
...
...
go/connection/conn.go
浏览文件 @
72a73ab6
...
...
@@ -21,6 +21,18 @@ func New() *Conn {
return
c
}
// Close closes the connection.
func
(
c
*
Conn
)
Close
()
error
{
c
.
mu
.
Lock
()
defer
c
.
mu
.
Unlock
()
if
c
.
client
==
nil
{
return
nil
}
return
c
.
client
.
Close
()
}
// Connect connects the connection to an address.
func
(
c
*
Conn
)
Connect
(
addr
string
)
error
{
c
.
mu
.
Lock
()
...
...
@@ -56,6 +68,9 @@ func (c *Conn) Connect(addr string) error {
return
nil
}
// TODO(helin): refactor Call to be able to perform given retry
// policy.
// Call makes an RPC call.
//
// Call will be blocked until the connection to remote RPC service
...
...
go/master/client.go
0 → 100644
浏览文件 @
72a73ab6
package
master
import
(
"log"
"time"
"github.com/PaddlePaddle/Paddle/go/connection"
)
// Addresser provides the address of the master server.
type Addresser interface {
	// Address returns the master server's current address.
	Address() string
}
// Client is the client of the master server.
type
Client
struct
{
conn
*
connection
.
Conn
}
// NewClient creates a new Client.
func
NewClient
(
addr
Addresser
)
*
Client
{
c
:=
&
Client
{}
c
.
conn
=
connection
.
New
()
go
c
.
monitorMaster
(
addr
)
return
c
}
func
(
c
*
Client
)
monitorMaster
(
addr
Addresser
)
{
lastMaster
:=
""
monitor
:=
func
()
{
curMaster
:=
addr
.
Address
()
if
curMaster
!=
lastMaster
{
if
curMaster
==
""
{
err
:=
c
.
conn
.
Close
()
if
err
!=
nil
{
log
.
Println
(
err
)
}
}
else
{
err
:=
c
.
conn
.
Connect
(
curMaster
)
if
err
!=
nil
{
log
.
Println
(
err
)
// connect to addr failed, set
// to last known addr in order
// to retry next time.
curMaster
=
lastMaster
}
}
}
lastMaster
=
curMaster
}
monitor
()
ticker
:=
time
.
NewTicker
(
10
*
time
.
Second
)
for
_
=
range
ticker
.
C
{
monitor
()
}
}
// GetTask gets a new task from the master server.
func
(
c
*
Client
)
GetTask
()
(
Task
,
error
)
{
var
dummy
int
var
t
Task
err
:=
c
.
conn
.
Call
(
"Service.GetTask"
,
dummy
,
&
t
)
return
t
,
err
}
// TaskFinished tells the master server a task is finished.
func
(
c
*
Client
)
TaskFinished
(
taskID
int
)
error
{
var
dummy
int
return
c
.
conn
.
Call
(
"Service.TaskFinished"
,
taskID
,
&
dummy
)
}
go/master/client_test.go
0 → 100644
浏览文件 @
72a73ab6
package
master_test
import
(
"fmt"
"net"
"net/http"
"net/rpc"
"strconv"
"strings"
"testing"
"time"
"github.com/PaddlePaddle/Paddle/go/master"
)
const (
	// totalTask is the number of chunks given to the test master
	// service.
	totalTask = 20
	// chunkPerTask is the chunks-per-task setting given to the test
	// master service, and the chunk count expected in every task.
	chunkPerTask = 10
)

// port is the TCP port the test master server listens on. It is set
// by init.
var port int
func
init
()
{
l
,
err
:=
net
.
Listen
(
"tcp"
,
":0"
)
if
err
!=
nil
{
panic
(
err
)
}
ss
:=
strings
.
Split
(
l
.
Addr
()
.
String
(),
":"
)
p
,
err
:=
strconv
.
Atoi
(
ss
[
len
(
ss
)
-
1
])
if
err
!=
nil
{
panic
(
err
)
}
port
=
p
go
func
(
l
net
.
Listener
)
{
chunks
:=
make
([]
master
.
Chunk
,
totalTask
)
s
:=
master
.
NewService
(
chunks
,
chunkPerTask
,
time
.
Second
,
1
)
server
:=
rpc
.
NewServer
()
err
:=
server
.
Register
(
s
)
if
err
!=
nil
{
panic
(
err
)
}
mux
:=
http
.
NewServeMux
()
mux
.
Handle
(
rpc
.
DefaultRPCPath
,
server
)
err
=
http
.
Serve
(
l
,
mux
)
if
err
!=
nil
{
panic
(
err
)
}
}(
l
)
}
// addresser is a fixed address string; its Address method lets it be
// passed where an address provider is expected.
type addresser string

// Address returns the string the addresser was created from.
func (a addresser) Address() string {
	addr := string(a)
	return addr
}
func
TestClientFull
(
t
*
testing
.
T
)
{
c
:=
master
.
NewClient
(
addresser
(
fmt
.
Sprintf
(
":%d"
,
port
)))
for
i
:=
0
;
i
<
5
*
totalTask
/
chunkPerTask
;
i
++
{
task
,
err
:=
c
.
GetTask
()
if
err
!=
nil
{
panic
(
err
)
}
if
len
(
task
.
Chunks
)
!=
chunkPerTask
{
t
.
Fatal
(
"wrong number of chunk per task"
,
len
(
task
.
Chunks
))
}
err
=
c
.
TaskFinished
(
task
.
ID
)
if
err
!=
nil
{
panic
(
err
)
}
}
}
go/master/service.go
浏览文件 @
72a73ab6
...
...
@@ -75,9 +75,8 @@ func NewService(chunks []Chunk, chunksPerTask int, timeoutDur time.Duration, tim
// Chunk is a chunk of data consisted of several data instances.
type
Chunk
struct
{
Idx
int
// index of the chunk within the file
Path
string
Index
recordio
.
Index
//
bloc
k index
Index
recordio
.
Index
//
chun
k index
}
// Task is the basic unit of data instances assigned to trainers.
...
...
@@ -123,6 +122,8 @@ func (s *Service) GetTask(dummy int, task *Task) error {
return
err
}
*
task
=
t
.
Task
time
.
AfterFunc
(
s
.
timeoutDur
,
func
(
taskID
int
,
epoch
int
)
func
()
{
return
func
()
{
s
.
mu
.
Lock
()
...
...
@@ -174,5 +175,11 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error {
t
.
NumTimeout
=
0
s
.
taskQueues
.
Done
=
append
(
s
.
taskQueues
.
Done
,
t
)
delete
(
s
.
taskQueues
.
Pending
,
taskID
)
if
len
(
s
.
taskQueues
.
Todo
)
==
0
{
s
.
taskQueues
.
Todo
=
s
.
taskQueues
.
Done
s
.
taskQueues
.
Done
=
nil
}
return
s
.
snapshot
()
}
go/pserver/client.go
浏览文件 @
72a73ab6
...
...
@@ -47,7 +47,7 @@ func NewClient(l Lister, pserverNum int, sel Selector) *Client {
// monitorPservers monitors pserver addresses, and updates connection
// when the address changes.
func
(
c
*
Client
)
monitorPservers
(
l
Lister
,
pserverNum
int
)
{
known
Servers
:=
make
([]
Server
,
pserverNum
)
last
Servers
:=
make
([]
Server
,
pserverNum
)
ticker
:=
time
.
NewTicker
(
10
*
time
.
Second
)
monitor
:=
func
()
{
curServers
:=
make
([]
Server
,
pserverNum
)
...
...
@@ -56,8 +56,17 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) {
curServers
[
l
.
Index
]
=
l
}
for
i
:=
range
knownServers
{
if
knownServers
[
i
]
.
Addr
!=
curServers
[
i
]
.
Addr
{
for
i
:=
range
lastServers
{
if
lastServers
[
i
]
.
Addr
!=
curServers
[
i
]
.
Addr
{
if
curServers
[
i
]
.
Addr
==
""
{
err
:=
c
.
pservers
[
i
]
.
Close
()
if
err
!=
nil
{
log
.
Println
(
err
)
}
continue
}
err
:=
c
.
pservers
[
i
]
.
Connect
(
curServers
[
i
]
.
Addr
)
if
err
!=
nil
{
log
.
Println
(
err
)
...
...
@@ -65,12 +74,12 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) {
// connect to addr failed, set
// to last known addr in order
// to retry next time.
curServers
[
i
]
.
Addr
=
known
Servers
[
i
]
.
Addr
curServers
[
i
]
.
Addr
=
last
Servers
[
i
]
.
Addr
}
}
}
known
Servers
=
curServers
last
Servers
=
curServers
}
monitor
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录