提交 e4c7d8cc 编写于 作者: G gongweibao 提交者: GitHub

Merge branch 'develop' into recordio

group: deprecated-2017Q2
language: cpp language: cpp
cache: cache:
directories: directories:
- $HOME/third_party
- $HOME/.ccache - $HOME/.ccache
- $HOME/.cache/pip - $HOME/.cache/pip
- $TRAVIS_BUILD_DIR/build/third_party
sudo: required sudo: required
dist: trusty dist: trusty
os: os:
- linux - linux
env: env:
- JOB=DOCS - JOB=build_doc
- JOB=BUILD_AND_TEST - JOB=check_style
- JOB=PRE_COMMIT
addons: addons:
apt: apt:
packages: packages:
- gcc-4.8 - gcc-4.8
- g++-4.8 - g++-4.8
- gfortran-4.8
- git - git
- build-essential - build-essential
- python - python
...@@ -34,18 +33,7 @@ addons: ...@@ -34,18 +33,7 @@ addons:
- libtool - libtool
- ccache - ccache
before_install: before_install:
- | - if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
if [ ${JOB} == "BUILD_AND_TEST" ]; then
local change_list=`git diff --name-only $TRAVIS_COMMIT_RANGE`
if [ $? -eq 0 ]; then # if git diff return no zero, then rerun unit test.
if ! echo ${change_list} | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)'
then
echo "Only markdown docs were updated, stopping build process."
exit
fi
fi
fi
- if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
# Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python
# protobuf version. # protobuf version.
- pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
...@@ -54,8 +42,8 @@ before_install: ...@@ -54,8 +42,8 @@ before_install:
- | - |
function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
script: script:
- | - |
timeout 2580 paddle/scripts/travis/main.sh # 43min timeout timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout
RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi; RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi;
notifications: notifications:
email: email:
......
...@@ -71,7 +71,7 @@ if(ANDROID) ...@@ -71,7 +71,7 @@ if(ANDROID)
"Disable RDMA when cross-compiling for Android" FORCE) "Disable RDMA when cross-compiling for Android" FORCE)
endif(ANDROID) endif(ANDROID)
set(THIRD_PARTY_PATH "${PROJ_ROOT}/third_party" CACHE STRING set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
"A path setting third party libraries download & build directories.") "A path setting third party libraries download & build directories.")
if (WITH_C_API AND WITH_PYTHON) if (WITH_C_API AND WITH_PYTHON)
......
...@@ -25,7 +25,7 @@ COPY ./paddle/scripts/docker/root/ /root/ ...@@ -25,7 +25,7 @@ COPY ./paddle/scripts/docker/root/ /root/
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y \ apt-get install -y \
git python-pip python-dev openssh-server bison \ git python-pip python-dev openssh-server bison \
wget unzip tar xz-utils bzip2 gzip coreutils \ wget unzip tar xz-utils bzip2 gzip coreutils ntp \
curl sed grep graphviz libjpeg-dev zlib1g-dev \ curl sed grep graphviz libjpeg-dev zlib1g-dev \
python-numpy python-matplotlib gcc g++ \ python-numpy python-matplotlib gcc g++ \
automake locales clang-format-3.8 swig doxygen cmake \ automake locales clang-format-3.8 swig doxygen cmake \
......
...@@ -21,7 +21,8 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -21,7 +21,8 @@ IF(NOT ${CBLAS_FOUND})
SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas) SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE) SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE)
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/${LIBRARY_PREFIX}openblas${STATIC_LIBRARY_SUFFIX}" SET(CBLAS_LIBRARIES
"${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}"
CACHE FILEPATH "openblas library." FORCE) CACHE FILEPATH "openblas library." FORCE)
SET(COMMON_ARGS CC=${CMAKE_C_COMPILER} NO_SHARED=1 NO_LAPACK=1 libs) SET(COMMON_ARGS CC=${CMAKE_C_COMPILER} NO_SHARED=1 NO_LAPACK=1 libs)
......
...@@ -14,11 +14,41 @@ ...@@ -14,11 +14,41 @@
INCLUDE(ExternalProject) INCLUDE(ExternalProject)
# Print and set the protobuf library information,
# finish this cmake process and exit from this file.
macro(PROMPT_PROTOBUF_LIB) macro(PROMPT_PROTOBUF_LIB)
SET(protobuf_DEPS ${ARGN})
MESSAGE(STATUS "Protobuf protoc executable: ${PROTOBUF_PROTOC_EXECUTABLE}") MESSAGE(STATUS "Protobuf protoc executable: ${PROTOBUF_PROTOC_EXECUTABLE}")
MESSAGE(STATUS "Protobuf library: ${PROTOBUF_LIBRARY}") MESSAGE(STATUS "Protobuf library: ${PROTOBUF_LIBRARY}")
MESSAGE(STATUS "Protobuf version: ${PROTOBUF_VERSION}") MESSAGE(STATUS "Protobuf version: ${PROTOBUF_VERSION}")
INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR})
# Assuming that all the protobuf libraries are of the same type.
IF(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_STATIC_LIBRARY_SUFFIX}$")
SET(protobuf_LIBTYPE STATIC)
ELSEIF(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_SHARED_LIBRARY_SUFFIX}$")
SET(protobuf_LIBTYPE SHARED)
ELSE()
MESSAGE(FATAL_ERROR "Unknown library type: ${PROTOBUF_LIBRARY}")
ENDIF()
ADD_LIBRARY(protobuf ${protobuf_LIBTYPE} IMPORTED GLOBAL)
SET_PROPERTY(TARGET protobuf PROPERTY IMPORTED_LOCATION ${PROTOBUF_LIBRARY})
ADD_LIBRARY(protobuf_lite ${protobuf_LIBTYPE} IMPORTED GLOBAL)
SET_PROPERTY(TARGET protobuf_lite PROPERTY IMPORTED_LOCATION ${PROTOBUF_LITE_LIBRARY})
ADD_LIBRARY(protoc ${protobuf_LIBTYPE} IMPORTED GLOBAL)
SET_PROPERTY(TARGET protoc PROPERTY IMPORTED_LOCATION ${PROTOC_LIBRARY})
FOREACH(dep ${protobuf_DEPS})
ADD_DEPENDENCIES(protobuf ${dep})
ADD_DEPENDENCIES(protobuf_lite ${dep})
ADD_DEPENDENCIES(protoc ${dep})
ENDFOREACH()
LIST(APPEND external_project_dependencies protobuf)
RETURN() RETURN()
endmacro() endmacro()
macro(SET_PROTOBUF_VERSION) macro(SET_PROTOBUF_VERSION)
...@@ -43,22 +73,23 @@ if (NOT "${PROTOBUF_ROOT}" STREQUAL "") ...@@ -43,22 +73,23 @@ if (NOT "${PROTOBUF_ROOT}" STREQUAL "")
endif() endif()
FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/${TARGET_NAME}) STRING(REPLACE "extern_" "" TARGET_DIR_NAME "${TARGET_NAME}")
SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/${TARGET_NAME}) SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/${TARGET_DIR_NAME})
SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/${TARGET_DIR_NAME})
SET(${TARGET_NAME}_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" PARENT_SCOPE) SET(${TARGET_NAME}_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" PARENT_SCOPE)
SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" PARENT_SCOPE) SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" PARENT_SCOPE)
SET(${TARGET_NAME}_LITE_LIBRARY SET(${TARGET_NAME}_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite${STATIC_LIBRARY_SUFFIX}" "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite${CMAKE_STATIC_LIBRARY_SUFFIX}"
PARENT_SCOPE) PARENT_SCOPE)
SET(${TARGET_NAME}_LIBRARY SET(${TARGET_NAME}_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf${STATIC_LIBRARY_SUFFIX}" "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf${CMAKE_STATIC_LIBRARY_SUFFIX}"
PARENT_SCOPE) PARENT_SCOPE)
SET(${TARGET_NAME}_PROTOC_LIBRARY SET(${TARGET_NAME}_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc${STATIC_LIBRARY_SUFFIX}" "${PROTOBUF_INSTALL_DIR}/lib/libprotoc${CMAKE_STATIC_LIBRARY_SUFFIX}"
PARENT_SCOPE) PARENT_SCOPE)
SET(${TARGET_NAME}_PROTOC_EXECUTABLE SET(${TARGET_NAME}_PROTOC_EXECUTABLE
"${PROTOBUF_INSTALL_DIR}/bin/protoc${EXECUTABLE_SUFFIX}" "${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}"
PARENT_SCOPE) PARENT_SCOPE)
SET(OPTIONAL_CACHE_ARGS "") SET(OPTIONAL_CACHE_ARGS "")
...@@ -109,6 +140,8 @@ IF(NOT CMAKE_CROSSCOMPILING) ...@@ -109,6 +140,8 @@ IF(NOT CMAKE_CROSSCOMPILING)
SET_PROTOBUF_VERSION() SET_PROTOBUF_VERSION()
IF("${PROTOBUF_VERSION}" VERSION_LESS "3.1.0") IF("${PROTOBUF_VERSION}" VERSION_LESS "3.1.0")
SET(PROTOBUF_FOUND OFF) SET(PROTOBUF_FOUND OFF)
ELSE()
PROMPT_PROTOBUF_LIB()
ENDIF() ENDIF()
ENDIF(PROTOBUF_FOUND) ENDIF(PROTOBUF_FOUND)
ELSE() ELSE()
...@@ -120,18 +153,22 @@ ELSE() ...@@ -120,18 +153,22 @@ ELSE()
ENDIF() ENDIF()
IF(NOT PROTOBUF_FOUND) IF(NOT PROTOBUF_FOUND)
build_protobuf(protobuf FALSE) build_protobuf(extern_protobuf FALSE)
LIST(APPEND external_project_dependencies protobuf)
SET(PROTOBUF_INCLUDE_DIR ${protobuf_INCLUDE_DIR} SET(PROTOBUF_INCLUDE_DIR ${extern_protobuf_INCLUDE_DIR}
CACHE PATH "protobuf include directory." FORCE) CACHE PATH "protobuf include directory." FORCE)
IF(NOT CMAKE_CROSSCOMPILING) SET(PROTOBUF_LITE_LIBRARY ${extern_protobuf_LITE_LIBRARY}
SET(PROTOBUF_PROTOC_EXECUTABLE ${protobuf_PROTOC_EXECUTABLE} CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY ${extern_protobuf_LIBRARY}
CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY ${extern_protobuf_PROTOC_LIBRARY}
CACHE FILEPATH "protoc library." FORCE)
IF(CMAKE_CROSSCOMPILING)
PROMPT_PROTOBUF_LIB(protobuf_host extern_protobuf)
ELSE()
SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE}
CACHE FILEPATH "protobuf executable." FORCE) CACHE FILEPATH "protobuf executable." FORCE)
PROMPT_PROTOBUF_LIB(extern_protobuf)
ENDIF() ENDIF()
SET(PROTOBUF_LITE_LIBRARY ${protobuf_LITE_LIBRARY} CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY ${protobuf_LIBRARY} CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY ${protobuf_PROTOC_LIBRARY} CACHE FILEPATH "protoc library." FORCE)
ENDIF(NOT PROTOBUF_FOUND) ENDIF(NOT PROTOBUF_FOUND)
PROMPT_PROTOBUF_LIB()
\ No newline at end of file
...@@ -84,24 +84,6 @@ IF(DEFINED CMAKE_SYSTEM_NAME) ...@@ -84,24 +84,6 @@ IF(DEFINED CMAKE_SYSTEM_NAME)
ENDIF() ENDIF()
ENDIF() ENDIF()
# prefix and suffix on different os
IF(WIN32)
SET(LIBRARY_PREFIX "")
SET(SHARED_LIBRARY_SUFFIX ".dll")
SET(STATIC_LIBRARY_SUFFIX ".lib")
SET(EXECUTABLE_SUFFIX ".exe")
ELSE(WIN32)
SET(LIBRARY_PREFIX "lib")
IF(APPLE)
SET(SHARED_LIBRARY_SUFFIX ".dylib")
ELSE(APPLE)
SET(SHARED_LIBRARY_SUFFIX ".so")
ENDIF(APPLE)
SET(STATIC_LIBRARY_SUFFIX ".a")
SET(EXECUTABLE_SUFFIX "")
ENDIF(WIN32)
# external dependencies log output # external dependencies log output
SET(EXTERNAL_PROJECT_LOG_ARGS SET(EXTERNAL_PROJECT_LOG_ARGS
LOG_DOWNLOAD 0 # Wrap download in script to log output LOG_DOWNLOAD 0 # Wrap download in script to log output
......
...@@ -99,3 +99,12 @@ value_printer ...@@ -99,3 +99,12 @@ value_printer
.. automodule:: paddle.v2.evaluator .. automodule:: paddle.v2.evaluator
:members: value_printer :members: value_printer
:noindex: :noindex:
Detection
=========
detection_map
-------------
.. automodule:: paddle.v2.evaluator
:members: detection_map
:noindex:
package main package main
import ( import (
"fmt"
"net" "net"
"net/http" "net/http"
"net/rpc" "net/rpc"
"strconv" "strconv"
"strings"
"time" "time"
"github.com/namsral/flag" "github.com/namsral/flag"
log "github.com/sirupsen/logrus"
"github.com/PaddlePaddle/Paddle/go/master" "github.com/PaddlePaddle/Paddle/go/master"
"github.com/PaddlePaddle/Paddle/go/utils/networkhelper"
) )
func main() { func main() {
port := flag.Int("port", 8080, "port of the master server.") port := flag.Int("port", 8080, "port of the master server.")
ttlSec := flag.Int("ttl", 60, "etcd lease TTL in seconds.")
faultTolerance := flag.Bool("fault_tolerance", false, "enable fault tolerance (requires etcd).") endpoints := flag.String("endpoints", "http://127.0.0.1:2379", "comma separated etcd endpoints. If empty, fault tolerance will not be enabled.")
taskTimeoutDur := flag.Duration("task_timout_dur", 20*time.Minute, "task timout duration.") taskTimeoutDur := flag.Duration("task_timout_dur", 20*time.Minute, "task timout duration.")
taskTimeoutMax := flag.Int("task_timeout_max", 3, "max timtout count for each task before it being declared failed task.") taskTimeoutMax := flag.Int("task_timeout_max", 3, "max timtout count for each task before it being declared failed task.")
chunkPerTask := flag.Int("chunk_per_task", 10, "chunk per task.") chunkPerTask := flag.Int("chunk_per_task", 10, "chunk per task.")
flag.Parse() flag.Parse()
if *faultTolerance { if *endpoints == "" {
panic("fault tolernance not implemented.") log.Warningln("-endpoints not set, fault tolerance not be enabled.")
}
var store master.Store
if *endpoints != "" {
eps := strings.Split(*endpoints, ",")
ip, err := networkhelper.GetExternalIP()
if err != nil {
log.Fatal(err)
}
addr := fmt.Sprintf("%s:%d", ip, *port)
store, err = master.NewEtcdClient(eps, addr, master.DefaultLockPath, master.DefaultAddrPath, master.DefaultStatePath, *ttlSec)
if err != nil {
log.Fatal(err)
}
} else {
store = &master.InMemStore{}
}
s, err := master.NewService(store, *chunkPerTask, *taskTimeoutDur, *taskTimeoutMax)
if err != nil {
log.Fatal(err)
} }
s := master.NewService(*chunkPerTask, *taskTimeoutDur, *taskTimeoutMax) err = rpc.Register(s)
err := rpc.Register(s)
if err != nil { if err != nil {
panic(err) log.Fatal(err)
} }
rpc.HandleHTTP() rpc.HandleHTTP()
l, err := net.Listen("tcp", ":"+strconv.Itoa(*port)) l, err := net.Listen("tcp", ":"+strconv.Itoa(*port))
if err != nil { if err != nil {
panic(err) log.Fatal(err)
} }
err = http.Serve(l, nil) err = http.Serve(l, nil)
if err != nil { if err != nil {
panic(err) log.Fatal(err)
} }
} }
...@@ -5,18 +5,36 @@ import ( ...@@ -5,18 +5,36 @@ import (
"net/http" "net/http"
"net/rpc" "net/rpc"
"strconv" "strconv"
"time"
"github.com/namsral/flag" "github.com/namsral/flag"
"github.com/PaddlePaddle/Paddle/go/pserver" "github.com/PaddlePaddle/Paddle/go/pserver"
log "github.com/sirupsen/logrus"
) )
func main() { func main() {
port := flag.Int("port", 0, "port of the pserver") port := flag.Int("port", 0, "port of the pserver")
etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379",
"comma separated endpoint string for pserver to connect to etcd")
etcdTimeout := flag.Int("etcd-timeout", 5, "timeout for etcd calls")
numPservers := flag.Int("num-pservers", 1, "total pserver count in a training job")
logLevel := flag.String("log-level", "info",
"log level, possible values: debug, info, warning, error, fatal, panic")
flag.Parse() flag.Parse()
s := pserver.NewService() level, err := log.ParseLevel(*logLevel)
err := rpc.Register(s) if err != nil {
panic(err)
}
log.SetLevel(level)
timeout := time.Second * time.Duration((*etcdTimeout))
s, err := pserver.NewService(*etcdEndpoint, *numPservers, timeout)
if err != nil {
panic(err)
}
err = rpc.Register(s)
if err != nil { if err != nil {
panic(err) panic(err)
} }
...@@ -27,7 +45,9 @@ func main() { ...@@ -27,7 +45,9 @@ func main() {
panic(err) panic(err)
} }
log.Infof("start pserver at port %d", *port)
err = http.Serve(l, nil) err = http.Serve(l, nil)
if err != nil { if err != nil {
panic(err) panic(err)
} }
......
...@@ -47,9 +47,13 @@ func TestGetFinishTask(t *testing.T) { ...@@ -47,9 +47,13 @@ func TestGetFinishTask(t *testing.T) {
} }
go func(l net.Listener) { go func(l net.Listener) {
s := NewService(chunkPerTask, time.Second, 1) s, err := NewService(&InMemStore{}, chunkPerTask, time.Second, 1)
if err != nil {
panic(err)
}
server := rpc.NewServer() server := rpc.NewServer()
err := server.Register(s) err = server.Register(s)
if err != nil { if err != nil {
panic(err) panic(err)
} }
......
...@@ -33,9 +33,13 @@ func TestNextRecord(t *testing.T) { ...@@ -33,9 +33,13 @@ func TestNextRecord(t *testing.T) {
} }
go func(l net.Listener) { go func(l net.Listener) {
s := master.NewService(10, time.Second, 1) s, err := master.NewService(&master.InMemStore{}, 10, time.Second, 1)
if err != nil {
panic(err)
}
server := rpc.NewServer() server := rpc.NewServer()
err := server.Register(s) err = server.Register(s)
if err != nil { if err != nil {
panic(err) panic(err)
} }
......
package master
import (
"context"
"time"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/clientv3/concurrency"
log "github.com/sirupsen/logrus"
)
const (
// DefaultLockPath is the default etcd master lock path.
DefaultLockPath = "/master/lock"
// DefaultStatePath is the default etcd key for master state.
DefaultStatePath = "/master/state"
// DefaultAddrPath is the default etcd key for master address.
DefaultAddrPath = "/master/addr"
)
// EtcdClient is the etcd client that master uses for fault tolerance
// and service registry.
//
// The master state is stored under statePath, and every read or write
// of that key is wrapped in a transaction that first checks that the
// master lock is still owned.
type EtcdClient struct {
// lockPath is the etcd key of the master lock; it is used in log messages.
lockPath string
// statePath is the etcd key the master state is saved to and loaded from.
statePath string
// client is the etcd connection used to run the transactions.
client *clientv3.Client
// lock is the master lock whose ownership guards each state access.
lock *concurrency.Mutex
}
// NewEtcdClient creates a new EtcdClient.
//
// It connects to the etcd cluster at endpoints, opens a session whose
// lease TTL is ttlSec seconds, blocks until the master lock at
// lockPath is acquired, and then writes addr under addrPath in a
// transaction that only commits while the lock is still owned.
// statePath is the key later used by Save and Load.
//
// When any step fails, the session and client created so far are
// closed before the error is returned, so a failed call does not leak
// connections or leave a lease behind. If the lock turns out to be
// lost right after it was acquired, the process exits via log.Fatal.
func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePath string, ttlSec int) (*EtcdClient, error) {
	log.Debugf("Connecting to etcd at %v", endpoints)
	// TODO(helin): gracefully shutdown etcd store. Because etcd
	// store holds a etcd lock, even though the lock will expire
	// when the lease timeout, we need to implement graceful
	// shutdown to release the lock.
	cli, err := clientv3.New(clientv3.Config{
		Endpoints:   endpoints,
		DialTimeout: dialTimeout,
	})
	if err != nil {
		return nil, err
	}

	sess, err := concurrency.NewSession(cli, concurrency.WithTTL(ttlSec))
	if err != nil {
		// No session exists yet; only the client has to be released.
		if cerr := cli.Close(); cerr != nil {
			log.Errorln(cerr)
		}
		return nil, err
	}

	// release tears down the session first (revoking its lease, and
	// with it any lock key attached to that lease) and then the
	// client. It is only used on the error paths below.
	release := func() {
		if cerr := sess.Close(); cerr != nil {
			log.Errorln(cerr)
		}
		if cerr := cli.Close(); cerr != nil {
			log.Errorln(cerr)
		}
	}

	lock := concurrency.NewMutex(sess, lockPath)
	// It's fine for the lock to get stuck, in this case we have
	// multiple master servers running (only configured to have
	// one master running, but split-brain problem may cause
	// multiple master servers running), and the cluster management
	// software will kill one of them.
	log.Debugf("Trying to acquire lock at %s.", lockPath)
	err = lock.Lock(context.TODO())
	if err != nil {
		release()
		return nil, err
	}
	log.Debugf("Successfully acquired lock at %s.", lockPath)

	// Publish the master address, but only while the lock is owned.
	put := clientv3.OpPut(addrPath, addr)
	resp, err := cli.Txn(context.Background()).If(lock.IsOwner()).Then(put).Commit()
	if err != nil {
		release()
		return nil, err
	}

	if !resp.Succeeded {
		log.Fatal("No longer owns the master lock. Exiting.")
	}

	e := &EtcdClient{
		lockPath:  lockPath,
		statePath: statePath,
		client:    cli,
		lock:      lock,
	}

	return e, nil
}
// Save saves the state into the etcd.
//
// The write is a transaction that only commits while this client owns
// the master lock. If ownership was lost, Save tries for up to five
// seconds to take the lock back and then repeats the write; if the
// lock cannot be re-acquired the process exits via log.Fatalf.
func (e *EtcdClient) Save(state []byte) error {
	for {
		put := clientv3.OpPut(e.statePath, string(state))
		resp, err := e.client.Txn(context.TODO()).If(e.lock.IsOwner()).Then(put).Commit()
		if err != nil {
			return err
		}
		if resp.Succeeded {
			return nil
		}

		log.Errorln("No longer owns the lock, trying to lock again")
		lockCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		lockErr := e.lock.Lock(lockCtx)
		cancel()
		if lockErr != nil {
			// The master lock is gone and cannot be taken
			// back, which means another master has already
			// started. The cluster management system must
			// not kill that healthy lock holder, so this
			// master terminates itself instead. The state
			// is left unsaved, but the trainer's RPC call
			// fails and the trainer will retry.
			log.Fatalf("Could not acquire the lock at %s: %v. Exiting.", e.lockPath, lockErr)
		}
		log.Infof("Successfully acquired lock at %s.", e.lockPath)
		// Lock is held again: go around and redo the save.
	}
}
// Load loads the state from etcd.
//
// The read is a transaction that only runs while this client owns the
// master lock; if ownership was lost, Load blocks until the lock is
// re-acquired and then reads again. It returns (nil, nil) when no
// state has been stored under the state key.
func (e *EtcdClient) Load() ([]byte, error) {
	for {
		get := clientv3.OpGet(e.statePath)
		resp, err := e.client.Txn(context.TODO()).If(e.lock.IsOwner()).Then(get).Commit()
		if err != nil {
			return nil, err
		}

		if resp.Succeeded {
			kvs := resp.Responses[0].GetResponseRange().Kvs
			if len(kvs) == 0 {
				// No state exists
				return nil, nil
			}
			return kvs[0].Value, nil
		}

		log.Errorln("No longer owns the lock, trying to lock and load again.")
		if err = e.lock.Lock(context.Background()); err != nil {
			return nil, err
		}
		// Lock is held again: go around and redo the load.
	}
}
package master
import "sync"
// InMemStore is an in memory implementation of Store interface.
//
// It does not tolerate the fault that causes the program to crash.
// The zero value is an empty store that is ready to use.
type InMemStore struct {
	mu sync.Mutex // guards buf
	// buf is a private copy of the most recently saved state. It is
	// nil until a non-nil state has been saved.
	buf []byte
}

// Save saves the state into the in-memory store.
//
// The store keeps its own copy of state, so the caller may reuse or
// modify the slice afterwards without affecting what Load returns.
// Saving a nil slice resets the store to "no state". It never returns
// an error.
func (m *InMemStore) Save(state []byte) error {
	// Copy before taking the lock; nil stays nil so that "no state"
	// remains distinguishable from an empty state.
	var snapshot []byte
	if state != nil {
		snapshot = make([]byte, len(state))
		copy(snapshot, state)
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	m.buf = snapshot
	return nil
}

// Load loads the state from the in-memory store.
//
// It returns a copy of the saved state, so the caller cannot change
// the stored bytes through the returned slice. It returns (nil, nil)
// when nothing has been saved, and never returns an error.
func (m *InMemStore) Load() ([]byte, error) {
	m.mu.Lock()
	defer m.mu.Unlock()

	if m.buf == nil {
		return nil, nil
	}
	out := make([]byte, len(m.buf))
	copy(out, m.buf)
	return out, nil
}
package master package master
import ( import (
"bytes"
"compress/gzip"
"encoding/gob"
"errors" "errors"
"os" "os"
"path/filepath" "path/filepath"
...@@ -12,24 +15,54 @@ import ( ...@@ -12,24 +15,54 @@ import (
"github.com/PaddlePaddle/recordio" "github.com/PaddlePaddle/recordio"
) )
const (
// dialTimeout is the DialTimeout given to the etcd client when the
// master connects to etcd.
dialTimeout = 5 * time.Second
)
// Store is the interface for save and load the master state.
//
// The service treats a nil slice returned by Load (with a nil error)
// as "no saved state exists" and skips recovery in that case.
type Store interface {
// Save persists the serialized master state.
Save([]byte) error
// Load returns the previously saved serialized master state.
Load() ([]byte, error)
}
// Chunk is a chunk of data consisted of several data instances.
type Chunk struct {
Path string
Index recordio.Index // chunk index
}
// Task is the basic unit of data instances assigned to trainers.
type Task struct {
ID int
Chunks []Chunk
}
// taskEntry is the record the service keeps for a Task in its queues.
//
// The fields are exported; encoding/gob, which the service uses to
// snapshot its task queues, only encodes exported fields.
type taskEntry struct {
// Epoch: presumably identifies which dispatch of the task a timeout
// check refers to (checkTimeoutFunc takes an epoch) — TODO confirm.
Epoch int
// NumTimeout counts how many times the task has timed out; the task
// is discarded once it exceeds the service's timeoutMax.
NumTimeout int
// Task is the task this entry tracks.
Task Task
}
// taskQueues is the task state of the master service. It is what the
// service gob-encodes when it takes a snapshot and what it restores
// when it recovers.
type taskQueues struct {
// Todo holds the entries waiting to be dispatched; a timed-out task
// that may still be retried is appended back here.
Todo []taskEntry
Pending map[int]taskEntry // map from task ID to task entry
// Done: presumably the entries that finished — TODO confirm.
Done []taskEntry
// Failed collects the tasks discarded after timing out too often.
Failed []Task
}
// Service is the master server service. // Service is the master server service.
type Service struct { type Service struct {
chunksPerTask int chunksPerTask int
timeoutDur time.Duration timeoutDur time.Duration
timeoutMax int timeoutMax int
ready chan struct{} ready chan struct{}
store Store
mu sync.Mutex mu sync.Mutex
initDone bool initDone bool
taskQueues taskQueues taskQueues taskQueues
} }
// Recover recovers service state from etcd.
func Recover() (*Service, error) {
// TODO(helin): recover from snapshot state from etcd.
return nil, nil
}
func partition(chunks []Chunk, chunksPerTask int) []taskEntry { func partition(chunks []Chunk, chunksPerTask int) []taskEntry {
id := 0 id := 0
if chunksPerTask <= 0 { if chunksPerTask <= 0 {
...@@ -58,7 +91,7 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry { ...@@ -58,7 +91,7 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry {
} }
// NewService creates a new service. // NewService creates a new service.
func NewService(chunksPerTask int, timeoutDur time.Duration, timeoutMax int) *Service { func NewService(store Store, chunksPerTask int, timeoutDur time.Duration, timeoutMax int) (*Service, error) {
s := &Service{} s := &Service{}
s.chunksPerTask = chunksPerTask s.chunksPerTask = chunksPerTask
s.timeoutDur = timeoutDur s.timeoutDur = timeoutDur
...@@ -66,38 +99,82 @@ func NewService(chunksPerTask int, timeoutDur time.Duration, timeoutMax int) *Se ...@@ -66,38 +99,82 @@ func NewService(chunksPerTask int, timeoutDur time.Duration, timeoutMax int) *Se
s.taskQueues = taskQueues{} s.taskQueues = taskQueues{}
s.taskQueues.Pending = make(map[int]taskEntry) s.taskQueues.Pending = make(map[int]taskEntry)
s.ready = make(chan struct{}) s.ready = make(chan struct{})
return s s.store = store
} recovered, err := s.recover()
if err != nil {
return nil, err
}
// Chunk is a chunk of data consisted of several data instances. if recovered {
type Chunk struct { // Recovered. Now the state is already initialized,
Path string // and the master is ready.
Index recordio.Index // chunk index s.initDone = true
} close(s.ready)
log.Info("Master recovered from saved state.")
}
// Task is the basic unit of data instances assigned to trainers. return s, nil
type Task struct {
ID int
Chunks []Chunk
} }
type taskEntry struct { // recover recovers service state from etcd.
Epoch int func (s *Service) recover() (bool, error) {
NumTimeout int state, err := s.store.Load()
Task Task if err != nil {
} return false, err
}
type taskQueues struct { if state == nil {
Todo []taskEntry log.Infoln("No state exists, not recovered.")
Pending map[int]taskEntry // map from task ID to task entry return false, nil
Done []taskEntry }
Failed []Task
log.Infof("Loaded snapshot of size: %d bytes.", len(state))
gr, err := gzip.NewReader(bytes.NewReader(state))
if err != nil {
return false, err
}
dec := gob.NewDecoder(gr)
var tqs taskQueues
err = dec.Decode(&tqs)
if err != nil {
return false, err
}
err = gr.Close()
if err != nil {
// Only close failed, recover actually succeed, so
// just log error.
log.Errorln(err)
}
s.taskQueues = tqs
return true, nil
} }
// *must* be called with s.mu being held. // snapshot *must* be called with s.mu being held.
func (s *Service) snapshot() error { func (s *Service) snapshot() error {
// TODO(helin): snapshot state on etcd. // TOOD(helin): etcd request has a size limit, so the snapshot
return nil // size is limited by the max request size. We should either
// divide the snapshot into smaller chunks and save under
// different keys, or configure the request size to be big
// enough:
// https://github.com/coreos/etcd/blob/2f84f3d8d8ed8f9537ab6ffa44a3a1c7eddfa9b1/embed/config.go#L44
var buf bytes.Buffer
gw := gzip.NewWriter(&buf)
enc := gob.NewEncoder(gw)
err := enc.Encode(s.taskQueues)
if err != nil {
return err
}
err = gw.Close()
if err != nil {
return err
}
state := buf.Bytes()
log.Infof("Saving snapshot of size: %d bytes.", len(state))
return s.store.Save(state)
} }
func readChunks(globPaths []string) ([]Chunk, error) { func readChunks(globPaths []string) ([]Chunk, error) {
...@@ -207,12 +284,12 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { ...@@ -207,12 +284,12 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() {
t.NumTimeout++ t.NumTimeout++
if t.NumTimeout > s.timeoutMax { if t.NumTimeout > s.timeoutMax {
log.Warningf("Task %v timed out %d times, discard.\n", t.Task, t.NumTimeout) log.Warningf("Task %v timed out %d times, discard.", t.Task, t.NumTimeout)
s.taskQueues.Failed = append(s.taskQueues.Failed, t.Task) s.taskQueues.Failed = append(s.taskQueues.Failed, t.Task)
return return
} }
log.Warningf("Task %v timed out %d times, retry.\n", t.Task, t.NumTimeout) log.Warningf("Task %v timed out %d times, retry.", t.Task, t.NumTimeout)
s.taskQueues.Todo = append(s.taskQueues.Todo, t) s.taskQueues.Todo = append(s.taskQueues.Todo, t)
} }
} }
......
...@@ -133,7 +133,7 @@ func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, ...@@ -133,7 +133,7 @@ func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter,
if err != nil { if err != nil {
if err.Error() == pserver.AlreadyInitialized { if err.Error() == pserver.AlreadyInitialized {
log.Warningf("parameter %s already initialized, treat paddle_init_param as sucessful.\n", name) log.Warningf("parameter %s already initialized, treat paddle_init_param as sucessful.", name)
return C.PSERVER_OK return C.PSERVER_OK
} }
log.Errorln(err) log.Errorln(err)
...@@ -200,7 +200,7 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter, ...@@ -200,7 +200,7 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter,
for i, p := range ps { for i, p := range ps {
pn[i] = p.Name pn[i] = p.Name
} }
log.Errorf("pserver returned wrong number of parameters. Requested: %s, returned: %s.\n", strings.Join(pn, ", "), strings.Join(ns, ", ")) log.Errorf("pserver returned wrong number of parameters. Requested: %s, returned: %s.", strings.Join(pn, ", "), strings.Join(ns, ", "))
return C.PSERVER_ERROR return C.PSERVER_ERROR
} }
...@@ -210,7 +210,7 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter, ...@@ -210,7 +210,7 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter,
for i, p := range ps { for i, p := range ps {
pn[i] = p.Name pn[i] = p.Name
} }
log.Errorf("pserver returned wrong parameters, or not in requested order. Requested: %s, returned: %s.\n", strings.Join(pn, ", "), strings.Join(ns, ", ")) log.Errorf("pserver returned wrong parameters, or not in requested order. Requested: %s, returned: %s.", strings.Join(pn, ", "), strings.Join(ns, ", "))
return C.PSERVER_ERROR return C.PSERVER_ERROR
} }
} }
......
...@@ -7,6 +7,7 @@ import ( ...@@ -7,6 +7,7 @@ import (
"strconv" "strconv"
"strings" "strings"
"testing" "testing"
"time"
"github.com/PaddlePaddle/Paddle/go/pserver" "github.com/PaddlePaddle/Paddle/go/pserver"
) )
...@@ -30,9 +31,12 @@ func init() { ...@@ -30,9 +31,12 @@ func init() {
port[i] = p port[i] = p
go func(l net.Listener) { go func(l net.Listener) {
s := pserver.NewService() s, err := pserver.NewService("", time.Second*5)
if err != nil {
panic(err)
}
server := rpc.NewServer() server := rpc.NewServer()
err := server.Register(s) err = server.Register(s)
if err != nil { if err != nil {
panic(err) panic(err)
} }
......
package pserver package pserver
import ( import (
"context"
"errors" "errors"
"fmt" "fmt"
"strconv"
"strings"
"sync" "sync"
"time"
"github.com/PaddlePaddle/Paddle/go/utils/networkhelper"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/clientv3/concurrency"
log "github.com/sirupsen/logrus"
) )
// ElementType is the type of elements of a Parameter. // ElementType is the type of elements of a Parameter.
...@@ -24,6 +33,9 @@ const ( ...@@ -24,6 +33,9 @@ const (
Float64 Float64
) )
// PsDesired is etcd path for store desired pserver count
const PsDesired = "/ps_desired"
// Parameter is a piece of data to sync with the parameter server. // Parameter is a piece of data to sync with the parameter server.
type Parameter struct { type Parameter struct {
Name string Name string
...@@ -47,14 +59,154 @@ type Service struct { ...@@ -47,14 +59,154 @@ type Service struct {
mu sync.Mutex mu sync.Mutex
opt *optimizer opt *optimizer
paramMap map[string]Parameter paramMap map[string]Parameter
etcdEndpoints string
etcdClient *clientv3.Client
// etcdTimeout is also used as retry intervals.
etcdTimeout time.Duration
// desired number of pservers in the job.
// assume desired will not change during one training job.
desired int
// FIXME: ensure GetExternalIP gets the correct ip for trainers to connect.
externalIP string
} }
// NewService creates a new service. // NewService creates a new service, will bypass etcd registration if no
func NewService() *Service { // endpoints specified.
func NewService(endpoints string, numPservers int, timeout time.Duration) (*Service, error) {
s := &Service{opt: newOptimizer(sgd, 0.005)} s := &Service{opt: newOptimizer(sgd, 0.005)}
s.paramMap = make(map[string]Parameter) s.paramMap = make(map[string]Parameter)
s.initialized = make(chan struct{}) s.initialized = make(chan struct{})
return s s.etcdEndpoints = endpoints
s.etcdTimeout = timeout
var err error
s.externalIP, err = networkhelper.GetExternalIP()
if err != nil {
return nil, err
}
if endpoints != "" {
// initialize connection to etcd, try
ep := strings.Split(s.etcdEndpoints, ",")
for {
cli, err := clientv3.New(clientv3.Config{
Endpoints: ep,
DialTimeout: s.etcdTimeout,
})
if err != nil {
log.Errorf("connect to etcd error: %v", err)
time.Sleep(s.etcdTimeout)
continue
}
s.etcdClient = cli
log.Debugf("inited client to %s", s.etcdEndpoints)
break
}
// init /ps_desired using transaction, for multiple pservers may want to write
// it at the same time.
for {
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
_, err := s.initDesiredPsercers(ctx, numPservers)
cancel()
if err != nil {
log.Warn(err)
time.Sleep(s.etcdTimeout)
continue
}
break
}
// TODO: when implementing extending or reducing pservers, /ps_desired is
// changed, then we need to watch /ps_desired node for events. For now, just
// write once when init and read from it.
// wait and set s.desired init value
for {
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
resp, err := s.etcdClient.Get(ctx, PsDesired)
cancel()
if err != nil {
log.Errorf("getting %s error: %v", PsDesired, err)
time.Sleep(s.etcdTimeout)
continue
}
if len(resp.Kvs) != 0 {
s.desired, err = strconv.Atoi(string(resp.Kvs[0].Value))
if err != nil {
log.Errorf("value of %s invalid %v\n", PsDesired, err)
time.Sleep(s.etcdTimeout)
// NOTE: wait util ps_desired value change
continue
}
break
}
}
// try register pserver node on etcd
for {
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
_, err := s.registerPserverEtcd(ctx)
cancel()
if err != nil {
log.Warn(err)
time.Sleep(s.etcdTimeout)
continue
}
break
}
} // if endpoints != ""
// Bypass etcd registration if no endpoints specified
return s, nil
}
// initDesiredPsercers runs an etcd STM transaction that writes numPservers
// (as a decimal string) under PsDesired when the value read for that key is
// empty; an existing non-empty value is left untouched. The transaction is
// aborted through ctx and uses repeatable-read isolation.
func (s *Service) initDesiredPsercers(ctx context.Context, numPservers int) (*clientv3.TxnResponse, error) {
	writeIfUnset := func(stm concurrency.STM) error {
		if current := stm.Get(PsDesired); current == "" {
			stm.Put(PsDesired, strconv.Itoa(numPservers))
		}
		return nil
	}

	return concurrency.NewSTM(
		s.etcdClient,
		writeIfUnset,
		concurrency.WithAbortContext(ctx),
		concurrency.WithIsolation(concurrency.RepeatableReads),
	)
}
// registerPserverEtcd registers pserver node on etcd using transaction.
func (s *Service) registerPserverEtcd(ctx context.Context) (*clientv3.TxnResponse, error) {
return concurrency.NewSTM(s.etcdClient, func(c concurrency.STM) error {
registered := false
for i := 0; i < s.desired; i++ {
psKey := "/ps/" + strconv.Itoa(i)
log.Debugf("checking %s", psKey)
ps := c.Get(psKey)
log.Debugf("got value (%s) for key: %s", ps, psKey)
if ps == "" {
resp, err := s.etcdClient.Grant(context.TODO(), 5)
if err != nil {
log.Fatal(err)
}
// find the first id and write info
c.Put(psKey, s.externalIP, clientv3.WithLease(resp.ID))
log.Debugf("set pserver node %s with value %s", psKey, s.externalIP)
ch, kaerr := s.etcdClient.KeepAlive(context.TODO(), resp.ID)
if kaerr != nil {
log.Errorf("keepalive etcd node error: %v", kaerr)
return kaerr
}
// Eat the keep alive message so etcd
// will not expire the lease.
go func(ch <-chan *clientv3.LeaseKeepAliveResponse) {
ka := <-ch
log.Debugf("keepalive: %d\n", ka.TTL)
}(ch)
log.Debug("register finished")
registered = true
break
}
}
if registered == true {
return nil
}
return errors.New("not registerd, may due to already have enough pservers")
}, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads))
} }
// InitParam initializes a parameter. // InitParam initializes a parameter.
......
...@@ -10,12 +10,15 @@ import ( ...@@ -10,12 +10,15 @@ import (
) )
func TestFull(t *testing.T) { func TestFull(t *testing.T) {
s := pserver.NewService() s, err := pserver.NewService("", time.Second*5)
if err != nil {
t.Error(err)
}
var p pserver.Parameter var p pserver.Parameter
p.Name = "param_a" p.Name = "param_a"
p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0}
p.ElementType = pserver.Int32 p.ElementType = pserver.Int32
err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil) err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil)
if err != nil { if err != nil {
t.FailNow() t.FailNow()
} }
...@@ -72,8 +75,11 @@ func TestFull(t *testing.T) { ...@@ -72,8 +75,11 @@ func TestFull(t *testing.T) {
} }
func TestMultipleInit(t *testing.T) { func TestMultipleInit(t *testing.T) {
s := pserver.NewService() s, err := pserver.NewService("", time.Second*5)
err := s.FinishInitParams(0, nil) if err != nil {
t.Error(err)
}
err = s.FinishInitParams(0, nil)
if err != nil { if err != nil {
t.FailNow() t.FailNow()
} }
...@@ -85,15 +91,18 @@ func TestMultipleInit(t *testing.T) { ...@@ -85,15 +91,18 @@ func TestMultipleInit(t *testing.T) {
} }
func TestUninitialized(t *testing.T) { func TestUninitialized(t *testing.T) {
s := pserver.NewService() s, err := pserver.NewService("", time.Second*5)
err := s.SendGrad(pserver.Gradient{}, nil) err = s.SendGrad(pserver.Gradient{}, nil)
if err.Error() != pserver.Uninitialized { if err.Error() != pserver.Uninitialized {
t.FailNow() t.FailNow()
} }
} }
func TestBlockUntilInitialized(t *testing.T) { func TestBlockUntilInitialized(t *testing.T) {
s := pserver.NewService() s, err := pserver.NewService("", time.Second*5)
if err != nil {
t.Error(err)
}
ch := make(chan struct{}, 2) ch := make(chan struct{}, 2)
errCh := make(chan error, 2) errCh := make(chan error, 2)
var wg sync.WaitGroup var wg sync.WaitGroup
...@@ -133,7 +142,7 @@ func TestBlockUntilInitialized(t *testing.T) { ...@@ -133,7 +142,7 @@ func TestBlockUntilInitialized(t *testing.T) {
p.Name = "param_a" p.Name = "param_a"
p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0}
p.ElementType = pserver.Int32 p.ElementType = pserver.Int32
err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil) err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil)
if err != nil { if err != nil {
t.FailNow() t.FailNow()
} }
......
package networkhelper
import (
"errors"
"net"
)
// GetExternalIP returns the ip address of local network interface, not the
// loopback device.
// GetExternalIP walks the local network interfaces and returns the first IPv4
// address found on an interface that is up and is not a loopback device.
// Errors from listing the interfaces or their addresses are returned as-is;
// if no suitable address exists an error is returned as well.
func GetExternalIP() (string, error) {
	interfaces, err := net.Interfaces()
	if err != nil {
		return "", err
	}

	for _, nic := range interfaces {
		isUp := nic.Flags&net.FlagUp != 0
		isLoopback := nic.Flags&net.FlagLoopback != 0
		if !isUp || isLoopback {
			continue
		}

		nicAddrs, err := nic.Addrs()
		if err != nil {
			return "", err
		}

		for _, a := range nicAddrs {
			// Only *net.IPNet and *net.IPAddr carry an IP; anything else
			// leaves candidate nil and is skipped.
			var candidate net.IP
			if ipNet, ok := a.(*net.IPNet); ok {
				candidate = ipNet.IP
			} else if ipAddr, ok := a.(*net.IPAddr); ok {
				candidate = ipAddr.IP
			}
			if candidate == nil || candidate.IsLoopback() {
				continue
			}

			// To4 returns nil for addresses that are not IPv4.
			if v4 := candidate.To4(); v4 != nil {
				return v4.String(), nil
			}
		}
	}

	return "", errors.New("are you connected to the network?")
}
package networkhelper
import "testing"
// TestGetIP reports a test error when GetExternalIP fails on the machine
// running the test; the returned address itself is not inspected.
func TestGetIP(t *testing.T) {
	if _, err := GetExternalIP(); err != nil {
		t.Errorf("GetExternalIP returns error : %v\n", err)
	}
}
...@@ -2,3 +2,5 @@ cc_library(ddim SRCS ddim.cc) ...@@ -2,3 +2,5 @@ cc_library(ddim SRCS ddim.cc)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(dim_test SRCS dim_test.cu DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim)
cc_test(variable_test SRCS variable_test.cc)
//#include <stdexcept>
//#include <unittest/unittest.h>
#include <sstream> #include <sstream>
#include <vector> #include <vector>
......
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#pragma once
#include <memory>
#include <typeindex>
#include <typeinfo>
#include "paddle/platform/assert.h"
namespace paddle {
namespace framework {
// Variable owns at most one value of an arbitrary type. The type is not a
// template parameter of Variable; it is hidden behind the Placeholder
// interface and recorded at runtime via typeid.
class Variable {
 public:
  // Returns a const reference to the held value.
  // Asserts (PADDLE_ASSERT) that a value is held and that its recorded type
  // equals T.
  template <typename T>
  const T& Get() const {
    PADDLE_ASSERT(holder_ != nullptr);
    PADDLE_ASSERT(std::type_index(typeid(T)) ==
                  std::type_index(holder_->Type()));
    return *static_cast<const T*>(holder_->Ptr());
  }

  // Returns a mutable pointer to the held value of type T.
  // If nothing is held yet, or the held value has a different type, the
  // holder is replaced by a default-constructed T (any previously held value
  // is destroyed by the unique_ptr reset).
  template <typename T>
  T* GetMutable() {
    if (holder_ == nullptr ||
        std::type_index(typeid(T)) != std::type_index(holder_->Type())) {
      holder_.reset(new PlaceholderImpl<T>(new T()));
    }
    return static_cast<T*>(holder_->Ptr());
  }

 private:
  // Type-erased interface to the held object.
  struct Placeholder {
    virtual ~Placeholder() {}
    virtual const std::type_info& Type() const = 0;
    virtual void* Ptr() const = 0;
  };

  // Placeholder hides type T, so it doesn't appear as a template
  // parameter of Variable.
  template <typename T>
  struct PlaceholderImpl : public Placeholder {
    // Takes ownership of ptr. explicit: a raw T* must never convert to a
    // PlaceholderImpl implicitly.
    explicit PlaceholderImpl(T* ptr) : ptr_(ptr), type_(typeid(T)) {}

    const std::type_info& Type() const override { return type_; }
    void* Ptr() const override { return static_cast<void*>(ptr_.get()); }

    std::unique_ptr<T> ptr_;       // owns the held object
    const std::type_info& type_;   // typeid(T), used for the runtime checks
  };

  std::unique_ptr<Placeholder>
      holder_;  // pointers to a PlaceholderImpl object indeed.
};
} // namespace framework
} // namespace paddle
# Design Doc: Variable
Variable is also known as *blob* in MxNet and Caffe2. It is the input and output type of operators, where a neural network is a graph of operators.
## Requirements: Lazy Memory Allocation
For the flexibility of a DL system, a variable should be able to contain any typed value -- a tensor in most cases, but could also be some integer IDs or a scope of other variables in the case of RNN.
To use the minimum amount of memory, we'd like a variable to allocate memory only when it has to, i.e., lazy memory allocation. Let's take the following example:
```cpp
Variable vr, v1, v2;
Tensor* t1 = new Tensor();
Tensor* t2 = new Tensor();
Randomize(
/* malloc */ v1.GetMutable<Tensor>().mutable_data<float16>(DDim(100,200)),
/* size */ t1.Size());
Randomize(
/* malloc */ v2.GetMutable<Tensor>().mutable_data<float16>(DDim(200,300)),
/* size */ t2.Size());
Mult(
/*result*/ vr.GetMutable<Tensor>().mutable_data<v1.Type()>(SizeOfMult(v1, v2)),
/*input1*/ v1.Get<Tensor>().data(),
/*input2*/ v2.Get<Tensor>().data());
```
We see that a variable holds nothing until `Variable::GetMutable<Tensor>()` allocates a tensor and puts it in the variable. Similarly, a tensor does not get its memory until `Tensor::mutable_data()` is called.
This syntax performs the lazy memory allocation right where we call `Randomize` and `Mult`, the functions that mutate the variable, so it saves us some lines of C++ code.
## Implementation: Type Hiding
To make memory allocation lazy, we cannot assume that we know the type held by a variable at definition time. In other words, `class Variable` cannot be a template `template <T> class Variable`.
Because we don't know the type `T`, we cannot save a `T*` as a data member of `Variable`. Instead, we save an interface object `Placeholder`, which can return the pointer to the saved object via `Placeholder::Ptr()` as `void*`.
Nevertheless, `Variable` needs to know `T` so that it can `delete<T>(ptr)` and so that `Variable::Get` can check the expected type against the saved object's type.
We save `T` in `PlaceholderImpl`, the implementation of `Placeholder`. Please be aware that `PlaceholderImpl` is a class template and `T` is passed in as a template parameter.
Because `PlaceholderImpl` knows `T`, it can save and return `typeid(T)` for the type comparison in `Variable::Get` and `Variable::GetMutable`.
## Conclusion
The type-hiding technique utilizes C++ class templates, interfaces and derivation, and C++ RTTI (typeid). This combination saves us from defining something like `caffe2::TypeMeta`, which takes hundreds of lines of C++ code.
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <memory>
#include <string>
#include "gtest/gtest.h"
#include "paddle/framework/variable.h"
// Stores a locally defined struct in a Variable and reads it back, then
// stores a std::string in the same Variable and reads that back too.
TEST(Variable, GetMutable) {
  using paddle::framework::Variable;

  struct Tensor {
    int content_;
  };

  std::unique_ptr<Variable> var(new Variable());

  // First value: a Tensor.
  Tensor* tensor = var->GetMutable<Tensor>();
  tensor->content_ = 1234;
  const Tensor& readTensor = var->Get<Tensor>();
  EXPECT_EQ(1234, readTensor.content_);

  // Second value: a std::string requested from the same variable.
  std::string* text = var->GetMutable<std::string>();
  *text = "hello";
  const std::string& readText = var->Get<std::string>();
  EXPECT_EQ("hello", readText);
}
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Evaluator.h"
#include "paddle/gserver/layers/DetectionUtil.h"
using std::map;
using std::vector;
using std::pair;
using std::make_pair;
namespace paddle {
/**
* @brief detection map Evaluator
*
* The config file api is detection_map_evaluator.
*/
class DetectionMAPEvaluator : public Evaluator {
public:
DetectionMAPEvaluator()
: evaluateDifficult_(false), cpuOutput_(nullptr), cpuLabel_(nullptr) {}
virtual void start() {
Evaluator::start();
allTruePos_.clear();
allFalsePos_.clear();
numPos_.clear();
}
virtual real evalImp(std::vector<Argument>& arguments) {
overlapThreshold_ = config_.overlap_threshold();
backgroundId_ = config_.background_id();
evaluateDifficult_ = config_.evaluate_difficult();
apType_ = config_.ap_type();
MatrixPtr detectTmpValue = arguments[0].value;
Matrix::resizeOrCreate(cpuOutput_,
detectTmpValue->getHeight(),
detectTmpValue->getWidth(),
false,
false);
MatrixPtr labelTmpValue = arguments[1].value;
Matrix::resizeOrCreate(cpuLabel_,
labelTmpValue->getHeight(),
labelTmpValue->getWidth(),
false,
false);
cpuOutput_->copyFrom(*detectTmpValue);
cpuLabel_->copyFrom(*labelTmpValue);
Argument label = arguments[1];
const int* labelIndex = label.sequenceStartPositions->getData(false);
size_t batchSize = label.getNumSequences();
vector<map<size_t, vector<NormalizedBBox>>> allGTBBoxes;
vector<map<size_t, vector<pair<real, NormalizedBBox>>>> allDetectBBoxes;
for (size_t n = 0; n < batchSize; ++n) {
map<size_t, vector<NormalizedBBox>> bboxes;
for (int i = labelIndex[n]; i < labelIndex[n + 1]; ++i) {
vector<NormalizedBBox> bbox;
getBBoxFromLabelData(cpuLabel_->getData() + i * 6, 1, bbox);
int c = cpuLabel_->getData()[i * 6];
bboxes[c].push_back(bbox[0]);
}
allGTBBoxes.push_back(bboxes);
}
size_t n = 0;
const real* cpuOutputData = cpuOutput_->getData();
for (size_t imgId = 0; imgId < batchSize; ++imgId) {
map<size_t, vector<pair<real, NormalizedBBox>>> bboxes;
size_t curImgId = static_cast<size_t>((cpuOutputData + n * 7)[0]);
while (curImgId == imgId && n < cpuOutput_->getHeight()) {
vector<real> label;
vector<real> score;
vector<NormalizedBBox> bbox;
getBBoxFromDetectData(cpuOutputData + n * 7, 1, label, score, bbox);
bboxes[label[0]].push_back(make_pair(score[0], bbox[0]));
++n;
curImgId = static_cast<size_t>((cpuOutputData + n * 7)[0]);
}
allDetectBBoxes.push_back(bboxes);
}
for (size_t n = 0; n < batchSize; ++n) {
for (map<size_t, vector<NormalizedBBox>>::iterator it =
allGTBBoxes[n].begin();
it != allGTBBoxes[n].end();
++it) {
size_t count = 0;
if (evaluateDifficult_) {
count = it->second.size();
} else {
for (size_t i = 0; i < it->second.size(); ++i)
if (!(it->second[i].isDifficult)) ++count;
}
if (numPos_.find(it->first) == numPos_.end() && count != 0) {
numPos_[it->first] = count;
} else {
numPos_[it->first] += count;
}
}
}
// calcTFPos
calcTFPos(batchSize, allGTBBoxes, allDetectBBoxes);
return 0;
}
virtual void printStats(std::ostream& os) const {
real mAP = calcMAP();
os << "Detection mAP=" << mAP;
}
virtual void distributeEval(ParameterClient2* client) {
LOG(FATAL) << "Distribute detection evaluation not implemented.";
}
protected:
void calcTFPos(const size_t batchSize,
const vector<map<size_t, vector<NormalizedBBox>>>& allGTBBoxes,
const vector<map<size_t, vector<pair<real, NormalizedBBox>>>>&
allDetectBBoxes) {
for (size_t n = 0; n < allDetectBBoxes.size(); ++n) {
if (allGTBBoxes[n].size() == 0) {
for (map<size_t, vector<pair<real, NormalizedBBox>>>::const_iterator
it = allDetectBBoxes[n].begin();
it != allDetectBBoxes[n].end();
++it) {
size_t label = it->first;
for (size_t i = 0; i < it->second.size(); ++i) {
allTruePos_[label].push_back(make_pair(it->second[i].first, 0));
allFalsePos_[label].push_back(make_pair(it->second[i].first, 1));
}
}
} else {
for (map<size_t, vector<pair<real, NormalizedBBox>>>::const_iterator
it = allDetectBBoxes[n].begin();
it != allDetectBBoxes[n].end();
++it) {
size_t label = it->first;
vector<pair<real, NormalizedBBox>> predBBoxes = it->second;
if (allGTBBoxes[n].find(label) == allGTBBoxes[n].end()) {
for (size_t i = 0; i < predBBoxes.size(); ++i) {
allTruePos_[label].push_back(make_pair(predBBoxes[i].first, 0));
allFalsePos_[label].push_back(make_pair(predBBoxes[i].first, 1));
}
} else {
vector<NormalizedBBox> gtBBoxes =
allGTBBoxes[n].find(label)->second;
vector<bool> visited(gtBBoxes.size(), false);
// Sort detections in descend order based on scores
std::sort(predBBoxes.begin(),
predBBoxes.end(),
sortScorePairDescend<NormalizedBBox>);
for (size_t i = 0; i < predBBoxes.size(); ++i) {
real maxOverlap = -1.0;
size_t maxIdx = 0;
for (size_t j = 0; j < gtBBoxes.size(); ++j) {
real overlap =
jaccardOverlap(predBBoxes[i].second, gtBBoxes[j]);
if (overlap > maxOverlap) {
maxOverlap = overlap;
maxIdx = j;
}
}
if (maxOverlap > overlapThreshold_) {
if (evaluateDifficult_ ||
(!evaluateDifficult_ && !gtBBoxes[maxIdx].isDifficult)) {
if (!visited[maxIdx]) {
allTruePos_[label].push_back(
make_pair(predBBoxes[i].first, 1));
allFalsePos_[label].push_back(
make_pair(predBBoxes[i].first, 0));
visited[maxIdx] = true;
} else {
allTruePos_[label].push_back(
make_pair(predBBoxes[i].first, 0));
allFalsePos_[label].push_back(
make_pair(predBBoxes[i].first, 1));
}
}
} else {
allTruePos_[label].push_back(make_pair(predBBoxes[i].first, 0));
allFalsePos_[label].push_back(
make_pair(predBBoxes[i].first, 1));
}
}
}
}
}
}
}
real calcMAP() const {
real mAP = 0.0;
size_t count = 0;
for (map<size_t, size_t>::const_iterator it = numPos_.begin();
it != numPos_.end();
++it) {
size_t label = it->first;
size_t labelNumPos = it->second;
if (labelNumPos == 0 || allTruePos_.find(label) == allTruePos_.end())
continue;
vector<pair<real, size_t>> labelTruePos = allTruePos_.find(label)->second;
vector<pair<real, size_t>> labelFalsePos =
allFalsePos_.find(label)->second;
// Compute average precision.
vector<size_t> tpCumSum;
getAccumulation(labelTruePos, &tpCumSum);
vector<size_t> fpCumSum;
getAccumulation(labelFalsePos, &fpCumSum);
std::vector<real> precision, recall;
size_t num = tpCumSum.size();
// Compute Precision.
for (size_t i = 0; i < num; ++i) {
CHECK_LE(tpCumSum[i], labelNumPos);
precision.push_back(static_cast<real>(tpCumSum[i]) /
static_cast<real>(tpCumSum[i] + fpCumSum[i]));
recall.push_back(static_cast<real>(tpCumSum[i]) / labelNumPos);
}
// VOC2007 style
if (apType_ == "11point") {
vector<real> maxPrecisions(11, 0.0);
int startIdx = num - 1;
for (int j = 10; j >= 0; --j)
for (int i = startIdx; i >= 0; --i) {
if (recall[i] < j / 10.) {
startIdx = i;
if (j > 0) maxPrecisions[j - 1] = maxPrecisions[j];
break;
} else {
if (maxPrecisions[j] < precision[i])
maxPrecisions[j] = precision[i];
}
}
for (int j = 10; j >= 0; --j) mAP += maxPrecisions[j] / 11;
++count;
} else if (apType_ == "Integral") {
// Nature integral
real averagePrecisions = 0.;
real prevRecall = 0.;
for (size_t i = 0; i < num; ++i) {
if (fabs(recall[i] - prevRecall) > 1e-6)
averagePrecisions += precision[i] * fabs(recall[i] - prevRecall);
prevRecall = recall[i];
}
mAP += averagePrecisions;
++count;
} else {
LOG(FATAL) << "Unkown ap version: " << apType_;
}
}
if (count != 0) mAP /= count;
return mAP * 100;
}
void getAccumulation(vector<pair<real, size_t>> inPairs,
vector<size_t>* accuVec) const {
std::stable_sort(
inPairs.begin(), inPairs.end(), sortScorePairDescend<size_t>);
accuVec->clear();
size_t sum = 0;
for (size_t i = 0; i < inPairs.size(); ++i) {
sum += inPairs[i].second;
accuVec->push_back(sum);
}
}
std::string getTypeImpl() const { return "detection_map"; }
real getValueImpl() const { return calcMAP(); }
private:
real overlapThreshold_; // overlap threshold when determining whether matched
bool evaluateDifficult_; // whether evaluate difficult ground truth
size_t backgroundId_; // class index of background
std::string apType_; // how to calculate mAP (Integral or 11point)
MatrixPtr cpuOutput_;
MatrixPtr cpuLabel_;
map<size_t, size_t> numPos_; // counts of true objects each classification
map<size_t, vector<pair<real, size_t>>>
allTruePos_; // true positive prediction
map<size_t, vector<pair<real, size_t>>>
allFalsePos_; // false positive prediction
};
REGISTER_EVALUATOR(detection_map, DetectionMAPEvaluator);
} // namespace paddle
...@@ -241,11 +241,14 @@ void NeuralNetwork::forward(const std::vector<Argument>& inArgs, ...@@ -241,11 +241,14 @@ void NeuralNetwork::forward(const std::vector<Argument>& inArgs,
dataLayers_[i]->setData(inArgs[i]); dataLayers_[i]->setData(inArgs[i]);
} }
gLayerStackTrace.set_stage(true);
{ {
for (auto& layer : layers_) { for (auto& layer : layers_) {
REGISTER_TIMER_INFO("ForwardTimer", layer->getName().c_str()); REGISTER_TIMER_INFO("ForwardTimer", layer->getName().c_str());
gLayerStackTrace.push(layer->getName()); gLayerStackTrace.push(layer->getName());
layer->forward(passType); layer->forward(passType);
gLayerStackTrace.pop(layer->getName());
} }
} }
...@@ -254,9 +257,6 @@ void NeuralNetwork::forward(const std::vector<Argument>& inArgs, ...@@ -254,9 +257,6 @@ void NeuralNetwork::forward(const std::vector<Argument>& inArgs,
for (auto& layer : outputLayers_) { for (auto& layer : outputLayers_) {
outArgs->push_back(layer->getOutput()); outArgs->push_back(layer->getOutput());
} }
if (passType == PASS_TEST) {
gLayerStackTrace.clear();
}
} }
void NeuralNetwork::resetState() { void NeuralNetwork::resetState() {
...@@ -283,9 +283,10 @@ void NeuralNetwork::getState(MachineState& machineState) { ...@@ -283,9 +283,10 @@ void NeuralNetwork::getState(MachineState& machineState) {
} }
void NeuralNetwork::backward(const UpdateCallback& callback) { void NeuralNetwork::backward(const UpdateCallback& callback) {
gLayerStackTrace.pop(""); // tell layer trace is during backward. gLayerStackTrace.set_stage(false);
FOR_EACH_R(layer, layers_) { FOR_EACH_R(layer, layers_) {
REGISTER_TIMER_INFO("BackwardTimer", (*layer)->getName().c_str()); REGISTER_TIMER_INFO("BackwardTimer", (*layer)->getName().c_str());
gLayerStackTrace.push((*layer)->getName());
if ((*layer)->needGradient()) { if ((*layer)->needGradient()) {
(*layer)->backward(callback); (*layer)->backward(callback);
} }
...@@ -308,35 +309,35 @@ public: ...@@ -308,35 +309,35 @@ public:
void addEvaluator(std::unique_ptr<Evaluator>&& evaluator) { void addEvaluator(std::unique_ptr<Evaluator>&& evaluator) {
evaluators_.emplace_back(std::move(evaluator)); evaluators_.emplace_back(std::move(evaluator));
} }
virtual void start() { void start() override {
for (auto& evaluator : evaluators_) { for (auto& evaluator : evaluators_) {
evaluator->start(); evaluator->start();
} }
} }
virtual void finish() { void finish() override {
for (auto& evaluator : evaluators_) { for (auto& evaluator : evaluators_) {
evaluator->finish(); evaluator->finish();
} }
} }
virtual void eval(const NeuralNetwork& nn) { void eval(const NeuralNetwork& nn) override {
for (auto& evaluator : evaluators_) { for (auto& evaluator : evaluators_) {
evaluator->eval(nn); evaluator->eval(nn);
} }
} }
virtual real evalImp(std::vector<Argument>& arguments) { real evalImp(std::vector<Argument>& arguments) override {
(void)arguments; (void)arguments;
return -1; return -1;
} }
virtual void printStats(std::ostream& os) const { void printStats(std::ostream& os) const override {
for (auto& evaluator : evaluators_) { for (auto& evaluator : evaluators_) {
evaluator->printStats(os); evaluator->printStats(os);
os << ' '; os << ' ';
} }
} }
virtual void distributeEval(ParameterClient2* client) { void distributeEval(ParameterClient2* client) override {
for (auto& evaluator : evaluators_) { for (auto& evaluator : evaluators_) {
evaluator->distributeEval(client); evaluator->distributeEval(client);
} }
...@@ -351,7 +352,7 @@ public: ...@@ -351,7 +352,7 @@ public:
* @brief getNames will return all inside evaluators' names. * @brief getNames will return all inside evaluators' names.
* @param names [out]: return names. * @param names [out]: return names.
*/ */
void getNames(std::vector<std::string>* names) { void getNames(std::vector<std::string>* names) override {
for (auto& eval : evaluators_) { for (auto& eval : evaluators_) {
eval->getNames(names); eval->getNames(names);
} }
...@@ -360,7 +361,7 @@ public: ...@@ -360,7 +361,7 @@ public:
/** /**
* @brief getValue could get all inside evaluators' value. * @brief getValue could get all inside evaluators' value.
*/ */
real getValue(const std::string& name, Error* err) const { real getValue(const std::string& name, Error* err) const override {
return this->getMethodHelper<real>( return this->getMethodHelper<real>(
name, err, [&name, err](const std::unique_ptr<Evaluator>& eval) { name, err, [&name, err](const std::unique_ptr<Evaluator>& eval) {
return eval->getValue(name, err); return eval->getValue(name, err);
...@@ -370,7 +371,7 @@ public: ...@@ -370,7 +371,7 @@ public:
/** /**
* @brief getType could get all inside evaluators' type. * @brief getType could get all inside evaluators' type.
*/ */
std::string getType(const std::string& name, Error* err) const { std::string getType(const std::string& name, Error* err) const override {
return this->getMethodHelper<std::string>( return this->getMethodHelper<std::string>(
name, err, [&name, err](const std::unique_ptr<Evaluator>& eval) { name, err, [&name, err](const std::unique_ptr<Evaluator>& eval) {
return eval->getType(name, err); return eval->getType(name, err);
...@@ -395,6 +396,30 @@ private: ...@@ -395,6 +396,30 @@ private:
} }
}; };
class SubnetEvaluator : public CombinedEvaluator {
public:
SubnetEvaluator(const std::string& layerName,
std::unique_ptr<Evaluator>&& evaluator)
: layerName_(layerName) {
addEvaluator(std::move(evaluator));
}
virtual void eval(const NeuralNetwork& nn) override {
const LayerPtr& layer = nn.getLayer(layerName_);
CHECK(layer) << "Nonexisted layer: " << layerName_ << " in submodel "
<< nn.getName();
bool accessed = false;
layer->accessSubNetwork([this, &accessed](NeuralNetwork& subnet) {
subnet.eval(evaluators_[0].get());
accessed = true;
});
CHECK(accessed) << "There is no subnetwork for layer " << layerName_
<< " in submodel " << nn.getName();
}
protected:
std::string layerName_;
};
Evaluator* NeuralNetwork::makeEvaluator() const { Evaluator* NeuralNetwork::makeEvaluator() const {
CombinedEvaluator* combinedEvaluator = new CombinedEvaluator(); CombinedEvaluator* combinedEvaluator = new CombinedEvaluator();
auto subModelConfig = std::find_if(config_.sub_models().begin(), auto subModelConfig = std::find_if(config_.sub_models().begin(),
...@@ -421,6 +446,15 @@ Evaluator* NeuralNetwork::makeEvaluator() const { ...@@ -421,6 +446,15 @@ Evaluator* NeuralNetwork::makeEvaluator() const {
combinedEvaluator->addEvaluator(std::move(evaluator)); combinedEvaluator->addEvaluator(std::move(evaluator));
} }
} }
for (auto& layer : layers_) {
layer->accessSubNetwork(
[layer, combinedEvaluator](NeuralNetwork& subnet) {
std::unique_ptr<Evaluator> subEvaluator(new SubnetEvaluator(
layer->getName(),
std::unique_ptr<Evaluator>(subnet.makeEvaluator())));
combinedEvaluator->addEvaluator(std::move(subEvaluator));
});
}
} else { } else {
for (const EvaluatorConfig& evalConfig : config_.evaluators()) { for (const EvaluatorConfig& evalConfig : config_.evaluators()) {
std::unique_ptr<Evaluator> evaluator(Evaluator::create(evalConfig)); std::unique_ptr<Evaluator> evaluator(Evaluator::create(evalConfig));
......
...@@ -129,6 +129,8 @@ public: ...@@ -129,6 +129,8 @@ public:
static NeuralNetwork* newNeuralNetwork(const std::string& name = "", static NeuralNetwork* newNeuralNetwork(const std::string& name = "",
NeuralNetwork* rootNetwork = nullptr); NeuralNetwork* rootNetwork = nullptr);
const std::string& getName() const { return subModelName_; }
protected: protected:
/** /**
* The constructor of NeuralNetwork. * The constructor of NeuralNetwork.
......
...@@ -208,6 +208,7 @@ void RecurrentGradientMachine::init( ...@@ -208,6 +208,7 @@ void RecurrentGradientMachine::init(
}); });
CHECK(subModelConfig != config.sub_models().end()); CHECK(subModelConfig != config.sub_models().end());
reversed_ = subModelConfig->reversed(); reversed_ = subModelConfig->reversed();
generating_ = subModelConfig->has_generator();
inFrameLines_.resize(subModelConfig->in_links_size()); inFrameLines_.resize(subModelConfig->in_links_size());
for (size_t i = 0; i < inFrameLines_.size(); ++i) { for (size_t i = 0; i < inFrameLines_.size(); ++i) {
...@@ -287,10 +288,6 @@ void RecurrentGradientMachine::init( ...@@ -287,10 +288,6 @@ void RecurrentGradientMachine::init(
parameterIds_.push_back(para->getID()); parameterIds_.push_back(para->getID());
} }
} }
if (subModelConfig->evaluator_names_size() > 0) {
evaluator_.reset(frames_[0]->makeEvaluator());
}
} }
void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) { void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) {
...@@ -538,7 +535,7 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs, ...@@ -538,7 +535,7 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
The outputs are outFramesLines_[i].agentLayer The outputs are outFramesLines_[i].agentLayer
*/ */
if (inFrameLines_.empty() && passType == PASS_TEST) { if (generating_) {
generateSequence(); generateSequence();
return; return;
} // else forward.. } // else forward..
...@@ -561,14 +558,14 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs, ...@@ -561,14 +558,14 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
std::vector<Argument> outArgs; std::vector<Argument> outArgs;
frames_[i]->forward(inArgs, &outArgs, passType); frames_[i]->forward(inArgs, &outArgs, passType);
} }
if (evaluator_ && passType == PASS_TEST) {
this->eval(evaluator_.get());
}
reorganizeOutput(passType); reorganizeOutput(passType);
} }
void RecurrentGradientMachine::backward(const UpdateCallback& callback) { void RecurrentGradientMachine::backward(const UpdateCallback& callback) {
if (generating_) {
return;
}
REGISTER_TIMER_INFO("RecurrentBwTime", "RecurrentBwTime"); REGISTER_TIMER_INFO("RecurrentBwTime", "RecurrentBwTime");
AsyncGpuBlock asyncGpuBlock; AsyncGpuBlock asyncGpuBlock;
for (int i = maxSequenceLength_ - 1; i >= 0; --i) { for (int i = maxSequenceLength_ - 1; i >= 0; --i) {
...@@ -577,11 +574,6 @@ void RecurrentGradientMachine::backward(const UpdateCallback& callback) { ...@@ -577,11 +574,6 @@ void RecurrentGradientMachine::backward(const UpdateCallback& callback) {
for (auto& memoryFrameLine : memoryFrameLines_) { for (auto& memoryFrameLine : memoryFrameLines_) {
memoryFrameLine.bootLayer->backward(nullptr); memoryFrameLine.bootLayer->backward(nullptr);
} }
// call printers here so the gradient can be printed
if (evaluator_) {
this->eval(evaluator_.get());
}
} }
void RecurrentGradientMachine::forwardBackward( void RecurrentGradientMachine::forwardBackward(
...@@ -595,9 +587,9 @@ void RecurrentGradientMachine::forwardBackward( ...@@ -595,9 +587,9 @@ void RecurrentGradientMachine::forwardBackward(
void RecurrentGradientMachine::eval(Evaluator* evaluator) const { void RecurrentGradientMachine::eval(Evaluator* evaluator) const {
// call printers frame by frame // call printers frame by frame
for (int i = 0; i < maxSequenceLength_; ++i) { for (int i = 0; i < maxSequenceLength_; ++i) {
LOG(INFO) << "Recurrent Layer Group eval frame " << i << " begin"; VLOG(2) << "Recurrent Layer Group eval frame " << i << " begin";
evaluator->eval(*(frames_[i].get())); evaluator->eval(*(frames_[i].get()));
LOG(INFO) << "Recurrent Layer Group eval frame " << i << " end"; VLOG(2) << "Recurrent Layer Group eval frame " << i << " end";
} }
} }
...@@ -1093,10 +1085,6 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) { ...@@ -1093,10 +1085,6 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
copyDataOutlinkFrame(machineCur); copyDataOutlinkFrame(machineCur);
// call value printer
if (evaluator_) {
evaluator_->eval(*(frames_[machineCur].get()));
}
// check eos // check eos
const IVectorPtr& eosVec = const IVectorPtr& eosVec =
eosFrameLine_->layers[machineCur]->getOutput().ids; eosFrameLine_->layers[machineCur]->getOutput().ids;
...@@ -1321,11 +1309,10 @@ void RecurrentGradientMachine::fillGenOutputs() { ...@@ -1321,11 +1309,10 @@ void RecurrentGradientMachine::fillGenOutputs() {
batchMachineIdVec_.clear(); batchMachineIdVec_.clear();
generator_.ids.clear(); generator_.ids.clear();
int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
starts[0] = 0;
if (numResults > 1) { if (numResults > 1) {
real* probs = generator_.outArg.in->getData(); real* probs = generator_.outArg.in->getData();
int* starts =
generator_.outArg.sequenceStartPositions->getMutableData(false);
starts[0] = 0;
for (size_t i = 0; i < finalPaths_.size(); ++i) { for (size_t i = 0; i < finalPaths_.size(); ++i) {
for (size_t j = 0; j < finalPaths_[i].size(); ++j) { for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
Path& path = finalPaths_[i][j]; Path& path = finalPaths_[i][j];
...@@ -1348,7 +1335,10 @@ void RecurrentGradientMachine::fillGenOutputs() { ...@@ -1348,7 +1335,10 @@ void RecurrentGradientMachine::fillGenOutputs() {
} else { } else {
for (size_t i = 0; i < finalPaths_.size(); ++i) { for (size_t i = 0; i < finalPaths_.size(); ++i) {
CHECK(!finalPaths_[i].empty()); CHECK(!finalPaths_[i].empty());
generator_.ids = finalPaths_[i][0].ids; generator_.ids.insert(generator_.ids.begin(),
finalPaths_[i][0].ids.begin(),
finalPaths_[i][0].ids.end());
starts[i + 1] = starts[i] + finalPaths_[i][0].ids.size();
} }
} }
} }
......
...@@ -414,6 +414,7 @@ protected: ...@@ -414,6 +414,7 @@ protected:
std::vector<int> ids; // store generated sequences std::vector<int> ids; // store generated sequences
Argument outArg; // final output argument Argument outArg; // final output argument
}; };
bool generating_;
Generator generator_; Generator generator_;
std::vector<std::unique_ptr<NeuralNetwork>> frames_; std::vector<std::unique_ptr<NeuralNetwork>> frames_;
...@@ -428,8 +429,6 @@ protected: ...@@ -428,8 +429,6 @@ protected:
std::vector<int> std::vector<int>
parameterIds_; // parameters actually used by this Layer Group parameterIds_; // parameters actually used by this Layer Group
std::unique_ptr<Evaluator> evaluator_; // frame printers in this layer group
// store final argument of outFrameLines_ // store final argument of outFrameLines_
std::vector<Argument> dataArgs_; std::vector<Argument> dataArgs_;
// store each frame's output argument of outFrameLines_ // store each frame's output argument of outFrameLines_
......
...@@ -109,6 +109,40 @@ void GatherAgentLayer::forwardValue(PassType passType) { ...@@ -109,6 +109,40 @@ void GatherAgentLayer::forwardValue(PassType passType) {
} }
} }
namespace {

/**
 * Scatter the elements of srcVec into destVec:
 *   destVec[indexVec[k]] = srcVec[k]  for every k in [0, indexVec.getSize()).
 *
 * srcVec and indexVec must have the same size (enforced with CHECK_EQ).
 */
void copyElements(const IVector& srcVec,
                  const IVector& indexVec,
                  IVector& destVec) {
  CHECK_EQ(srcVec.getSize(), indexVec.getSize());

  const int count = indexVec.getSize();
  const int* from = srcVec.getData();
  const int* positions = indexVec.getData();
  int* to = destVec.getData();

  for (int k = 0; k < count; ++k) {
    to[positions[k]] = from[k];
  }
}

}  // namespace
/**
 * Gather the ids produced by the real layers into output_.ids.
 *
 * Returns without touching the output when the first real layer has no ids.
 * Otherwise output_.ids is resized to allIds_->getSize() and, for every real
 * layer i, that layer's ids are written to the output positions listed in
 * allIds_ starting at offset idIndex_[i].
 *
 * @param passType unused here.
 */
void GatherAgentLayer::forwardIds(PassType passType) {
  // Nothing to gather when the first real layer produces no ids.
  if (!realLayers_[0]->getOutputLabel()) return;

  IVector::resizeOrCreate(output_.ids, allIds_->getSize(), useGpu_);
  IVectorPtr outId = output_.ids;
  idsVec_.resize(idIndex_.size());

  for (size_t i = 0; i < realLayers_.size(); ++i) {
    const IVectorPtr& layerIds = realLayers_[i]->getOutputLabel();
    // Only layer 0 was checked above; fail loudly instead of dereferencing a
    // null pointer if a later layer has no ids.
    CHECK(layerIds) << "real layer " << i << " has no ids output";

    // View onto the slice of allIds_ that holds this layer's destination
    // positions in the gathered output.
    idsVec_[i] = IVector::create(allIds_->getData() + idIndex_[i],
                                 /* size */ layerIds->getSize(),
                                 useGpu_);
    execViaCpu(&copyElements, *layerIds, *idsVec_[i], *outId);
  }
}
void GatherAgentLayer::backward(const UpdateCallback& callback) { void GatherAgentLayer::backward(const UpdateCallback& callback) {
(void)callback; (void)callback;
const MatrixPtr& outputGrad = getOutputGrad(); const MatrixPtr& outputGrad = getOutputGrad();
...@@ -136,23 +170,22 @@ void ScatterAgentLayer::forward(PassType passType) { ...@@ -136,23 +170,22 @@ void ScatterAgentLayer::forward(PassType passType) {
CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId()); CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
int width = this->getSize(); int width = this->getSize();
if (realOutArg_.hasSeq()) { if (selectionMode_) {
forwardSequence(passType); forwardWithSelection(passType);
} else if (realOutArg_.value || realOutArg_.ids) { } else {
output_.subArgFrom( if (realOutArg_.hasSeq()) {
realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_); output_.subArgFrom(realOutArg_,
} else { // used in generation /* offset */ idIndex_,
if (realLayer_->getOutput().ids) { idSize_,
IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_); width,
output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_); useGpu_,
} /* trans */ false,
if (realLayer_->getOutput().value) { /* seqFlag */ true,
int height = ids_->getSize(); /* seqStart */ seqStartPosIndex_,
resetOutput(height, width); /* seqSize */ numSequences_);
} else {
const MatrixPtr& outV = getOutputValue(); output_.subArgFrom(
const MatrixPtr& realV = realLayer_->getOutputValue(); realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_);
outV->selectRows(*realV, *ids_);
} }
} }
} }
...@@ -160,6 +193,8 @@ void ScatterAgentLayer::forward(PassType passType) { ...@@ -160,6 +193,8 @@ void ScatterAgentLayer::forward(PassType passType) {
void ScatterAgentLayer::backward(const UpdateCallback& callback) { void ScatterAgentLayer::backward(const UpdateCallback& callback) {
(void)callback; (void)callback;
CHECK(!selectionMode_);
const MatrixPtr& outputGrad = realOutArg_.grad; const MatrixPtr& outputGrad = realOutArg_.grad;
const MatrixPtr& realGrad = realLayer_->getOutputGrad(); const MatrixPtr& realGrad = realLayer_->getOutputGrad();
if (realGrad) { if (realGrad) {
...@@ -174,42 +209,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) { ...@@ -174,42 +209,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
REGISTER_LAYER(gather_agent, GatherAgentLayer); REGISTER_LAYER(gather_agent, GatherAgentLayer);
REGISTER_LAYER(scatter_agent, ScatterAgentLayer); REGISTER_LAYER(scatter_agent, ScatterAgentLayer);
void GatherAgentLayer::forwardIds(PassType passType) { void ScatterAgentLayer::forwardWithSelection(PassType passType) {
int height = 0;
IVectorPtr idReal = realLayers_[0]->getOutputLabel();
if (!idReal) return;
if (output_.subSequenceStartPositions) {
int* starts = output_.subSequenceStartPositions->getMutableData(false);
// Gather generator.idsVec
// if is beam search generation result. Get first result.
if (idReal->getData()[idReal->getSize() - 1] == -1) {
for (size_t i = 0; i < realLayers_.size(); ++i) {
// The first element stores first result size
idReal = realLayers_[i]->getOutputLabel();
idReal->subVecFrom(*idReal, 1, idReal->getData()[0]);
}
}
for (size_t i = 0; i < realLayers_.size(); ++i) {
CHECK(realLayers_[i]->getOutputLabel());
starts[i] = height;
height += realLayers_[i]->getOutputLabel()->getSize();
}
starts[realLayers_.size()] = height;
output_.sequenceStartPositions->getMutableData(false)[1] = height;
IVector::resizeOrCreate(output_.ids, height, false);
for (size_t i = 0; i < realLayers_.size(); ++i) {
output_.ids->subVec(starts[i], starts[i + 1] - starts[i])
->copyFrom(*realLayers_[i]->getOutputLabel());
}
} else {
LOG(FATAL) << "Not implemented";
}
}
void ScatterAgentLayer::forwardSequence(PassType passType) {
Layer::forward(passType); Layer::forward(passType);
CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId()); CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
...@@ -220,17 +220,19 @@ void ScatterAgentLayer::forwardSequence(PassType passType) { ...@@ -220,17 +220,19 @@ void ScatterAgentLayer::forwardSequence(PassType passType) {
AsyncGpuBlock asyncGpuBlock; AsyncGpuBlock asyncGpuBlock;
REGISTER_TIMER_INFO("SequenceAgentLayerForward", getName().c_str()); REGISTER_TIMER_INFO("SequenceAgentLayerForward", getName().c_str());
if (realOutArg_.value || realOutArg_.ids) { if (!input.hasSeq()) {
CHECK(realOutArg_.sequenceStartPositions); if (realLayer_->getOutput().ids) {
output_.subArgFrom(realOutArg_, IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_);
/* offset */ idIndex_, output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_);
idSize_, }
width, if (realLayer_->getOutput().value) {
useGpu_, int height = ids_->getSize();
/* trans */ false, resetOutput(height, width);
/* seqFlag */ true,
/* seqStart */ seqStartPosIndex_, const MatrixPtr& outV = getOutputValue();
/* seqSize */ numSequences_); const MatrixPtr& realV = realLayer_->getOutputValue();
outV->selectRows(*realV, *ids_);
}
} else { } else {
// Putting the generation logic here is really an ugly hack! // Putting the generation logic here is really an ugly hack!
// used in generation // used in generation
......
...@@ -110,6 +110,9 @@ protected: ...@@ -110,6 +110,9 @@ protected:
// of real layer. // of real layer.
ICpuGpuVectorPtr inputStartPos_; ICpuGpuVectorPtr inputStartPos_;
// true for setRealLayer, false for setRealLayerAndOutput
bool selectionMode_;
public: public:
explicit ScatterAgentLayer(const LayerConfig& config) : Layer(config) {} explicit ScatterAgentLayer(const LayerConfig& config) : Layer(config) {}
...@@ -137,6 +140,7 @@ public: ...@@ -137,6 +140,7 @@ public:
} else { } else {
cpuIds_ = ids_; cpuIds_ = ids_;
} }
selectionMode_ = true;
} }
// set real layer and output, [idIndex, idIndex + idSize) of *ids* // set real layer and output, [idIndex, idIndex + idSize) of *ids*
...@@ -153,6 +157,7 @@ public: ...@@ -153,6 +157,7 @@ public:
idIndex_ = idIndex; idIndex_ = idIndex;
idSize_ = idSize; idSize_ = idSize;
handleBackward_ = handleBackward; handleBackward_ = handleBackward;
selectionMode_ = false;
} }
void setSequenceStartPositions(const ICpuGpuVectorPtr& sequenceStartPositions, void setSequenceStartPositions(const ICpuGpuVectorPtr& sequenceStartPositions,
...@@ -166,7 +171,7 @@ public: ...@@ -166,7 +171,7 @@ public:
void forward(PassType passType) override; void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override; void backward(const UpdateCallback& callback) override;
void forwardSequence(PassType passType); void forwardWithSelection(PassType passType);
}; };
} // namespace paddle } // namespace paddle
...@@ -138,6 +138,23 @@ void testEvaluatorAll(TestConfig testConf, ...@@ -138,6 +138,23 @@ void testEvaluatorAll(TestConfig testConf,
testEvaluator(testConf, testEvaluatorName, batchSize, false); testEvaluator(testConf, testEvaluatorName, batchSize, false);
} }
// Runs the "detection_map" evaluator through testEvaluatorAll twice: once
// with evaluate_difficult disabled and once with it enabled.
TEST(Evaluator, detection_map) {
  TestConfig config;
  config.evaluatorConfig.set_type("detection_map");
  config.evaluatorConfig.set_overlap_threshold(0.5);
  config.evaluatorConfig.set_background_id(0);
  config.evaluatorConfig.set_ap_type("Integral");

  config.inputDefs.push_back({INPUT_DATA, "output", 7});
  config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "label", 6});

  // evaluate_difficult is set explicitly before each run, so no initial value
  // needs to be assigned above.
  config.evaluatorConfig.set_evaluate_difficult(false);
  testEvaluatorAll(config, "detection_map", 100);

  config.evaluatorConfig.set_evaluate_difficult(true);
  testEvaluatorAll(config, "detection_map", 100);
}
TEST(Evaluator, classification_error) { TEST(Evaluator, classification_error) {
TestConfig config; TestConfig config;
config.evaluatorConfig.set_type("classification_error"); config.evaluatorConfig.set_type("classification_error");
......
## Design
### Usage
To allocate 4KB CPU memory:
```cpp
p = memory::Alloc(platform::CPUPlace(), 4*1024);
```
To allocate 4KB memory on the 3rd GPU:
```cpp
p = memory::Alloc(platform::GPUPlace(2), 4*1024);
```
To free memory and check the so-far used amount of memory on a place:
```cpp
auto pl = platform::GPUPlace(0);
p = memory::Alloc(pl, 4*1024);
cout << memory::Used(pl);
memory::Free(pl, p);
```
### API
In `paddle/memory/memory.h` we have:
```cpp
namespace memory {
template <typename Place> void* Alloc(Place, size_t);
template <typename Place> void Free(Place, void*);
template <typename Place> size_t Used(Place);
} // namespace memory
```
These function templates have specializations on either `platform::CPUPlace` or `platform::GPUPlace`:
```cpp
template<>
void* Alloc<CPUPlace>(CPUPlace p, size_t size) {
return GetCPUBuddyAllocator()->Alloc(size);
}
```
and
```cpp
template<>
void* Alloc<GPUPlace>(GPUPlace p, size_t size) {
return GetGPUBuddyAllocator(p.id)->Alloc(size);
}
```
Similar specializations exist for `Free` and `Used`.
### Implementation
`GetCPUBuddyAllocator` and `GetGPUBuddyAllocator` are singletons.
```cpp
BuddyAllocator* GetCPUBuddyAllocator() {
static BuddyAllocator* a = NULL;
if (a == NULL) {
a = new BuddyAllocator(new CPUAllocator /*backup allocator*/, ...);
}
return a;
}
BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
static BuddyAllocator** as = NULL;
if (as == NULL) {
as = new BuddyAllocator*[platform::NumGPUs()];
for (int gpu = 0; gpu < platform::NumGPUs(); gpu++) {
as[gpu] = new BuddyAllocator(new GPUAllocator(gpu) /* backup allocator */, ...);
}
}
return as[gpu_id];
}
```
#### `BuddyAllocator`
`BuddyAllocator` implements the buddy allocation algorithm. Its constructor takes only parameters related to the algorithm:
```cpp
BuddyAllocator::BuddyAllocator(initial_pool_size, max_pool_size) {
...
}
```
Please be aware that **`BuddyAllocator` always allocates aligned memory**, aligned on 32 bytes, which can hold a `BuddyAllocator::Block` object:
```cpp
class BuddyAllocator {
private:
struct Block {
size_t size;
Block *left, *right;
};
...
};
```
Because `BuddyAllocator` has the metadata of each block, it can track the used memory: it records the amount of memory returned by `Alloc` and freed by `Free`. In contrast, `CPUAllocator` and `GPUAllocator` don't know the size of a freed memory block and therefore cannot do this tracking.
#### System Allocators
The `GPUAllocator` and `CPUAllocator` are called *system allocators*. They work as the fallback allocators of `BuddyAllocator`.
## Justification
I got inspiration from Majel and Caffe2, though the above design looks different from both.
### Caffe2
In Caffe2, `Tensor<Context>::mutable_data()` allocates the memory. In particular, [`Tensor<Context>::mutable_data`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L523) calls [`Tensor<Context>::raw_mutable_data`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L459), which in turn calls [`Context::New`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L479).
There are two implementations of `Context`:
1. [`CPUContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L105), whose [`New` method](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L131) calls [`g_cpu_allocator.get()->New(size_t)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.cc#L15) to allocate the memory.
1. [`CUDAContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L99), which has a data member [`int gpu_id_`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L202). This looks very similar to class `majel::GPUPlace`, which also has an `int id_` data member. `CUDAContext::New(size_t)` calls [`g_cub_allocator->DeviceAllocate(&ptr, nbytes)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.cu#L355) to allocate the memory.
### Majel
In Majel, there are basically two allocator types:
1. `cpu::SystemAllocator`, which has similar functionality to `caffe2::CPUContext::New/Delete`.
1. `gpu::SystemAllocator`, which has similar functionality to `caffe2::CUDAContext::New/Delete`.
However, memory allocation is not via these two allocators. Instead, these two allocators are defined in hidden namespaces.
In Majel there are hidden global variables like:
1. `cpu::SystemAllocator g_cpu_allocator`, and
1. `vector<gpu::SystemAllocator*> g_gpu_allocators(NUM_GPUS)`.
Programs allocate memory via a BuddyAllocator, which can take the `g_cpu_allocator` or a `g_gpu_allocators[gpu_id]` as its *fallback allocator*, so that if BuddyAllocator cannot find a block in its memory pool, it extends its memory pool by calling the fallback allocator's `New(size_t)`.
...@@ -14,11 +14,13 @@ limitations under the License. */ ...@@ -14,11 +14,13 @@ limitations under the License. */
#include "ParameterUpdaterHook.h" #include "ParameterUpdaterHook.h"
#include <algorithm>
#include <atomic> #include <atomic>
#include <fstream> #include <fstream>
#include <mutex> #include <mutex>
#include <thread> #include <thread>
#include <unordered_map> #include <unordered_map>
#include <vector>
#include "paddle/math/Vector.h" #include "paddle/math/Vector.h"
#include "paddle/parameter/Parameter.h" #include "paddle/parameter/Parameter.h"
...@@ -29,106 +31,76 @@ namespace paddle { ...@@ -29,106 +31,76 @@ namespace paddle {
/** /**
* The static pruning hook * The static pruning hook
* * Static means user specify a sparsity_ratio before training started, and the
* Static means user load a mask map before training started. This map will * network will prune the parameters based on the sparsity_ratio. More details
* define which link/weight between neural is disabled. * can be found https://arxiv.org/pdf/1506.02626.pdf.
*/ */
class StaticPruningHook : public IParameterUpdaterHook { class StaticPruningHook : public IParameterUpdaterHook {
public: public:
/** explicit StaticPruningHook(const ParameterUpdaterHookConfig &hookConfig)
* The Mask Map Header. : initCount_(0) {
* The map file started with this header. sparsityRatio_ = hookConfig.sparsity_ratio();
*
* In Version 0, reset file will be:
* contains header.size bit, each bit means such weight is enabled or not.
* if bit is 1, then such weight is enabled.
* at end, the file will round to byte, and the low bits of end byte will be
* filled by zero.
*
*/
struct StaticMaskHeader {
uint32_t version;
size_t size;
} __attribute__((__packed__));
explicit StaticPruningHook(const std::string& mask_filename) : initCount_(0) {
bool ok = this->loadMaskFile(mask_filename);
if (!ok) {
LOG(WARNING) << "Fail to load mask file " << mask_filename
<< " in current directory, searching in init_model_path";
std::string combineMaskFilename =
path::join(FLAGS_init_model_path, mask_filename);
CHECK(this->loadMaskFile(combineMaskFilename))
<< "Cannot load " << mask_filename << " in ./" << mask_filename
<< " and " << combineMaskFilename;
}
VLOG(3) << mask_filename << " mask size = " << this->mask_.size();
} }
void update(Parameter* para) { static bool sortPairAscend(const std::pair<real, size_t> &pair1,
const std::pair<real, size_t> &pair2) {
return pair1.first > pair2.first;
}
void update(Parameter *para) {
updateThreadChecker_.check(); updateThreadChecker_.check();
auto& vec = para->getBuf(PARAMETER_GRADIENT); auto &vec = para->getBuf(PARAMETER_GRADIENT);
if (vec) { if (vec) {
vec->dotMul(*maskVec_); vec->dotMul(*maskVec_);
} }
} }
void init(Parameter* para) { void generateMask(Parameter *para) {
size_t initCount = this->initCount_.fetch_add(1); VectorPtr maskTemp = Vector::create(para->getSize(), false);
CHECK_EQ(initCount, 0UL) << "Currently the StaticPruningHook must invoke " maskTemp->zeroMem();
"in same ParamterUpdater"; real *maskTempData = maskTemp->getData();
VLOG(3) << "Initialize Parameter " << para; size_t nonZeroNum = para->getSize() * (1 - sparsityRatio_);
SetDevice device(para->getDeviceId());
auto maskVec = Vector::create(this->mask_.size(), false); VectorPtr paraVec = para->getBuf(PARAMETER_VALUE);
{ // Initialize maskVec with float mask vector VectorPtr paraCpuCopy = Vector::create(para->getSize(), false);
real* dataPtr = maskVec->getData();
size_t i = 0; paraCpuCopy->copyFrom(*paraVec);
for (bool m : mask_) { std::vector<std::pair<real, size_t>> param;
dataPtr[i++] = m ? 1.0 : 0.0;
} for (size_t i = 0; i < para->getSize(); i++)
} param.push_back(std::make_pair(fabs(paraCpuCopy->getData()[i]), i));
std::partial_sort(
param.begin(), param.begin() + nonZeroNum, param.end(), sortPairAscend);
for (size_t i = 0; i < nonZeroNum; i++) maskTempData[param[i].second] = 1.0;
// Currently just use a mask vector for hack. // Currently just use a mask vector for hack.
// @TODO(yuyang18): Implemented the mask operation in vector.
if (para->useGpu()) { if (para->useGpu()) {
maskVec_ = Vector::create(this->mask_.size(), para->useGpu()); maskVec_ = Vector::create(para->getSize(), para->useGpu());
maskVec_->copyFrom(*maskVec); maskVec_->copyFrom(*maskTemp);
} else { } else {
maskVec_ = maskVec; maskVec_ = maskTemp;
} }
auto& vec = para->getBuf(PARAMETER_VALUE);
vec->dotMul(*maskVec_);
} }
private: void init(Parameter *para) {
bool loadMaskFile(const std::string& mask_filename) { generateMask(para);
std::ifstream fin; size_t initCount = this->initCount_.fetch_add(1);
fin.open(mask_filename); CHECK_EQ(initCount, 0UL) << "Currently the StaticPruningHook must invoke "
if (fin.is_open()) { "in same ParamterUpdater";
StaticMaskHeader header; VLOG(3) << "Initialize Parameter " << para;
fin.read(reinterpret_cast<char*>(&header), sizeof(StaticMaskHeader)); SetDevice device(para->getDeviceId());
CHECK_EQ(header.version, 0UL);
mask_.resize(header.size); auto &paraVec = para->getBuf(PARAMETER_VALUE);
uint8_t buf; paraVec->dotMul(*maskVec_);
for (size_t i = 0; i < header.size; ++i, buf <<= 1) {
if (i % 8 == 0) {
fin.read(reinterpret_cast<char*>(&buf), sizeof(uint8_t));
}
mask_[i] = buf & 0x80;
}
fin.close();
return true;
} else {
return false;
}
} }
private:
SameThreadChecker updateThreadChecker_; SameThreadChecker updateThreadChecker_;
std::atomic<size_t> initCount_; std::atomic<size_t> initCount_;
VectorPtr maskVec_; VectorPtr maskVec_;
std::vector<bool> mask_; real sparsityRatio_;
}; };
IParameterUpdaterHook::IParameterUpdaterHook() {} IParameterUpdaterHook::IParameterUpdaterHook() {}
...@@ -145,7 +117,7 @@ IParameterUpdaterHook::~IParameterUpdaterHook() {} ...@@ -145,7 +117,7 @@ IParameterUpdaterHook::~IParameterUpdaterHook() {}
*/ */
class StringIntPairHasher { class StringIntPairHasher {
public: public:
size_t operator()(const std::pair<std::string, int>& k) const { size_t operator()(const std::pair<std::string, int> &k) const {
return intHasher_(strHasher_(k.first) + k.second); return intHasher_(strHasher_(k.first) + k.second);
} }
...@@ -162,19 +134,19 @@ static WeakKVCache<std::pair<std::string, int>, ...@@ -162,19 +134,19 @@ static WeakKVCache<std::pair<std::string, int>,
/** /**
* ParameterUpdaterHook actually factory method. * ParameterUpdaterHook actually factory method.
*/ */
static IParameterUpdaterHook* createImpl( static IParameterUpdaterHook *createImpl(
const ParameterUpdaterHookConfig& config) { const ParameterUpdaterHookConfig &config) {
auto& type = config.type(); auto &type = config.type();
if (type == "pruning") { if (type == "pruning") {
if (config.has_purning_mask_filename()) { return new StaticPruningHook(config);
return new StaticPruningHook(config.purning_mask_filename());
}
} }
LOG(FATAL) << "Unknown Hook type: " << type;
return nullptr; return nullptr;
} }
std::shared_ptr<IParameterUpdaterHook> IParameterUpdaterHook::create( std::shared_ptr<IParameterUpdaterHook> IParameterUpdaterHook::create(
const ParameterConfig& paramConfig, int idx) { const ParameterConfig &paramConfig, int idx) {
std::pair<std::string, int> key = {paramConfig.name(), idx}; std::pair<std::string, int> key = {paramConfig.name(), idx};
return g_hookCache_.get( return g_hookCache_.get(
key, [&] { return createImpl(paramConfig.update_hooks(idx)); }); key, [&] { return createImpl(paramConfig.update_hooks(idx)); });
......
...@@ -2,3 +2,4 @@ nv_test(cuda_test SRCS cuda_test.cu) ...@@ -2,3 +2,4 @@ nv_test(cuda_test SRCS cuda_test.cu)
cc_library(place SRCS place.cc) cc_library(place SRCS place.cc)
cc_test(place_test SRCS place_test.cc DEPS place glog gflags) cc_test(place_test SRCS place_test.cc DEPS place glog gflags)
cc_test(must_check_test SRCS must_check_test.cc)
...@@ -10,24 +10,17 @@ See the License for the specific language governing permissions and ...@@ -10,24 +10,17 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
/**
* This header defines some useful attribute by each compiler. It is the
* abstract layer of compilers.
*/
#ifdef __GNUC__
#define GCC_VERSION \
(__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#else
#define GCC_VERSION
#endif
/** /**
* __must_check macro. It make the function's return value must be used, * __must_check macro. It make the function's return value must be used,
* otherwise it will raise a compile warning. And also Paddle treat all compile * otherwise it will raise a compile warning. And also Paddle treat all compile
* warnings as errors. * warnings as errors.
*/ */
#if GCC_VERSION >= 30400 #ifdef __GNUC__
#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 30400
#define __must_check __attribute__((warn_unused_result)) #define __must_check __attribute__((warn_unused_result))
#else #else
#define __must_check #define __must_check
#endif #endif
#else
#define __must_check
#endif
#include <gtest/gtest.h>
#include <paddle/platform/must_check.h>
// A function whose return value is annotated with __must_check.
int __must_check SomeFunctionMustCheck() { return 0; }
TEST(MustCheck, all) {
// The call below is deliberately left commented out: because the return
// value of SomeFunctionMustCheck is marked __must_check, discarding it is
// expected to be rejected at compile time. Uncomment it to verify that by
// hand.
// SomeFunctionMustCheck();
}
\ No newline at end of file
#!/bin/bash
# Travis build-and-test job: configure with coverage enabled, build, upload the
# coverage report and install.
#
# NOTE(review): ./common.sh is expected to switch into the build directory and
# to define EXTRA_CMAKE_OPTS -- confirm against that script.
source ./common.sh

# Use the Python 2.7.12 installation under /opt/python.
export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages
export PYTHONHOME=/opt/python/2.7.12
export PATH=/opt/python/2.7.12/bin:${PATH}

# EXTRA_CMAKE_OPTS is left unquoted on purpose so it can expand to several
# separate command-line options.
cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DWITH_COVERAGE=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS}

# Build with one make job per available processor.
NPROC=$(nproc)
make -j "$NPROC"
make coveralls
sudo make install
#!/bin/bash #!/bin/bash
set -e
# Create the build directory for CMake.
mkdir -p $TRAVIS_BUILD_DIR/build
cd $TRAVIS_BUILD_DIR/build
# Add set -e, cd to directory.
source ./common.sh
# Compile Documentation only. # Compile Documentation only.
cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF ${EXTRA_CMAKE_OPTS} cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF
mkdir output mkdir output
make -j `nproc` make -j `nproc`
find .. -name '*whl' | xargs pip install # install all wheels. find .. -name '*whl' | xargs pip install # install all wheels.
rm -rf * rm -rf *
cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS} cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON
make paddle_docs paddle_docs_cn make -j `nproc` paddle_docs paddle_docs_cn
# check websites for broken links # check websites for broken links
linkchecker doc/en/html/index.html linkchecker doc/en/html/index.html
......
#!/bin/bash #!/bin/bash
function abort(){ function abort(){
echo "Your commit not fit PaddlePaddle code style" 1>&2 echo "Your change doesn't follow PaddlePaddle's code style." 1>&2
echo "Please use pre-commit scripts to auto-format your code" 1>&2 echo "Please use pre-commit to reformat your code and git push again." 1>&2
exit 1 exit 1
} }
trap 'abort' 0 trap 'abort' 0
set -e set -e
source common.sh
cd .. cd $TRAVIS_BUILD_DIR
export PATH=/usr/bin:$PATH export PATH=/usr/bin:$PATH
pre-commit install pre-commit install
clang-format --version clang-format --version
......
#!/bin/bash
# Shared setup: abort on the first failing command, create and enter the build
# directory, and prepare the third-party cache location.
# NOTE(review): EXTRA_CMAKE_OPTS is not exported, so this file is presumably
# meant to be sourced rather than executed -- confirm against the callers.

# Stop at the first command that fails.
set -e

# The build directory is three levels above the current working directory.
mkdir -p ../../../build
cd ../../../build

# Directory under $HOME that holds third-party dependencies.
mkdir -p $HOME/third_party

# Extra CMake options that point THIRD_PARTY_PATH at that directory.
EXTRA_CMAKE_OPTS="-DTHIRD_PARTY_PATH=${HOME}/third_party"
#!/bin/bash
# Dispatcher: runs the script that matches the JOB environment variable.
#   BUILD_AND_TEST -> ./build_and_test.sh
#   DOCS           -> ./docs.sh
#   PRE_COMMIT     -> ./precommit.sh
# Any other value (including an unset or empty JOB) prints an error and exits
# with status 1.

# Run from the directory that contains this script so the relative script
# paths below resolve; give up if that directory cannot be entered.
cd "$(dirname "$0")" || exit 1

case "${JOB}" in
  BUILD_AND_TEST)
    ./build_and_test.sh
    ;;
  DOCS)
    ./docs.sh
    ;;
  PRE_COMMIT)
    ./precommit.sh
    ;;
  *)
    echo "Unknown job ${JOB}"
    exit 1
    ;;
esac
...@@ -35,7 +35,7 @@ def outer_step(dummy_data): ...@@ -35,7 +35,7 @@ def outer_step(dummy_data):
embedding_size=num_words)] embedding_size=num_words)]
def inner_step(dummy_memory, predict_word): def inner_step(dummy_memory, predict_word):
# simplified RNN for testing # simplified RNN for testing
with mixed_layer(size=num_words) as layer: with mixed_layer(size=num_words) as layer:
layer += full_matrix_projection(input=predict_word, layer += full_matrix_projection(input=predict_word,
...@@ -46,15 +46,15 @@ def outer_step(dummy_data): ...@@ -46,15 +46,15 @@ def outer_step(dummy_data):
param_attr=ParamAttr(name="wordvec")) param_attr=ParamAttr(name="wordvec"))
return out return out
beam_gen = beam_search(name="rnn_gen", beam_gen = beam_search(name="rnn_gen",
step=inner_step, step=inner_step,
input=gen_inputs, input=gen_inputs,
bos_id=0, bos_id=0,
eos_id=num_words-1, eos_id=num_words-1,
beam_size=2 if beam_flag else 1, beam_size=2 if beam_flag else 1,
num_results_per_sample=2 if beam_flag else 1, num_results_per_sample=1,
max_length=10) max_length=10)
return beam_gen return beam_gen
beam_gen_concat = recurrent_group(name="rnn_gen_concat", beam_gen_concat = recurrent_group(name="rnn_gen_concat",
......
...@@ -33,7 +33,7 @@ gen_inputs = [StaticInput(input=dummy_data, size=2), ...@@ -33,7 +33,7 @@ gen_inputs = [StaticInput(input=dummy_data, size=2),
embedding_size=num_words)] embedding_size=num_words)]
def step(dummy_memory, predict_word): def step(dummy_memory, predict_word):
# simplified RNN for testing # simplified RNN for testing
with mixed_layer(size=num_words) as layer: with mixed_layer(size=num_words) as layer:
layer += full_matrix_projection(input=predict_word, layer += full_matrix_projection(input=predict_word,
...@@ -44,7 +44,7 @@ def step(dummy_memory, predict_word): ...@@ -44,7 +44,7 @@ def step(dummy_memory, predict_word):
param_attr=ParamAttr(name="wordvec")) param_attr=ParamAttr(name="wordvec"))
return out return out
beam_gen = beam_search(name="rnn_gen", beam_gen = beam_search(name="rnn_gen",
step=step, step=step,
input=gen_inputs, input=gen_inputs,
...@@ -52,7 +52,7 @@ beam_gen = beam_search(name="rnn_gen", ...@@ -52,7 +52,7 @@ beam_gen = beam_search(name="rnn_gen",
eos_id=num_words-1, eos_id=num_words-1,
beam_size=2 if beam_flag else 1, beam_size=2 if beam_flag else 1,
num_results_per_sample=2 if beam_flag else 1, num_results_per_sample=2 if beam_flag else 1,
max_length=10) max_length=10)
seqtext_printer_evaluator(input=beam_gen, seqtext_printer_evaluator(input=beam_gen,
id_input=sent_id, id_input=sent_id,
......
...@@ -55,13 +55,17 @@ public: ...@@ -55,13 +55,17 @@ public:
* Else, just set status to popping. * Else, just set status to popping.
*/ */
void pop(const T& item) { void pop(const T& item) {
pushing() = false;
auto& s = this->stack(); auto& s = this->stack();
if (item == s.top()) { if (item == s.top()) {
s.pop(); s.pop();
} }
} }
/**
* @brief Indicate whether we are at forward or backward stage of computation
*/
void set_stage(bool isForward) { pushing() = isForward; }
/** /**
* @brief clear current thread stack. * @brief clear current thread stack.
*/ */
......
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
#include <stdio.h> #include <stdio.h>
#include <memory> #include <memory>
#include <string> #include <string>
#include "Compiler.h" #include "paddle/platform/must_check.h"
namespace paddle { namespace paddle {
......
...@@ -72,7 +72,6 @@ TEST(CustomStackTrace, normalTrain) { ...@@ -72,7 +72,6 @@ TEST(CustomStackTrace, normalTrain) {
for (size_t i = 0; i < layerSize; ++i) { for (size_t i = 0; i < layerSize; ++i) {
tracer.push("layer_" + paddle::str::to_string(i)); tracer.push("layer_" + paddle::str::to_string(i));
} }
tracer.pop("");
for (size_t i = 0; i < layerSize; ++i) { for (size_t i = 0; i < layerSize; ++i) {
tracer.pop("layer_" + paddle::str::to_string(layerSize - 1 - i)); tracer.pop("layer_" + paddle::str::to_string(layerSize - 1 - i));
} }
......
...@@ -489,6 +489,15 @@ message EvaluatorConfig { ...@@ -489,6 +489,15 @@ message EvaluatorConfig {
// Used by ClassificationErrorEvaluator // Used by ClassificationErrorEvaluator
// top # classification error // top # classification error
optional int32 top_k = 13 [default = 1]; optional int32 top_k = 13 [default = 1];
// Used by DetectionMAPEvaluator
optional double overlap_threshold = 14 [default = 0.5];
optional int32 background_id = 15 [default = 0];
optional bool evaluate_difficult = 16 [default = false];
optional string ap_type = 17 [default = "11point"];
} }
message LinkConfig { message LinkConfig {
......
...@@ -25,8 +25,10 @@ enum ParameterInitStrategy { ...@@ -25,8 +25,10 @@ enum ParameterInitStrategy {
} }
message ParameterUpdaterHookConfig { message ParameterUpdaterHookConfig {
// hook type such as 'pruning'
required string type = 1; required string type = 1;
optional string purning_mask_filename = 2; // this represents the ratio of zero element to be set by the Parameter
optional double sparsity_ratio = 2 [default = 0.6];
} }
message ParameterConfig { message ParameterConfig {
......
...@@ -1280,20 +1280,23 @@ def parse_maxout(maxout, input_layer_name, maxout_conf): ...@@ -1280,20 +1280,23 @@ def parse_maxout(maxout, input_layer_name, maxout_conf):
# Define an evaluator # Define an evaluator
@config_func @config_func
def Evaluator( def Evaluator(name,
name, type,
type, inputs,
inputs, chunk_scheme=None,
chunk_scheme=None, num_chunk_types=None,
num_chunk_types=None, classification_threshold=None,
classification_threshold=None, positive_label=None,
positive_label=None, dict_file=None,
dict_file=None, result_file=None,
result_file=None, num_results=None,
num_results=None, top_k=None,
top_k=None, delimited=None,
delimited=None, excluded_chunk_types=None,
excluded_chunk_types=None, ): overlap_threshold=None,
background_id=None,
evaluate_difficult=None,
ap_type=None):
evaluator = g_config.model_config.evaluators.add() evaluator = g_config.model_config.evaluators.add()
evaluator.type = type evaluator.type = type
evaluator.name = MakeLayerNameInSubmodel(name) evaluator.name = MakeLayerNameInSubmodel(name)
...@@ -1327,6 +1330,18 @@ def Evaluator( ...@@ -1327,6 +1330,18 @@ def Evaluator(
if excluded_chunk_types: if excluded_chunk_types:
evaluator.excluded_chunk_types.extend(excluded_chunk_types) evaluator.excluded_chunk_types.extend(excluded_chunk_types)
if overlap_threshold is not None:
evaluator.overlap_threshold = overlap_threshold
if background_id is not None:
evaluator.background_id = background_id
if evaluate_difficult is not None:
evaluator.evaluate_difficult = evaluate_difficult
if ap_type is not None:
evaluator.ap_type = ap_type
class LayerBase(object): class LayerBase(object):
def __init__( def __init__(
...@@ -3124,11 +3139,11 @@ def Layer(name, type, **xargs): ...@@ -3124,11 +3139,11 @@ def Layer(name, type, **xargs):
@config_func @config_func
def ParameterHook(type, **kwargs): def ParameterHook(type, **kwargs):
if type == 'pruning': if type == 'pruning':
mask_filename = kwargs.get('mask_filename', None)
assert mask_filename is not None
hook = ParameterUpdaterHookConfig() hook = ParameterUpdaterHookConfig()
hook.type = type hook.type = type
hook.purning_mask_filename = mask_filename sparsity_ratio = kwargs.get('sparsity_ratio', None)
if sparsity_ratio is not None:
hook.sparsity_ratio = sparsity_ratio
return hook return hook
else: else:
return None return None
...@@ -3236,13 +3251,13 @@ def Parameter(name, ...@@ -3236,13 +3251,13 @@ def Parameter(name,
if update_hooks is not None: if update_hooks is not None:
if hasattr(update_hooks, '__call__'): if hasattr(update_hooks, '__call__'):
update_hooks = update_hooks(para.name) update_hooks = update_hooks()
if isinstance(update_hooks, list): if isinstance(update_hooks, list):
for hook in update_hooks: for hook in update_hooks:
para.update_hooks.extend([hook]) para.update_hooks.extend([hook])
else: else:
para.update_hooks.extend(update_hooks) para.update_hooks.extend([update_hooks])
g_parameter_map[name] = para g_parameter_map[name] = para
if initializer is not None: if initializer is not None:
......
...@@ -14,7 +14,8 @@ ...@@ -14,7 +14,8 @@
from paddle.trainer.config_parser import * from paddle.trainer.config_parser import *
__all__ = [ __all__ = [
'ParamAttr', 'ExtraAttr', 'ParameterAttribute', 'ExtraLayerAttribute' 'HookAttr', 'ParamAttr', 'ExtraAttr', 'ParameterAttribute',
'ExtraLayerAttribute'
] ]
...@@ -55,6 +56,40 @@ def is_compatible_with(x, Type): ...@@ -55,6 +56,40 @@ def is_compatible_with(x, Type):
return False return False
class HookAttribute(object):
    """
    Hook Attribute object. As a member of the ParameterAttribute class, a hook
    is an auxiliary operation that occurs during the training process of a
    layer with parameters, such as an img_conv layer or an fc layer.

    :param type: Hook type. Currently supported types:
                 'pruning' : the user specifies a sparsity_ratio before
                 training starts, and the network prunes the parameters
                 based on that sparsity_ratio.
                 eg: The definition of a Hook object can be
                     hk = HookAttribute('pruning', 0.6)
                 The specific usage can be
                     paddle.layer.img_conv(input=img, filter_size=3,
                                           num_channels=3, num_filters=64,
                                           param_attr=ParameterAttribute(update_hooks=hk))
                 The pruning details can be found at
                 https://arxiv.org/pdf/1506.02626.pdf
    :type type: string

    :param sparsity_ratio: Used by the 'pruning' hook; it represents the ratio
                 of the zero elements to be set by the Parameter. When given
                 it must be float-compatible and lie in [0, 1]. None is
                 accepted and is passed through to ParameterHook unchanged.
    :type sparsity_ratio: float or None
    """

    def __init__(self, type, sparsity_ratio=None):
        self.type = type
        self.sparsity_ratio = sparsity_ratio
        # Validation only applies when a ratio was actually supplied.
        if self.sparsity_ratio is not None:
            assert is_compatible_with(
                self.sparsity_ratio,
                float), 'sparsity_ratio must be float type'
            assert 0 <= self.sparsity_ratio <= 1, 'sparsity_ratio must be a float between [0, 1] '

    def __call__(self):
        """Build the hook config by delegating to ParameterHook."""
        return ParameterHook(self.type, sparsity_ratio=self.sparsity_ratio)
class ParameterAttribute(object): class ParameterAttribute(object):
""" """
Parameter Attributes object. To fine-tuning network training process, user Parameter Attributes object. To fine-tuning network training process, user
...@@ -114,6 +149,7 @@ class ParameterAttribute(object): ...@@ -114,6 +149,7 @@ class ParameterAttribute(object):
momentum=None, momentum=None,
gradient_clipping_threshold=None, gradient_clipping_threshold=None,
sparse_update=False, sparse_update=False,
update_hooks=None,
initializer=None): initializer=None):
self.attr = {} self.attr = {}
...@@ -169,6 +205,9 @@ class ParameterAttribute(object): ...@@ -169,6 +205,9 @@ class ParameterAttribute(object):
if initializer is not None: if initializer is not None:
self.attr['initializer'] = initializer self.attr['initializer'] = initializer
if update_hooks:
self.attr['update_hooks'] = update_hooks
def set_default_parameter_name(self, name): def set_default_parameter_name(self, name):
""" """
Set default parameter name. If parameter not set, then will use default Set default parameter name. If parameter not set, then will use default
...@@ -244,5 +283,6 @@ class ExtraLayerAttribute(object): ...@@ -244,5 +283,6 @@ class ExtraLayerAttribute(object):
return attr.attr return attr.attr
HookAttr = HookAttribute
ParamAttr = ParameterAttribute ParamAttr = ParameterAttribute
ExtraAttr = ExtraLayerAttribute ExtraAttr = ExtraLayerAttribute
...@@ -21,7 +21,8 @@ __all__ = [ ...@@ -21,7 +21,8 @@ __all__ = [
"chunk_evaluator", "sum_evaluator", "column_sum_evaluator", "chunk_evaluator", "sum_evaluator", "column_sum_evaluator",
"value_printer_evaluator", "gradient_printer_evaluator", "value_printer_evaluator", "gradient_printer_evaluator",
"maxid_printer_evaluator", "maxframe_printer_evaluator", "maxid_printer_evaluator", "maxframe_printer_evaluator",
"seqtext_printer_evaluator", "classification_error_printer_evaluator" "seqtext_printer_evaluator", "classification_error_printer_evaluator",
"detection_map_evaluator"
] ]
...@@ -31,10 +32,11 @@ class EvaluatorAttribute(object): ...@@ -31,10 +32,11 @@ class EvaluatorAttribute(object):
FOR_RANK = 1 << 2 FOR_RANK = 1 << 2
FOR_PRINT = 1 << 3 FOR_PRINT = 1 << 3
FOR_UTILS = 1 << 4 FOR_UTILS = 1 << 4
FOR_DETECTION = 1 << 5
KEYS = [ KEYS = [
"for_classification", "for_regression", "for_rank", "for_print", "for_classification", "for_regression", "for_rank", "for_print",
"for_utils" "for_utils", "for_detection"
] ]
@staticmethod @staticmethod
...@@ -57,22 +59,25 @@ def evaluator(*attrs): ...@@ -57,22 +59,25 @@ def evaluator(*attrs):
return impl return impl
def evaluator_base( def evaluator_base(input,
input, type,
type, label=None,
label=None, weight=None,
weight=None, name=None,
name=None, chunk_scheme=None,
chunk_scheme=None, num_chunk_types=None,
num_chunk_types=None, classification_threshold=None,
classification_threshold=None, positive_label=None,
positive_label=None, dict_file=None,
dict_file=None, result_file=None,
result_file=None, num_results=None,
num_results=None, delimited=None,
delimited=None, top_k=None,
top_k=None, excluded_chunk_types=None,
excluded_chunk_types=None, ): overlap_threshold=None,
background_id=None,
evaluate_difficult=None,
ap_type=None):
""" """
Evaluator will evaluate the network status while training/testing. Evaluator will evaluate the network status while training/testing.
...@@ -107,6 +112,14 @@ def evaluator_base( ...@@ -107,6 +112,14 @@ def evaluator_base(
:type weight: LayerOutput. :type weight: LayerOutput.
:param top_k: number k in top-k error rate :param top_k: number k in top-k error rate
:type top_k: int :type top_k: int
:param overlap_threshold: In detection tasks to filter detection results
:type overlap_threshold: float
:param background_id: Identifier of background class
:type background_id: int
:param evaluate_difficult: Whether to evaluate difficult objects
:type evaluate_difficult: bool
:param ap_type: How to calculate average precision
:type ap_type: str
""" """
# inputs type assertions. # inputs type assertions.
assert classification_threshold is None or isinstance( assert classification_threshold is None or isinstance(
...@@ -136,7 +149,61 @@ def evaluator_base( ...@@ -136,7 +149,61 @@ def evaluator_base(
delimited=delimited, delimited=delimited,
num_results=num_results, num_results=num_results,
top_k=top_k, top_k=top_k,
excluded_chunk_types=excluded_chunk_types, ) excluded_chunk_types=excluded_chunk_types,
overlap_threshold=overlap_threshold,
background_id=background_id,
evaluate_difficult=evaluate_difficult,
ap_type=ap_type)
@evaluator(EvaluatorAttribute.FOR_DETECTION)
@wrap_name_default()
def detection_map_evaluator(input,
                            label,
                            overlap_threshold=0.5,
                            background_id=0,
                            evaluate_difficult=False,
                            ap_type="11point",
                            name=None):
    """
    Detection mAP Evaluator. It will print mean Average Precision (mAP) for detection.

    The detection mAP Evaluator, based on the output of the detection_output
    layer, counts the true positive and the false positive bboxes and
    integrates them to get the mAP.

    The simple usage is:

    .. code-block:: python

       eval = detection_map_evaluator(input=det_output,label=lbl)

    :param input: Input layer, or a list of input layers.
    :type input: LayerOutput
    :param label: Label layer.
    :type label: LayerOutput
    :param overlap_threshold: The bbox overlap threshold of a true positive.
    :type overlap_threshold: float
    :param background_id: The background class index.
    :type background_id: int
    :param evaluate_difficult: Whether evaluate a difficult ground truth.
    :type evaluate_difficult: bool
    :param ap_type: How to calculate average precision. Defaults to "11point".
    :type ap_type: str
    :param name: Evaluator name, forwarded to evaluator_base.
    :type name: None|basestring
    """
    # Work on a private list so appending the label below never mutates a
    # list object that still belongs to the caller.
    input = list(input) if isinstance(input, list) else [input]

    if label:
        input.append(label)

    # NOTE(review): label is appended to `input` above AND passed as `label=`
    # here; confirm evaluator_base does not append it a second time.
    evaluator_base(
        name=name,
        type="detection_map",
        input=input,
        label=label,
        overlap_threshold=overlap_threshold,
        background_id=background_id,
        evaluate_difficult=evaluate_difficult,
        ap_type=ap_type)
@evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
......
...@@ -3839,7 +3839,8 @@ def classification_cost(input, ...@@ -3839,7 +3839,8 @@ def classification_cost(input,
weight=None, weight=None,
name=None, name=None,
evaluator=classification_error_evaluator, evaluator=classification_error_evaluator,
layer_attr=None): layer_attr=None,
coeff=1.):
""" """
classification cost Layer. classification cost Layer.
...@@ -3855,6 +3856,8 @@ def classification_cost(input, ...@@ -3855,6 +3856,8 @@ def classification_cost(input,
:param evaluator: Evaluator method. :param evaluator: Evaluator method.
:param layer_attr: layer's extra attribute. :param layer_attr: layer's extra attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param coeff: The coefficient affects the gradient in the backward.
:type coeff: float
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -3868,6 +3871,7 @@ def classification_cost(input, ...@@ -3868,6 +3871,7 @@ def classification_cost(input,
name=name, name=name,
type="multi-class-cross-entropy", type="multi-class-cross-entropy",
inputs=ipts, inputs=ipts,
coeff=coeff,
**ExtraLayerAttribute.to_kwargs(layer_attr)) **ExtraLayerAttribute.to_kwargs(layer_attr))
def __add_evaluator__(e): def __add_evaluator__(e):
......
...@@ -17,10 +17,12 @@ import paddle.trainer_config_helpers.attrs ...@@ -17,10 +17,12 @@ import paddle.trainer_config_helpers.attrs
__all__ = [ __all__ = [
"Param", "Param",
"Extra", "Extra",
"Hook",
] ]
Param = paddle.trainer_config_helpers.attrs.ParameterAttribute Param = paddle.trainer_config_helpers.attrs.ParameterAttribute
Extra = paddle.trainer_config_helpers.attrs.ExtraLayerAttribute Extra = paddle.trainer_config_helpers.attrs.ExtraLayerAttribute
Hook = paddle.trainer_config_helpers.attrs.HookAttribute
for each in paddle.trainer_config_helpers.attrs.__all__: for each in paddle.trainer_config_helpers.attrs.__all__:
globals()[each] = getattr(paddle.trainer_config_helpers.attrs, each) globals()[each] = getattr(paddle.trainer_config_helpers.attrs, each)
......
...@@ -31,10 +31,10 @@ images per class. ...@@ -31,10 +31,10 @@ images per class.
import cPickle import cPickle
import itertools import itertools
import numpy import numpy
from common import download import paddle.v2.dataset.common
import tarfile import tarfile
__all__ = ['train100', 'test100', 'train10', 'test10'] __all__ = ['train100', 'test100', 'train10', 'test10', 'convert']
URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/' URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/'
CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz' CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz'
...@@ -75,7 +75,8 @@ def train100(): ...@@ -75,7 +75,8 @@ def train100():
:rtype: callable :rtype: callable
""" """
return reader_creator( return reader_creator(
download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train') paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5),
'train')
def test100(): def test100():
...@@ -88,7 +89,9 @@ def test100(): ...@@ -88,7 +89,9 @@ def test100():
:return: Test reader creator. :return: Test reader creator.
:rtype: callable :rtype: callable
""" """
return reader_creator(download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test') return reader_creator(
paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5),
'test')
def train10(): def train10():
...@@ -102,7 +105,8 @@ def train10(): ...@@ -102,7 +105,8 @@ def train10():
:rtype: callable :rtype: callable
""" """
return reader_creator( return reader_creator(
download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch') paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
'data_batch')
def test10(): def test10():
...@@ -116,9 +120,20 @@ def test10(): ...@@ -116,9 +120,20 @@ def test10():
:rtype: callable :rtype: callable
""" """
return reader_creator( return reader_creator(
download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch') paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
'test_batch')
def fetch(): def fetch():
download(CIFAR10_URL, 'cifar', CIFAR10_MD5) paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5)
download(CIFAR100_URL, 'cifar', CIFAR100_MD5) paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5)
def convert(path):
    """
    Converts the four CIFAR readers (train100, test100, train10, test10)
    to recordio format, writing under ``path``.
    """
    write = paddle.v2.dataset.common.convert
    # Each reader is built inside the loop, right before it is written, so
    # the creation/write order is train100, test100, train10, test10.
    # The literal 10 is forwarded as the third positional argument of
    # common.convert (presumably records per output file -- TODO confirm).
    for build_reader, prefix in ((train100, "cifar_train100"),
                                 (test100, "cifar_test100"),
                                 (train10, "cifar_train10"),
                                 (test10, "cifar_test10")):
        write(path, build_reader(), 10, prefix)
...@@ -23,17 +23,24 @@ import paddle.v2.dataset ...@@ -23,17 +23,24 @@ import paddle.v2.dataset
import cPickle import cPickle
import glob import glob
__all__ = ['DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader'] __all__ = [
'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader',
'convert'
]
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset') DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
if not os.path.exists(DATA_HOME): # When running unit tests, there could be multiple processes that
try: # trying to create DATA_HOME directory simultaneously, so we cannot
os.makedirs(DATA_HOME) # use a if condition to check for the existence of the directory;
except OSError as exc: # instead, we use the filesystem as the synchronization mechanism by
if exc.errno != errno.EEXIST: # catching returned errors.
raise try:
pass os.makedirs(DATA_HOME)
except OSError as exc:
if exc.errno != errno.EEXIST:
raise
pass
def md5file(fname): def md5file(fname):
......
...@@ -23,9 +23,9 @@ to initialize SRL model. ...@@ -23,9 +23,9 @@ to initialize SRL model.
import tarfile import tarfile
import gzip import gzip
import itertools import itertools
from common import download import paddle.v2.dataset.common
__all__ = ['test, get_dict', 'get_embedding'] __all__ = ['test, get_dict', 'get_embedding', 'convert']
DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz' DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
DATA_MD5 = '387719152ae52d60422c016e92a742fc' DATA_MD5 = '387719152ae52d60422c016e92a742fc'
...@@ -182,9 +182,15 @@ def get_dict(): ...@@ -182,9 +182,15 @@ def get_dict():
""" """
Get the word, verb and label dictionary of Wikipedia corpus. Get the word, verb and label dictionary of Wikipedia corpus.
""" """
word_dict = load_dict(download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)) word_dict = load_dict(
verb_dict = load_dict(download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)) paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st',
label_dict = load_dict(download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)) WORDDICT_MD5))
verb_dict = load_dict(
paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st',
VERBDICT_MD5))
label_dict = load_dict(
paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st',
TRGDICT_MD5))
return word_dict, verb_dict, label_dict return word_dict, verb_dict, label_dict
...@@ -192,7 +198,7 @@ def get_embedding(): ...@@ -192,7 +198,7 @@ def get_embedding():
""" """
Get the trained word vector based on Wikipedia corpus. Get the trained word vector based on Wikipedia corpus.
""" """
return download(EMB_URL, 'conll05st', EMB_MD5) return paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5)
def test(): def test():
...@@ -209,15 +215,23 @@ def test(): ...@@ -209,15 +215,23 @@ def test():
""" """
word_dict, verb_dict, label_dict = get_dict() word_dict, verb_dict, label_dict = get_dict()
reader = corpus_reader( reader = corpus_reader(
download(DATA_URL, 'conll05st', DATA_MD5), paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5),
words_name='conll05st-release/test.wsj/words/test.wsj.words.gz', words_name='conll05st-release/test.wsj/words/test.wsj.words.gz',
props_name='conll05st-release/test.wsj/props/test.wsj.props.gz') props_name='conll05st-release/test.wsj/props/test.wsj.props.gz')
return reader_creator(reader, word_dict, verb_dict, label_dict) return reader_creator(reader, word_dict, verb_dict, label_dict)
def fetch(): def fetch():
download(WORDDICT_URL, 'conll05st', WORDDICT_MD5) paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)
download(VERBDICT_URL, 'conll05st', VERBDICT_MD5) paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)
download(TRGDICT_URL, 'conll05st', TRGDICT_MD5) paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)
download(EMB_URL, 'conll05st', EMB_MD5) paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5)
download(DATA_URL, 'conll05st', DATA_MD5) paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5)
def convert(path):
    """
    Converts dataset to recordio format, writing under ``path``.

    Both output files are produced from the ``test()`` reader; a fresh reader
    is created for each file.
    """
    # NOTE(review): the prefixes are spelled "conl105" (digit one) rather
    # than "conll05", and the "train" file is fed from test() -- confirm
    # whether downstream consumers rely on either before changing them.
    for prefix in ("conl105_train", "conl105_test"):
        paddle.v2.dataset.common.convert(path, test(), 10, prefix)
...@@ -28,7 +28,7 @@ import re ...@@ -28,7 +28,7 @@ import re
import string import string
import threading import threading
__all__ = ['build_dict', 'train', 'test'] __all__ = ['build_dict', 'train', 'test', 'convert']
URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz' URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz'
MD5 = '7c2ac02c03563afcf9b574c7e56c153a' MD5 = '7c2ac02c03563afcf9b574c7e56c153a'
...@@ -166,3 +166,12 @@ def word_dict(): ...@@ -166,3 +166,12 @@ def word_dict():
def fetch(): def fetch():
paddle.v2.dataset.common.download(URL, 'imdb', MD5) paddle.v2.dataset.common.download(URL, 'imdb', MD5)
def convert(path):
    """
    Converts dataset to recordio format, writing under ``path``.

    The word dictionary is built once and shared by both readers.
    """
    vocab = word_dict()

    # common.convert receives zero-argument callables here; each one defers
    # the train(vocab) / test(vocab) call until it is invoked.
    def train_reader():
        return train(vocab)

    def test_reader():
        return test(vocab)

    paddle.v2.dataset.common.convert(path, train_reader, 10, "imdb_train")
    paddle.v2.dataset.common.convert(path, test_reader, 10, "imdb_test")
...@@ -22,7 +22,7 @@ import paddle.v2.dataset.common ...@@ -22,7 +22,7 @@ import paddle.v2.dataset.common
import collections import collections
import tarfile import tarfile
__all__ = ['train', 'test', 'build_dict'] __all__ = ['train', 'test', 'build_dict', 'convert']
URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz' URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz'
MD5 = '30177ea32e27c525793142b6bf2c8e2d' MD5 = '30177ea32e27c525793142b6bf2c8e2d'
...@@ -146,3 +146,15 @@ def test(word_idx, n, data_type=DataType.NGRAM): ...@@ -146,3 +146,15 @@ def test(word_idx, n, data_type=DataType.NGRAM):
def fetch(): def fetch():
paddle.v2.dataset.common.download(URL, "imikolov", MD5) paddle.v2.dataset.common.download(URL, "imikolov", MD5)
def convert(path):
    """
    Converts dataset to recordio format, writing under ``path``.

    The dictionary from ``build_dict()`` is built once and the readers are
    created with n = 5.
    """
    window = 5
    vocab = build_dict()
    for make_reader, prefix in ((train, "imikolov_train"),
                                (test, "imikolov_test")):
        paddle.v2.dataset.common.convert(path,
                                         make_reader(vocab, window), 10,
                                         prefix)
...@@ -21,7 +21,7 @@ import paddle.v2.dataset.common ...@@ -21,7 +21,7 @@ import paddle.v2.dataset.common
import subprocess import subprocess
import numpy import numpy
import platform import platform
__all__ = ['train', 'test'] __all__ = ['train', 'test', 'convert']
URL_PREFIX = 'http://yann.lecun.com/exdb/mnist/' URL_PREFIX = 'http://yann.lecun.com/exdb/mnist/'
TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz' TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz'
...@@ -113,3 +113,11 @@ def fetch(): ...@@ -113,3 +113,11 @@ def fetch():
paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5)
paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5) paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5)
paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5)
def convert(path):
    """
    Converts the train and test readers to recordio format under ``path``.
    """
    common = paddle.v2.dataset.common
    # NOTE(review): the prefixes are spelled "minist", not "mnist" -- confirm
    # whether consumers depend on these names before renaming.
    common.convert(path, train(), 10, "minist_train")
    common.convert(path, test(), 10, "minist_test")
...@@ -23,14 +23,15 @@ set and test set into paddle reader creators. ...@@ -23,14 +23,15 @@ set and test set into paddle reader creators.
""" """
import zipfile import zipfile
from common import download import paddle.v2.dataset.common
import re import re
import random import random
import functools import functools
__all__ = [ __all__ = [
'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id', 'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id',
'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info' 'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info',
'convert'
] ]
age_table = [1, 18, 25, 35, 45, 50, 56] age_table = [1, 18, 25, 35, 45, 50, 56]
...@@ -99,7 +100,7 @@ USER_INFO = None ...@@ -99,7 +100,7 @@ USER_INFO = None
def __initialize_meta_info__(): def __initialize_meta_info__():
fn = download(URL, "movielens", MD5) fn = paddle.v2.dataset.common.download(URL, "movielens", MD5)
global MOVIE_INFO global MOVIE_INFO
if MOVIE_INFO is None: if MOVIE_INFO is None:
pattern = re.compile(r'^(.*)\((\d+)\)$') pattern = re.compile(r'^(.*)\((\d+)\)$')
...@@ -246,7 +247,15 @@ def unittest(): ...@@ -246,7 +247,15 @@ def unittest():
def fetch(): def fetch():
download(URL, "movielens", MD5) paddle.v2.dataset.common.download(URL, "movielens", MD5)
def convert(path):
    """
    Converts the train and test readers to recordio format under ``path``.
    """
    for make_reader, prefix in ((train, "movielens_train"),
                                (test, "movielens_test")):
        # The reader is created right before it is handed to common.convert.
        paddle.v2.dataset.common.convert(path, make_reader(), 10, prefix)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -26,9 +26,9 @@ from itertools import chain ...@@ -26,9 +26,9 @@ from itertools import chain
import nltk import nltk
from nltk.corpus import movie_reviews from nltk.corpus import movie_reviews
import common import paddle.v2.dataset.common
__all__ = ['train', 'test', 'get_word_dict'] __all__ = ['train', 'test', 'get_word_dict', 'convert']
NUM_TRAINING_INSTANCES = 1600 NUM_TRAINING_INSTANCES = 1600
NUM_TOTAL_INSTANCES = 2000 NUM_TOTAL_INSTANCES = 2000
...@@ -39,12 +39,13 @@ def download_data_if_not_yet(): ...@@ -39,12 +39,13 @@ def download_data_if_not_yet():
""" """
try: try:
# make sure that nltk can find the data # make sure that nltk can find the data
if common.DATA_HOME not in nltk.data.path: if paddle.v2.dataset.common.DATA_HOME not in nltk.data.path:
nltk.data.path.append(common.DATA_HOME) nltk.data.path.append(paddle.v2.dataset.common.DATA_HOME)
movie_reviews.categories() movie_reviews.categories()
except LookupError: except LookupError:
print "Downloading movie_reviews data set, please wait....." print "Downloading movie_reviews data set, please wait....."
nltk.download('movie_reviews', download_dir=common.DATA_HOME) nltk.download(
'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME)
print "Download data set success....." print "Download data set success....."
print "Path is " + nltk.data.find('corpora/movie_reviews').path print "Path is " + nltk.data.find('corpora/movie_reviews').path
...@@ -128,4 +129,13 @@ def test(): ...@@ -128,4 +129,13 @@ def test():
def fetch(): def fetch():
nltk.download('movie_reviews', download_dir=common.DATA_HOME) nltk.download(
'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME)
def convert(path):
    """
    Converts dataset to recordio format, writing under ``path``.

    ``train`` and ``test`` are passed to common.convert as callables; they
    are not invoked here.
    """
    dump = paddle.v2.dataset.common.convert
    dump(path, train, 10, "sentiment_train")
    dump(path, test, 10, "sentiment_test")
...@@ -14,14 +14,14 @@ ...@@ -14,14 +14,14 @@
""" """
UCI Housing dataset. UCI Housing dataset.
This module will download dataset from This module will paddle.v2.dataset.common.download dataset from
https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and
parse training set and test set into paddle reader creators. parse training set and test set into paddle reader creators.
""" """
import numpy as np import numpy as np
import os import os
from common import download import paddle.v2.dataset.common
__all__ = ['train', 'test'] __all__ = ['train', 'test']
...@@ -29,7 +29,7 @@ URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing ...@@ -29,7 +29,7 @@ URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing
MD5 = 'd4accdce7a25600298819f8e28e8d593' MD5 = 'd4accdce7a25600298819f8e28e8d593'
feature_names = [ feature_names = [
'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
'PTRATIO', 'B', 'LSTAT' 'PTRATIO', 'B', 'LSTAT', 'convert'
] ]
UCI_TRAIN_DATA = None UCI_TRAIN_DATA = None
...@@ -82,7 +82,7 @@ def train(): ...@@ -82,7 +82,7 @@ def train():
:rtype: callable :rtype: callable
""" """
global UCI_TRAIN_DATA global UCI_TRAIN_DATA
load_data(download(URL, 'uci_housing', MD5)) load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5))
def reader(): def reader():
for d in UCI_TRAIN_DATA: for d in UCI_TRAIN_DATA:
...@@ -102,7 +102,7 @@ def test(): ...@@ -102,7 +102,7 @@ def test():
:rtype: callable :rtype: callable
""" """
global UCI_TEST_DATA global UCI_TEST_DATA
load_data(download(URL, 'uci_housing', MD5)) load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5))
def reader(): def reader():
for d in UCI_TEST_DATA: for d in UCI_TEST_DATA:
...@@ -112,4 +112,12 @@ def test(): ...@@ -112,4 +112,12 @@ def test():
def fetch(): def fetch():
download(URL, 'uci_housing', MD5) paddle.v2.dataset.common.download(URL, 'uci_housing', MD5)
def convert(path):
    """
    Converts the train and test readers to recordio format under ``path``.

    Output prefixes are "uci_housing_train" and "uci_housing_test".
    """
    paddle.v2.dataset.common.convert(path, train(), 10, "uci_housing_train")
    # The test prefix was previously misspelled "uci_houseing_test", which
    # did not match the train prefix above.
    paddle.v2.dataset.common.convert(path, test(), 10, "uci_housing_test")
...@@ -22,10 +22,10 @@ parse training set and test set into paddle reader creators. ...@@ -22,10 +22,10 @@ parse training set and test set into paddle reader creators.
import tarfile import tarfile
import gzip import gzip
from paddle.v2.dataset.common import download import paddle.v2.dataset.common
from paddle.v2.parameters import Parameters from paddle.v2.parameters import Parameters
__all__ = ['train', 'test', 'build_dict'] __all__ = ['train', 'test', 'build_dict', 'convert']
URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz' URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5' MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
...@@ -115,7 +115,8 @@ def train(dict_size): ...@@ -115,7 +115,8 @@ def train(dict_size):
:rtype: callable :rtype: callable
""" """
return reader_creator( return reader_creator(
download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'train/train', dict_size) paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
'train/train', dict_size)
def test(dict_size): def test(dict_size):
...@@ -130,16 +131,18 @@ def test(dict_size): ...@@ -130,16 +131,18 @@ def test(dict_size):
:rtype: callable :rtype: callable
""" """
return reader_creator( return reader_creator(
download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'test/test', dict_size) paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
'test/test', dict_size)
def gen(dict_size): def gen(dict_size):
return reader_creator( return reader_creator(
download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'gen/gen', dict_size) paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
'gen/gen', dict_size)
def model(): def model():
tar_file = download(URL_MODEL, 'wmt14', MD5_MODEL) tar_file = paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL)
with gzip.open(tar_file, 'r') as f: with gzip.open(tar_file, 'r') as f:
parameters = Parameters.from_tar(f) parameters = Parameters.from_tar(f)
return parameters return parameters
...@@ -148,7 +151,7 @@ def model(): ...@@ -148,7 +151,7 @@ def model():
def get_dict(dict_size, reverse=True): def get_dict(dict_size, reverse=True):
# if reverse = False, return dict = {'a':'001', 'b':'002', ...} # if reverse = False, return dict = {'a':'001', 'b':'002', ...}
# else reverse = true, return dict = {'001':'a', '002':'b', ...} # else reverse = true, return dict = {'001':'a', '002':'b', ...}
tar_file = download(URL_TRAIN, 'wmt14', MD5_TRAIN) tar_file = paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN)
src_dict, trg_dict = __read_to_dict__(tar_file, dict_size) src_dict, trg_dict = __read_to_dict__(tar_file, dict_size)
if reverse: if reverse:
src_dict = {v: k for k, v in src_dict.items()} src_dict = {v: k for k, v in src_dict.items()}
...@@ -157,5 +160,14 @@ def get_dict(dict_size, reverse=True): ...@@ -157,5 +160,14 @@ def get_dict(dict_size, reverse=True):
def fetch(): def fetch():
download(URL_TRAIN, 'wmt14', MD5_TRAIN) paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN)
download(URL_MODEL, 'wmt14', MD5_MODEL) paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL)
def convert(path):
    """
    Converts dataset to recordio format.

    Runs ``paddle.v2.dataset.common.convert`` once for the training reader
    and once for the test reader, forwarding ``path`` unchanged. Both
    readers are created with a dictionary size of 30000, and each call
    receives 10 as its third argument.

    :param path: forwarded as the first argument of the common ``convert``
                 helper (presumably the output location -- confirm there).
    """
    dict_size = 30000
    # Each reader is created immediately before its own conversion, so the
    # training split is fully handled before the test reader is built.
    for creator, name in ((train, "wmt14_train"), (test, "wmt14_test")):
        paddle.v2.dataset.common.convert(path, creator(dict_size), 10, name)
...@@ -45,12 +45,12 @@ __all__ = ['data', 'parse_network'] ...@@ -45,12 +45,12 @@ __all__ = ['data', 'parse_network']
def __need_to_keep__(name): def __need_to_keep__(name):
return name in [ return name in [
'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType', 'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType',
'layer_support' 'layer_support', 'BaseGeneratedInput'
] ]
def __need_to_wrap__(name): def __need_to_wrap__(name):
return name not in ['AggregateLevel', 'ExpandLevel'] return name not in ['AggregateLevel', 'ExpandLevel', 'BaseGeneratedInput']
def __convert_name__(inname): def __convert_name__(inname):
...@@ -199,6 +199,15 @@ def __get_used_submodels__(layer_names): ...@@ -199,6 +199,15 @@ def __get_used_submodels__(layer_names):
return submodel_names return submodel_names
def __get_submodel_data_out_links__():
    """
    Collect the names of sub-model out links that point at data layers.

    Walks every out link of every entry in
    ``cp.g_config.model_config.sub_models`` and keeps the link names whose
    layer in ``cp.g_layer_map`` has type ``'data'``.

    :return: names of the out links backed by a data layer
    :rtype: set
    """
    return {
        out_link.link_name
        for sub in cp.g_config.model_config.sub_models
        for out_link in sub.out_links
        if cp.g_layer_map[out_link.link_name].type == 'data'
    }
def __get_used_evaluators__(layer_names): def __get_used_evaluators__(layer_names):
evaluator_names = set() evaluator_names = set()
for e in cp.g_config.model_config.evaluators: for e in cp.g_config.model_config.evaluators:
...@@ -264,6 +273,7 @@ def parse_network(output_layers, extra_layers=None): ...@@ -264,6 +273,7 @@ def parse_network(output_layers, extra_layers=None):
submodel_names = __get_used_submodels__(layer_names) submodel_names = __get_used_submodels__(layer_names)
submodel_names.add('root') submodel_names.add('root')
evaluator_names = __get_used_evaluators__(layer_names) evaluator_names = __get_used_evaluators__(layer_names)
data_out_links = __get_submodel_data_out_links__()
input_layer_names = set() input_layer_names = set()
output_layer_names = set() output_layer_names = set()
...@@ -279,7 +289,7 @@ def parse_network(output_layers, extra_layers=None): ...@@ -279,7 +289,7 @@ def parse_network(output_layers, extra_layers=None):
continue continue
model_config.layers.extend([l]) model_config.layers.extend([l])
if l.type == 'data': if l.type == 'data':
if l.name in model_config.output_layer_names: if l.name in data_out_links:
""" """
In text generation, the outlink to save the generated word In text generation, the outlink to save the generated word
indices is a data_layer defined in recurrent_group. This indices is a data_layer defined in recurrent_group. This
......
...@@ -16,6 +16,7 @@ Creator package contains some simple reader creator, which could be used in user ...@@ -16,6 +16,7 @@ Creator package contains some simple reader creator, which could be used in user
program. program.
""" """
__all__ = ['np_array', 'text_file', "recordio"] __all__ = ['np_array', 'text_file', "recordio"]
...@@ -75,4 +76,4 @@ def recordio(path): ...@@ -75,4 +76,4 @@ def recordio(path):
yield r yield r
f.close() f.close()
return reader return reader
\ No newline at end of file
...@@ -230,7 +230,7 @@ class XmapEndSignal(): ...@@ -230,7 +230,7 @@ class XmapEndSignal():
pass pass
def xmap_readers(mapper, reader, process_num, buffer_size): def xmap_readers(mapper, reader, process_num, buffer_size, order=False):
""" """
Use multiprocess to map samples from reader by a mapper defined by user. Use multiprocess to map samples from reader by a mapper defined by user.
And this function contains a buffered decorator. And this function contains a buffered decorator.
...@@ -242,12 +242,15 @@ def xmap_readers(mapper, reader, process_num, buffer_size): ...@@ -242,12 +242,15 @@ def xmap_readers(mapper, reader, process_num, buffer_size):
:type process_num: int :type process_num: int
:param buffer_size: max buffer size :param buffer_size: max buffer size
:type buffer_size: int :type buffer_size: int
:param order: keep the order of reader
:type order: bool
:return: the decorated reader :return: the decorated reader
:rtype: callable :rtype: callable
""" """
end = XmapEndSignal() end = XmapEndSignal()
in_queue = Queue(buffer_size) in_queue = Queue(buffer_size)
out_queue = Queue(buffer_size) out_queue = Queue(buffer_size)
out_order = [0]
# define a worker to read samples from reader to in_queue # define a worker to read samples from reader to in_queue
def read_worker(reader, in_queue): def read_worker(reader, in_queue):
...@@ -255,8 +258,17 @@ def xmap_readers(mapper, reader, process_num, buffer_size): ...@@ -255,8 +258,17 @@ def xmap_readers(mapper, reader, process_num, buffer_size):
in_queue.put(i) in_queue.put(i)
in_queue.put(end) in_queue.put(end)
# define a worker to read samples from reader to in_queue with order flag
def order_read_worker(reader, in_queue):
    """
    Push every sample produced by ``reader()`` into ``in_queue`` as an
    ``(index, sample)`` tuple, where ``index`` counts up from 0 in reading
    order, then push the ``end`` signal object from the enclosing scope.

    :param reader: callable returning an iterable of samples
    :param in_queue: queue that receives the tagged samples
    """
    for position, sample in enumerate(reader()):
        in_queue.put((position, sample))
    # Tell the consumers that no more samples will arrive.
    in_queue.put(end)
# start a read worker in a thread # start a read worker in a thread
t = Thread(target=read_worker, args=(reader, in_queue)) target = order_read_worker if order else read_worker
t = Thread(target=target, args=(reader, in_queue))
t.daemon = True t.daemon = True
t.start() t.start()
...@@ -271,11 +283,28 @@ def xmap_readers(mapper, reader, process_num, buffer_size): ...@@ -271,11 +283,28 @@ def xmap_readers(mapper, reader, process_num, buffer_size):
in_queue.put(end) in_queue.put(end)
out_queue.put(end) out_queue.put(end)
# define a worker to handle samples from in_queue by mapper
# and put mapped samples into out_queue by order
def order_handle_worker(in_queue, out_queue, mapper, out_order):
    """
    Map ``(index, sample)`` items taken from ``in_queue`` and emit the
    mapped samples to ``out_queue`` in index order.

    :param in_queue: queue of ``(index, sample)`` tuples, terminated by an
                     ``XmapEndSignal`` instance
    :param out_queue: queue that receives the mapped samples
    :param mapper: callable applied to each sample
    :param out_order: one-element list holding the index of the next
                      sample allowed to be emitted; advanced by 1 after
                      each put
    """
    while True:
        item = in_queue.get()
        if isinstance(item, XmapEndSignal):
            break
        position, raw = item
        mapped = mapper(raw)
        # Hold the mapped result until every earlier index has been
        # emitted.
        # NOTE(review): this is a busy-wait; the loop polls out_order[0]
        # without sleeping or blocking while it waits for its turn.
        while position != out_order[0]:
            pass
        out_queue.put(mapped)
        out_order[0] += 1
    # Put the end signal back on in_queue (presumably so the other
    # workers sharing it also stop -- confirm against the caller), then
    # signal the end on the output side.
    in_queue.put(end)
    out_queue.put(end)
# start several handle_workers # start several handle_workers
target = order_handle_worker if order else handle_worker
args = (in_queue, out_queue, mapper, out_order) if order else (
in_queue, out_queue, mapper)
workers = [] workers = []
for i in xrange(process_num): for i in xrange(process_num):
worker = Thread( worker = Thread(target=target, args=args)
target=handle_worker, args=(in_queue, out_queue, mapper))
worker.daemon = True worker.daemon = True
workers.append(worker) workers.append(worker)
for w in workers: for w in workers:
......
...@@ -13,9 +13,7 @@ ...@@ -13,9 +13,7 @@
# limitations under the License. # limitations under the License.
import os import os
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.reader.creator import paddle.v2.reader.creator
......
...@@ -121,5 +121,27 @@ class TestShuffle(unittest.TestCase): ...@@ -121,5 +121,27 @@ class TestShuffle(unittest.TestCase):
self.assertEqual(total, 10) self.assertEqual(total, 10)
class TestXmap(unittest.TestCase):
    def test_xmap(self):
        """
        Run ``xmap_readers`` over ``reader_creator_10(0)`` for every
        combination of ordering flag, worker count and buffer size, and
        check that the i-th collected value equals ``mapper(i)``. When
        ordering is off, the collected values are sorted before the check.
        """

        def mapper(x):
            return (x + 1)

        for keep_order in (True, False):
            for thread_num in (1, 2, 4, 8, 16):
                for buf_size in (1, 2, 4, 8, 16):
                    mapped_reader = paddle.v2.reader.xmap_readers(
                        mapper,
                        reader_creator_10(0), thread_num, buf_size,
                        keep_order)
                    result = list(mapped_reader())
                    # Without the ordering flag only the set of values is
                    # checked, so normalise the order first.
                    if not keep_order:
                        result.sort()
                    for idx, value in enumerate(result):
                        self.assertEqual(value, mapper(idx))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -13,6 +13,7 @@ packages=['paddle', ...@@ -13,6 +13,7 @@ packages=['paddle',
setup_requires=["requests", setup_requires=["requests",
"numpy", "numpy",
"protobuf==3.1", "protobuf==3.1",
"recordio",
"matplotlib", "matplotlib",
"rarfile"] "rarfile"]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册