提交 1b6faffe 编写于 作者: C caoying03

Merge branch 'develop' into fix_recurrent_parse_bug

......@@ -25,7 +25,7 @@ COPY ./paddle/scripts/docker/root/ /root/
RUN apt-get update && \
apt-get install -y \
git python-pip python-dev openssh-server bison \
wget unzip tar xz-utils bzip2 gzip coreutils ntp \
wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
curl sed grep graphviz libjpeg-dev zlib1g-dev \
python-numpy python-matplotlib gcc g++ \
automake locales clang-format-3.8 swig doxygen cmake \
......
......@@ -11,6 +11,7 @@ import (
"github.com/namsral/flag"
log "github.com/sirupsen/logrus"
"github.com/topicai/candy"
"github.com/PaddlePaddle/Paddle/go/master"
"github.com/PaddlePaddle/Paddle/go/utils/networkhelper"
......@@ -20,11 +21,18 @@ func main() {
port := flag.Int("port", 8080, "port of the master server.")
ttlSec := flag.Int("ttl", 60, "etcd lease TTL in seconds.")
endpoints := flag.String("endpoints", "http://127.0.0.1:2379", "comma separated etcd endpoints. If empty, fault tolerance will not be enabled.")
taskTimeoutDur := flag.Duration("task_timout_dur", 20*time.Minute, "task timout duration.")
taskTimeoutMax := flag.Int("task_timeout_max", 3, "max timtout count for each task before it being declared failed task.")
chunkPerTask := flag.Int("chunk_per_task", 10, "chunk per task.")
taskTimeoutDur := flag.Duration("task-timout-dur", 20*time.Minute, "task timout duration.")
taskTimeoutMax := flag.Int("task-timeout-max", 3, "max timtout count for each task before it being declared failed task.")
chunkPerTask := flag.Int("chunk-per-task", 10, "chunk per task.")
logLevel := flag.String("log-level", "info",
"log level, possible values: debug, info, warning, error, fatal, panic")
flag.Parse()
level, e := log.ParseLevel(*logLevel)
candy.Must(e)
log.SetLevel(level)
if *endpoints == "" {
log.Warningln("-endpoints not set, fault tolerance not be enabled.")
}
......
......@@ -40,7 +40,7 @@ func main() {
idx = *index
} else {
e = pserver.NewEtcdClient(*etcdEndpoint, *numPservers, *etcdTimeout)
idx, err = e.Register()
idx, err = e.Register(*port)
candy.Must(err)
cp, err = pserver.NewCheckpointFromFile(*checkpointPath, idx, e)
......
......@@ -2,6 +2,7 @@ package master
import (
"os"
"time"
"github.com/PaddlePaddle/Paddle/go/connection"
"github.com/PaddlePaddle/recordio"
......@@ -36,9 +37,9 @@ func (c *Client) getRecords() {
for {
t, err := c.getTask()
if err != nil {
// TODO(helin): wait before move on with next
// getTask call.
log.Errorln(err)
log.Errorf("Get task failed, sleep 3 seconds and continue, %s", err)
time.Sleep(3 * time.Second)
continue
}
......
......@@ -215,6 +215,7 @@ func readChunks(globPaths []string) ([]Chunk, error) {
}
count := index.NumChunks()
log.Infof("readChunks: file %s has %d chunks", path, count)
for i := 0; i < count; i++ {
chunk := Chunk{
Path: path,
......
import paddle.v2 as paddle
import paddle.v2.dataset.uci_housing as uci_housing
import paddle.v2.master as master
import os
import cPickle as pickle
etcd_ip = os.getenv("MASTER_IP", "127.0.0.1")
etcd_endpoint = "http://" + etcd_ip + ":2379"
def cloud_reader():
print "connecting to master, etcd endpoints: ", etcd_endpoint
master_client = master.client(etcd_endpoint, 5, 64)
master_client.set_dataset(
["/pfs/dlnel/public/dataset/uci_housing/uci_housing-*-of-*"])
while 1:
r, e = master_client.next_record()
if not r:
break
yield pickle.loads(r)
def main():
......@@ -22,13 +40,13 @@ def main():
# create optimizer of new remote updater to pserver
optimizer = paddle.optimizer.Momentum(momentum=0)
#TODO(zhihong) : replace optimizer with new OptimizerConfig
print "etcd endoint: ", etcd_endpoint
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer,
is_local=False,
pserver_spec="localhost:3000")
pserver_spec=etcd_endpoint,
use_etcd=True)
# event_handler to print training and testing info
def event_handler(event):
......@@ -47,11 +65,11 @@ def main():
print "Test %d, %.2f" % (event.pass_id, result.cost)
# training
# NOTE: use uci_housing.train() as reader for non-paddlecloud training
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
uci_housing.train(), buf_size=500),
batch_size=2),
cloud_reader, buf_size=500), batch_size=2),
feeding={'x': 0,
'y': 1},
event_handler=event_handler,
......
......@@ -12,6 +12,7 @@ import (
)
const (
// DefaultEtcdTimeout is the default etcd timeout
DefaultEtcdTimeout time.Duration = 5 * time.Second
)
......@@ -66,12 +67,12 @@ func (p *EtcdClient) List() []Server {
for {
for i := 0; i < psDesired; i++ {
ctx, cancel := context.WithTimeout(context.Background(), p.timeout)
cancel()
psKey := pserver.PsPath + strconv.Itoa(i)
log.Debugf("checking %s", psKey)
resp, err := p.client.Get(ctx, psKey)
cancel()
if err != nil {
log.Infof("Get psKey= %s error, %v", psKey, err)
log.Infof("Get psKey=%s error, %v", psKey, err)
time.Sleep(p.timeout)
continue
}
......
......@@ -49,7 +49,7 @@ func NewEtcdClient(endpoints string, numPservers int, timeout time.Duration) *Et
// Register registers the pserver on etcd
//
// Register returns the index of the current pserver.
func (e *EtcdClient) Register() (int, error) {
func (e *EtcdClient) Register(port int) (int, error) {
var err error
e.externalIP, err = networkhelper.GetExternalIP()
......@@ -116,7 +116,7 @@ func (e *EtcdClient) Register() (int, error) {
for {
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
var err error
pserverIdx, err = e.registerPserverEtcd(ctx)
pserverIdx, err = e.registerPserverEtcd(ctx, port)
cancel()
if err != nil {
log.Warn(err)
......@@ -140,7 +140,7 @@ func (e *EtcdClient) initDesiredPservers(ctx context.Context, numPservers int) (
}
// registerPserverEtcd registers pserver node on etcd using transaction.
func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) {
func (e *EtcdClient) registerPserverEtcd(ctx context.Context, port int) (int, error) {
var idx int
_, err := concurrency.NewSTM(e.etcdClient, func(c concurrency.STM) error {
registered := false
......@@ -156,8 +156,9 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) {
log.Fatal(err)
}
// find the first id and write info
c.Put(psKey, e.externalIP, clientv3.WithLease(resp.ID))
log.Debugf("set pserver node %s with value %s", psKey, e.externalIP)
pserverAddr := e.externalIP + ":" + strconv.Itoa(port)
c.Put(psKey, pserverAddr, clientv3.WithLease(resp.ID))
log.Debugf("set pserver node %s with value %s", psKey, pserverAddr)
ch, kaerr := e.etcdClient.KeepAlive(context.TODO(), resp.ID)
if kaerr != nil {
log.Errorf("keepalive etcd node error: %v", kaerr)
......
......@@ -843,7 +843,8 @@ public:
bool useSparseUpdater);
static ParameterUpdater* createNewRemoteUpdater(
OptimizationConfig* config,
const std::string pserverSpec) throw(UnsupportError);
const std::string pserverSpec,
const bool useEtcd) throw(UnsupportError);
~ParameterUpdater();
/**
......
......@@ -33,11 +33,12 @@ ParameterUpdater *ParameterUpdater::createLocalUpdater(
ParameterUpdater *ParameterUpdater::createNewRemoteUpdater(
OptimizationConfig *config,
const std::string pserverSpec) throw(UnsupportError) {
const std::string pserverSpec,
const bool useEtcd) throw(UnsupportError) {
#ifndef PADDLE_WITHOUT_GOLANG
auto updater = new ParameterUpdater();
updater->m->updater.reset(new paddle::NewRemoteParameterUpdater(
config->m->getConfig(), pserverSpec));
config->m->getConfig(), pserverSpec, useEtcd));
return updater;
#else
throw UnsupportError();
......
......@@ -2,21 +2,23 @@
cc_library(ddim SRCS ddim.cc)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)
cc_test(tensor_test SRCS tensor_test.cc DEPS ddim)
cc_test(tensor_test SRCS tensor_test.cc DEPS ddim glog gflags)
cc_test(variable_test SRCS variable_test.cc)
cc_test(scope_test SRCS scope_test.cc)
cc_test(enforce_test SRCS enforce_test.cc)
cc_library(enforce SRCS enforce.cc DEPS glog gflags)
cc_test(enforce_test SRCS enforce_test.cc DEPS enforce)
proto_library(attr_type SRCS attr_type.proto)
proto_library(op_proto SRCS op_proto.proto DEPS attr_type)
cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
proto_library(op_desc SRCS op_desc.proto DEPS attr_type)
cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf)
cc_library(operator SRCS operator.cc DEPS op_desc device_context)
cc_library(operator SRCS operator.cc DEPS op_desc device_context enforce)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
cc_library(op_registry SRCS op_registry.cc DEPS op_proto op_desc)
cc_library(op_registry SRCS op_registry.cc DEPS op_proto op_desc enforce)
cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry operator)
py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto)
# Generate an empty __init__.py to make framework_py_proto as a valid python module.
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/enforce.h"
......@@ -10,6 +10,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <paddle/string/printf.h>
#include <exception>
#include <sstream>
......@@ -58,12 +59,17 @@ class EnforceNotMet : public std::exception {
/**
* @brief Enforce a condition, otherwise throw an EnforceNotMet
*/
#ifdef NDEBUG
#define PADDLE_ENFORCE(condition, ...) \
do { \
if (UNLIKELY(!(condition))) { \
PADDLE_THROW(__VA_ARGS__); \
} \
} while (0)
#else
#define PADDLE_ENFORCE(condition, ...) \
CHECK(condition) << ::paddle::string::Sprintf(__VA_ARGS__);
#endif
} // namespace framework
} // namespace paddle
......@@ -32,7 +32,7 @@ __global__ void KeRowConv(real* y, const real* x, const real* w,
for (int i = tidy; i < context; i += blky) {
sw[i][tidx] = gidx + tidx < width ? w[i*width + gidx + tidx] : 0.0;
}
__syncthreads();
for (int i = 0; i < numSeq; ++i) {
......@@ -144,12 +144,15 @@ __global__ void KeRowConvBwWeight(real* dw, const real* x, const real* dy,
int yoff = start + j;
// transpose
sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0;
sh_dy[tidx][tidy + context - 1] = (xoff < width && yoff < end) ? dy[yoff * width + xoff] : 0.0;
sh_x[tidx][tidy] = (xoff < width && yoff < end) ?
x[yoff * width + xoff] : 0.0;
sh_dy[tidx][tidy + context - 1] = (xoff < width && yoff < end) ?
dy[yoff * width + xoff] : 0.0;
__syncthreads();
if (tidy < (context - 1)) {
yoff = yoff - context + 1;
sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ? dy[yoff * width + xoff] : 0.0;
sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ?
dy[yoff * width + xoff] : 0.0;
}
__syncthreads();
......@@ -199,11 +202,13 @@ __global__ void KeRowConvBwWeight2(real* dw, const real* x, const real* dy,
int yoff = start + j;
// transpose
sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0;
sh_x[tidx][tidy] = (xoff < width && yoff < end) ?
x[yoff * width + xoff] : 0.0;
__syncthreads();
for (int t = 0; t < context; t++) {
sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start && yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0;
sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start &&
yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0;
__syncthreads();
real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx];
......@@ -239,7 +244,7 @@ __global__ void KeRowConvBwData(real* dx, const real* w, const real* dy,
for (int i = tidy; i < context; i += blky) {
sw[i][tidx] = gidx + tidx < width ? w[i*width + gidx + tidx] : 0.0;
}
__syncthreads();
for (int i = 0; i < numSeq; ++i) {
......@@ -312,7 +317,7 @@ void RowConvGrad<DEVICE_TYPE_GPU>(const GpuMatrix& outG,
dim3 dimBlock(32, 32);
dim3 dimGrid(DIVUP(width, dimBlock.x), 1);
real* dw = filterG.getData();
if (contextLength <= 32) {
if (contextLength <= 32) {
KeRowConvBwWeight<32, 32, 32>
<<<dimGrid, dimBlock, 0, STREAM_DEFAULT>>>
(dw, x, dy, starts, height, width, numSeq, contextLength);
......
......@@ -155,7 +155,8 @@ RUN apt-get update &&\
paddle version
${DOCKERFILE_CUDNN_DSO}
${DOCKERFILE_GPU_ENV}
ADD go/cmd/pserver/pserver /usr/bin/
ADD go/cmd/master/master /usr/bin/
# default command shows the paddle version and exit
CMD ["paddle", "version"]
EOF
......@@ -28,6 +28,17 @@ NewRemoteParameterUpdater::NewRemoteParameterUpdater(
newGradients_(nullptr),
pserverSpec_(pserverSpec) {}
NewRemoteParameterUpdater::NewRemoteParameterUpdater(
const OptimizationConfig &config,
const std::string pserverSpec,
const bool useEtcd)
: trainerConfig_(config),
parameterClient_(-1),
newParameters_(nullptr),
newGradients_(nullptr),
pserverSpec_(pserverSpec),
useEtcd_(useEtcd) {}
void NewRemoteParameterUpdater::init(
const std::vector<ParameterPtr> &parameters) {
ParameterUpdater::init(parameters);
......@@ -38,8 +49,13 @@ void NewRemoteParameterUpdater::init(
}
// create parameter server client.
parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(),
FLAGS_trainer_id == 0);
if (useEtcd_) {
parameterClient_ = paddle_new_etcd_pserver_client(
(char *)pserverSpec_.c_str(), FLAGS_trainer_id == 0);
} else {
parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(),
FLAGS_trainer_id == 0);
}
// init new parameter and gradient.
newParameters_ = initNewParameter(PARAMETER_VALUE);
......
......@@ -32,6 +32,9 @@ class NewRemoteParameterUpdater : public ParameterUpdater {
public:
NewRemoteParameterUpdater(const OptimizationConfig& config,
const std::string pserverSpec);
NewRemoteParameterUpdater(const OptimizationConfig& config,
const std::string pserverSpec,
const bool useEtcd);
~NewRemoteParameterUpdater() {
releaseNewParameter(newParameters_);
releaseNewParameter(newGradients_);
......@@ -111,6 +114,8 @@ protected:
paddle_parameter** newGradients_;
/// the specification of parameter server "host1:port,host1:port"
std::string pserverSpec_;
/// true if pserverSpec_ is etcd endpoint, else pserverSpec_ is pserver addr
bool useEtcd_;
};
} // namespace paddle
......@@ -3171,11 +3171,11 @@ def memory(name,
@wrap_bias_attr_default()
@wrap_act_default(
param_names=['gate_act', 'state_act'], act=SigmoidActivation())
@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
@wrap_act_default(param_names=['state_act'], act=TanhActivation())
@wrap_act_default(act=TanhActivation())
@wrap_name_default('lstm_step')
@layer_support()
@layer_support(ERROR_CLIPPING, DROPOUT)
def lstm_step_layer(input,
state,
size=None,
......@@ -3631,16 +3631,18 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
if isinstance(layer_outs, LayerOutput):
layer_outs = [layer_outs]
for ot in layer_outs:
assert isinstance(ot, LayerOutput)
ot.reverse = reverse
RecurrentLayerGroupSetOutLink(ot.name)
for layer_out in layer_outs:
assert isinstance(
layer_out, LayerOutput
), "Type of step function's return value must be LayerOutput."
layer_out.reverse = reverse
RecurrentLayerGroupSetOutLink(layer_out.name)
RecurrentLayerGroupEnd(name=name)
for layer_out in layer_outs:
# Thee previous full_name is the name is the rnn group
# We need a full_name outside the rnn group
# The previous full_name is the name inside the recurrent group.
# We need a full_name outside the recurrent group.
layer_out.full_name = MakeLayerNameInSubmodel(layer_out.name)
if len(layer_outs) == 1:
......@@ -3663,7 +3665,20 @@ class BaseGeneratedInput(object):
class GeneratedInput(BaseGeneratedInput):
def after_real_step(self, input):
return maxid_layer(input=input, name='__beam_search_predict__')
if isinstance(input, LayerOutput):
input = [input]
elif isinstance(input, collections.Sequence):
input = list(input)
if len(input) > 1:
logger.info(
("More than one layers inside the recurrent_group "
"are returned as outputs of the entire recurrent_group "
"PLEASE garantee the first output is probability of "
"the predicted next word."))
return [maxid_layer(
input=input[0], name='__beam_search_predict__')] + (
input[1:] if len(input) > 1 else [])
def before_real_step(self):
predict_id = memory(
......@@ -3924,7 +3939,7 @@ def beam_search(step,
predict = gipt.after_real_step(step(*args))
eos_layer(input=predict, eos_id=eos_id, name=eos_name)
eos_layer(input=predict[0], eos_id=eos_id, name=eos_name)
return predict
return recurrent_group(
......
......@@ -15,7 +15,6 @@
"""
# from activations import *
import pdb
from activations import LinearActivation, ReluActivation, SoftmaxActivation, \
IdentityActivation, TanhActivation, SequenceSoftmaxActivation
from attrs import ExtraAttr
......@@ -616,18 +615,16 @@ def simple_lstm(input,
@wrap_name_default('lstm_unit')
def lstmemory_unit(input,
out_memory=None,
memory_boot=None,
name=None,
size=None,
param_attr=None,
act=None,
gate_act=None,
state_act=None,
mixed_bias_attr=None,
input_proj_bias_attr=None,
input_proj_layer_attr=None,
lstm_bias_attr=None,
mixed_layer_attr=None,
lstm_layer_attr=None,
get_output_layer_attr=None):
lstm_layer_attr=None):
"""
Define calculations that a LSTM unit performs during a single time step.
This function itself is not a recurrent layer, so it can not be
......@@ -664,8 +661,8 @@ def lstmemory_unit(input,
:param input: input layer name.
:type input: LayerOutput
:param memory_boot: the initialization state of the LSTM cell.
:type memory_boot: LayerOutput | None
:param out_memory: output of previous time step
:type out_memory: LayerOutput | None
:param name: lstmemory unit name.
:type name: basestring
:param size: lstmemory unit size.
......@@ -678,18 +675,17 @@ def lstmemory_unit(input,
:type gate_act: BaseActivation
:param state_act: lstm state activiation type.
:type state_act: BaseActivation
:param mixed_bias_attr: bias parameter attribute of mixed layer.
False means no bias, None means default bias.
:type mixed_bias_attr: ParameterAttribute|False
:param input_proj_bias_attr: bias attribute for input-to-hidden projection.
False means no bias, None means default bias.
:type input_proj_bias_attr: ParameterAttribute|False|None
:param input_proj_layer_attr: extra layer attribute for input to hidden
projection of the LSTM unit, such as dropout, error clipping.
:type input_proj_layer_attr: ExtraLayerAttribute
:param lstm_bias_attr: bias parameter attribute of lstm layer.
False means no bias, None means default bias.
False means no bias, None means default bias.
:type lstm_bias_attr: ParameterAttribute|False
:param mixed_layer_attr: mixed layer's extra attribute.
:type mixed_layer_attr: ExtraLayerAttribute
:param lstm_layer_attr: lstm layer's extra attribute.
:type lstm_layer_attr: ExtraLayerAttribute
:param get_output_layer_attr: get output layer's extra attribute.
:type get_output_layer_attr: ExtraLayerAttribute
:return: lstmemory unit name.
:rtype: LayerOutput
"""
......@@ -701,14 +697,13 @@ def lstmemory_unit(input,
else:
out_mem = out_memory
state_mem = memory(
name="%s_state" % name, size=size, boot_layer=memory_boot)
state_mem = memory(name="%s_state" % name, size=size)
with mixed_layer(
name="%s_input_recurrent" % name,
size=size * 4,
bias_attr=mixed_bias_attr,
layer_attr=mixed_layer_attr,
bias_attr=input_proj_bias_attr,
layer_attr=input_proj_layer_attr,
act=IdentityActivation()) as m:
m += identity_projection(input=input)
m += full_matrix_projection(input=out_mem, param_attr=param_attr)
......@@ -723,11 +718,7 @@ def lstmemory_unit(input,
gate_act=gate_act,
state_act=state_act,
layer_attr=lstm_layer_attr)
get_output_layer(
name='%s_state' % name,
input=lstm_out,
arg_name='state',
layer_attr=get_output_layer_attr)
get_output_layer(name='%s_state' % name, input=lstm_out, arg_name='state')
return lstm_out
......@@ -736,17 +727,16 @@ def lstmemory_unit(input,
def lstmemory_group(input,
size=None,
name=None,
memory_boot=None,
out_memory=None,
reverse=False,
param_attr=None,
act=None,
gate_act=None,
state_act=None,
mixed_bias_attr=None,
input_proj_bias_attr=None,
input_proj_layer_attr=None,
lstm_bias_attr=None,
mixed_layer_attr=None,
lstm_layer_attr=None,
get_output_layer_attr=None):
lstm_layer_attr=None):
"""
lstm_group is a recurrent_group version of Long Short Term Memory. It
does exactly the same calculation as the lstmemory layer (see lstmemory in
......@@ -780,8 +770,8 @@ def lstmemory_group(input,
:type size: int
:param name: name of the lstmemory group.
:type name: basestring
:param memory_boot: the initialization state of LSTM cell.
:type memory_boot: LayerOutput | None
:param out_memory: output of previous time step
:type out_memory: LayerOutput | None
:param reverse: is lstm reversed
:type reverse: bool
:param param_attr: Parameter config, None if use default.
......@@ -792,18 +782,17 @@ def lstmemory_group(input,
:type gate_act: BaseActivation
:param state_act: lstm state activiation type.
:type state_act: BaseActivation
:param mixed_bias_attr: bias parameter attribute of mixed layer.
False means no bias, None means default bias.
:type mixed_bias_attr: ParameterAttribute|False
:param lstm_bias_attr: bias parameter attribute of lstm layer.
False means no bias, None means default bias.
:type lstm_bias_attr: ParameterAttribute|False
:param mixed_layer_attr: mixed layer's extra attribute.
:type mixed_layer_attr: ExtraLayerAttribute
:param input_proj_bias_attr: bias attribute for input-to-hidden projection.
False means no bias, None means default bias.
:type input_proj_bias_attr: ParameterAttribute|False|None
:param input_proj_layer_attr: extra layer attribute for input to hidden
projection of the LSTM unit, such as dropout, error clipping.
:type input_proj_layer_attr: ExtraLayerAttribute
:param lstm_layer_attr: lstm layer's extra attribute.
:type lstm_layer_attr: ExtraLayerAttribute
:param get_output_layer_attr: get output layer's extra attribute.
:type get_output_layer_attr: ExtraLayerAttribute
:return: the lstmemory group.
:rtype: LayerOutput
"""
......@@ -811,18 +800,17 @@ def lstmemory_group(input,
def __lstm_step__(ipt):
return lstmemory_unit(
input=ipt,
memory_boot=memory_boot,
name=name,
size=size,
mixed_bias_attr=mixed_bias_attr,
mixed_layer_attr=mixed_layer_attr,
param_attr=param_attr,
lstm_bias_attr=lstm_bias_attr,
act=act,
gate_act=gate_act,
state_act=state_act,
out_memory=out_memory,
input_proj_bias_attr=input_proj_bias_attr,
input_proj_layer_attr=input_proj_layer_attr,
param_attr=param_attr,
lstm_layer_attr=lstm_layer_attr,
get_output_layer_attr=get_output_layer_attr)
lstm_bias_attr=lstm_bias_attr)
return recurrent_group(
name='%s_recurrent_group' % name,
......
......@@ -104,7 +104,7 @@ layers {
}
bias_parameter_name: "lstm_bias"
active_gate_type: "sigmoid"
active_state_type: "sigmoid"
active_state_type: "tanh"
}
layers {
name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
......@@ -183,7 +183,7 @@ layers {
}
bias_parameter_name: "lstm_bias"
active_gate_type: "sigmoid"
active_state_type: "sigmoid"
active_state_type: "tanh"
}
layers {
name: "__lstm_group_1___state@__lstm_group_1___recurrent_group"
......
......@@ -258,7 +258,7 @@ layers {
}
bias_parameter_name: "___lstm_group_0__@__lstm_group_0___recurrent_group.wbias"
active_gate_type: "sigmoid"
active_state_type: "sigmoid"
active_state_type: "tanh"
}
layers {
name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
......
......@@ -20,12 +20,13 @@ lstm1 = lstmemory_group(
input=m1,
param_attr=lstm_param,
lstm_bias_attr=lstm_bias,
mixed_bias_attr=False)
input_proj_bias_attr=False)
lstm2 = lstmemory_group(
input=m2,
param_attr=lstm_param,
lstm_bias_attr=lstm_bias,
mixed_bias_attr=False)
input_proj_bias_attr=False)
softmax_param = ParamAttr(name='softmax_param')
......
......@@ -22,6 +22,8 @@ import importlib
import paddle.v2.dataset
import cPickle
import glob
import cPickle as pickle
import random
__all__ = [
'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader',
......@@ -170,8 +172,6 @@ def convert(output_path,
name_prefix,
max_lines_to_shuffle=1000):
import recordio
import cPickle as pickle
import random
"""
Convert data from reader to recordio format files.
......@@ -201,8 +201,10 @@ def convert(output_path,
def write_data(w, lines):
random.shuffle(lines)
for i, d in enumerate(lines):
d = pickle.dumps(d, pickle.HIGHEST_PROTOCOL)
w[i % num_shards].write(d)
# FIXME(Yancey1989):
# dumps with protocol: pickle.HIGHEST_PROTOCOL
o = pickle.dumps(d)
w[i % num_shards].write(o)
w = open_writers()
lines = []
......
......@@ -212,19 +212,19 @@ def gen_pair(querylist, partial_order="full"):
for j in range(i + 1, len(querylist)):
query_right = querylist[j]
if query_left.relevance_score > query_right.relevance_score:
labels.append(1)
labels.append([1])
docpairs.append([
np.array(query_left.feature_vector),
np.array(query_right.feature_vector)
])
elif query_left.relevance_score < query_right.relevance_score:
labels.append(1)
labels.append([1])
docpairs.append([
np.array(query_right.feature_vector),
np.array(query_left.feature_vector)
])
for label, pair in zip(labels, docpairs):
yield label, pair[0], pair[1]
yield np.array(label), pair[0], pair[1]
def gen_list(querylist):
......
......@@ -10,8 +10,9 @@ class client(object):
client is a client to the master server.
"""
def __init__(self, addr, buf_size):
self.c = lib.paddle_new_master_client(addr, buf_size)
def __init__(self, etcd_endpoints, timeout, buf_size):
self.c = lib.paddle_new_etcd_master_client(etcd_endpoints, timeout,
buf_size)
def close(self):
lib.paddle_release_master_client(self.c)
......
import py_paddle.swig_paddle as swig_api
import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils
import paddle.trainer_config_helpers.optimizers as v1_optimizers
"""
......@@ -16,7 +17,6 @@ __all__ = [
class Optimizer(object):
def __init__(self, **kwargs):
import py_paddle.swig_paddle as swig_api
if 'batch_size' in kwargs:
del kwargs['batch_size'] # not important for python library.
......@@ -46,12 +46,12 @@ class Optimizer(object):
return swig_api.ParameterUpdater.createRemoteUpdater(
self.__opt_conf__, pass_num, use_sparse_updater)
def __create_new_remote_updater__(self, pserver_spec):
def __create_new_remote_updater__(self, pserver_spec, use_etcd):
return swig_api.ParameterUpdater.createNewRemoteUpdater(
self.__opt_conf__, pserver_spec)
self.__opt_conf__, pserver_spec, use_etcd)
def create_updater(self, is_local, num_passes, use_sparse_updater,
pserver_spec):
pserver_spec, use_etcd):
"""
create proper parameter_updater by configuration.
:param is_local: create local or remote parameter updater
......@@ -77,7 +77,7 @@ class Optimizer(object):
num_passes, use_sparse_updater)
else:
parameter_updater = self.__create_new_remote_updater__(
pserver_spec)
pserver_spec, use_etcd)
return parameter_updater
......
......@@ -45,7 +45,8 @@ class SGD(object):
update_equation,
extra_layers=None,
is_local=True,
pserver_spec=None):
pserver_spec=None,
use_etcd=True):
if not isinstance(parameters, v2_parameters.Parameters):
raise TypeError('parameters should be parameters')
......@@ -61,6 +62,7 @@ class SGD(object):
self.__topology_in_proto__ = topology.proto()
self.__is_local__ = is_local
self.__pserver_spec__ = pserver_spec
self.__use_etcd__ = use_etcd
self.__use_sparse_updater__ = self.__topology__.use_sparse_updater()
# # In local mode, disable sparse_remote_update.
......@@ -127,7 +129,7 @@ class SGD(object):
self.__parameter_updater__ = self.__optimizer__.create_updater(
self.__is_local__, num_passes, self.__use_sparse_updater__,
self.__pserver_spec__)
self.__pserver_spec__, self.__use_etcd__)
self.__parameter_updater__.init(self.__gradient_machine__)
self.__gradient_machine__.start()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册