#coding=utf-8

from __future__ import print_function

import os
import sys
import logging
import random
import glob
import gzip
import numpy as np

import reader
import paddle.v2 as paddle
from paddle.v2.layer import parse_network
from model import GNR
from config import ModelConfig, TrainerConfig

logger = logging.getLogger("paddle")
logger.setLevel(logging.INFO)


def load_initial_model(model_path, parameters):
    """ Initalize parameters in the network from a trained model.

    This is useful in resuming the training from previously saved models.

    Arguments:
        - model_path:    The path of a trained model.
        - parameters:    The parameters in a network which will be initialized
                         from the specified model.
    """
    with gzip.open(model_path, "rb") as f:
        parameters.init_from_tar(f)


def load_pretrained_parameters(path):
    """ Load one pre-trained parameter.

    Arguments:
        - path:    The path of the pre-trained parameter.
    """
    return np.load(path)


def save_model(trainer, save_path, parameters):
    """ Save the trained parameters.

    Arguments:
        - save_path:    The path to save the trained parameters.
        - parameters:   The trained model parameters.
    """
    with gzip.open(save_path, "w") as f:
        trainer.save_parameter_to_tar(f)


def show_parameter_init_info(parameters):
    """ Print the information of initialization mean and std of parameters.

    Arguments:
        - parameters:   The parameters created in a model.
    """
    for p in parameters:
        logger.info("%s : initial_mean %.4f initial_std %.4f" %
                    (p, parameters.__param_conf__[p].initial_mean,
                     parameters.__param_conf__[p].initial_std))


def show_parameter_status(parameters):
    """ Print some statistical information of parameters in a network.

    This is used for debugging the model.

    Arguments:
        - parameters:   The parameters created in a model.
    """
    for p in parameters:

        value = parameters.get(p)
        grad = parameters.get_grad(p)

        avg_abs_value = np.average(np.abs(value))
        avg_abs_grad = np.average(np.abs(grad))

        logger.info(
            ("%s avg_abs_value=%.6f avg_abs_grad=%.6f "
             "min_value=%.6f max_value=%.6f min_grad=%.6f max_grad=%.6f") %
            (p, avg_abs_value, avg_abs_grad, value.min(), value.max(),
             grad.min(), grad.max()))


def choose_samples(path):
    """Load filenames for train, dev, and augmented samples.

    Arguments:
        - path:   The path of training data.
    """
    if not os.path.exists(os.path.join(path, "train")):
        print(
            "Non-existent directory as input path: {}".format(path),
            file=sys.stderr)
        sys.exit(1)

    # Get paths to all samples that we want to load.
    train_samples = glob.glob(os.path.join(path, "train", "*"))
    valid_samples = glob.glob(os.path.join(path, "dev", "*"))

    train_samples.sort()
    valid_samples.sort()

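    # Shuffle the order of the training files; the dev files keep their
    # sorted order.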
    random.shuffle(train_samples)

    return train_samples, valid_samples


def build_reader(data_dir, batch_size):
    """Build the data reader for this model.

    Arguments:
        - data_dir:   The path of training data.
        - batch_size:   The batch size for both the train and test readers.
    """
    train_samples, valid_samples = choose_samples(data_dir)

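    # Training samples are shuffled within a buffer of 102400 instances
    # before being batched.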
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            reader.data_reader(train_samples), buf_size=102400),
        batch_size=batch_size)

    # The testing data is not shuffled.
    test_reader = paddle.batch(
        reader.data_reader(
            valid_samples, is_train=False),
        batch_size=batch_size)
    return train_reader, test_reader, len(train_samples)


def build_event_handler(config, parameters, trainer):
    """Build the event handler for this model.

    Arguments:
        - config:        The training task configuration for this model.
        - parameters:    The parameters in the network.
        - trainer:       The trainer object.
    """

    # End batch and end pass event handler
    def event_handler(event):
        """The event handler."""
150 151 152 153 154 155 156 157 158 159
        """
        To print the statistical information of gradients of any learnable
        parameter, the event: EndForwardBackward rather than EndIteration
        should be handled. For the reason that parameter gradients will be
        reset to zeros when EndIteration event happens in GPU training.
        """
        if config.show_parameter_status_period and \
                isinstance(event, paddle.event.EndForwardBackward):
            if not event.batch_id % config.show_parameter_status_period:
                show_parameter_status(parameters)

        if isinstance(event, paddle.event.EndIteration):
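            # Periodically overwrite the latest checkpoint.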
            if event.batch_id and not event.batch_id % config.checkpoint_period:
                save_path = os.path.join(config.save_dir,
                                         "checkpoint_param.latest.tar.gz")
                save_model(trainer, save_path, parameters)

            if not event.batch_id % config.log_period:
                logger.info("Pass %d, Batch %d, Cost %f" %
                            (event.pass_id, event.batch_id, event.cost))

        if isinstance(event, paddle.event.EndPass):
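            # Save the parameters at the end of every pass.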
            save_path = os.path.join(config.save_dir,
                                     "pass_%05d.tar.gz" % event.pass_id)
            save_model(trainer, save_path, parameters)

    return event_handler


def train(model_config, trainer_config):
    """Training the GNR model.

    Arguments:
        - modle_config:     The model configuration for this model.
        - trainer_config:   The training task configuration for this model.
    """

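    # Create the directory for saving models if it does not exist.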
    if not os.path.exists(trainer_config.save_dir):
        os.mkdir(trainer_config.save_dir)

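    # Initialize the PaddlePaddle runtime with the configured device type
    # and trainer count.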
    paddle.init(
        use_gpu=trainer_config.use_gpu,
        trainer_count=trainer_config.trainer_count)

    train_reader, test_reader, train_sample_count = build_reader(
        trainer_config.data_dir, trainer_config.train_batch_size)
    """
    Define the optimizer. The learning rate will decrease according to
    the following formula:

    lr = learning_rate * pow(learning_rate_decay_a,
                             floor(num_samples_processed /
                                   learning_rate_decay_b))
    """
    optimizer = paddle.optimizer.Adam(
        learning_rate=trainer_config.learning_rate,
        gradient_clipping_threshold=trainer_config.gradient_clipping_threshold,
        regularization=paddle.optimizer.L2Regularization(
            trainer_config.l2_decay_rate),
        learning_rate_decay_a=0.5,
        learning_rate_decay_b=train_sample_count,
        learning_rate_schedule="discexp")

    # Define the network topology.
    loss = GNR(model_config)

    parameters = paddle.parameters.create(loss)

    if trainer_config.init_model_path:
        load_initial_model(trainer_config.init_model_path, parameters)
    else:
        show_parameter_init_info(parameters)
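        # Initialize the word embeddings with the pre-trained GloVe vectors.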
        parameters.set(
            "GloveVectors",
            load_pretrained_parameters(ModelConfig.pretrained_emb_path))

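    # The update rule (the Adam optimizer defined above) is supplied to the
    # trainer through update_equation.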
    trainer = paddle.trainer.SGD(cost=loss,
                                 parameters=parameters,
                                 update_equation=optimizer)

    event_handler = build_event_handler(trainer_config, parameters, trainer)
    trainer.train(
        reader=train_reader,
        num_passes=trainer_config.epochs,
        event_handler=event_handler)


if __name__ == "__main__":
    train(ModelConfig, TrainerConfig)