From 29bbd41ab44b0a721b614026f0a6e1f1a21c9bf0 Mon Sep 17 00:00:00 2001
From: ranqiu
Date: Tue, 14 Nov 2017 14:43:12 +0800
Subject: [PATCH] delete redundant dir

---
 conv_seq_to_seq/README.md     |  50 ----
 conv_seq_to_seq/beamsearch.py | 163 -------------
 conv_seq_to_seq/infer.py      | 199 ----------------
 conv_seq_to_seq/model.py      | 417 ----------------------------------
 conv_seq_to_seq/reader.py     |  67 ------
 conv_seq_to_seq/train.py      | 252 --------------------
 6 files changed, 1148 deletions(-)
 delete mode 100644 conv_seq_to_seq/README.md
 delete mode 100644 conv_seq_to_seq/beamsearch.py
 delete mode 100644 conv_seq_to_seq/infer.py
 delete mode 100644 conv_seq_to_seq/model.py
 delete mode 100644 conv_seq_to_seq/reader.py
 delete mode 100644 conv_seq_to_seq/train.py

diff --git a/conv_seq_to_seq/README.md b/conv_seq_to_seq/README.md
deleted file mode 100644
index 817c464a..00000000
--- a/conv_seq_to_seq/README.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# Convolutional Sequence to Sequence Learning
-This model implements the work in the following paper:
-
-Jonas Gehring, Michael Auli, David Grangier, et al. Convolutional Sequence to Sequence Learning. Association for Computational Linguistics (ACL), 2017
-
-# Training a Model
-- Modify the following script if needed and then run:
-
-  ```bash
-  python train.py \
-  --train_data_path ./data/train_data \
-  --test_data_path ./data/test_data \
-  --src_dict_path ./data/src_dict \
-  --trg_dict_path ./data/trg_dict \
-  --enc_blocks "[(256, 3)] * 5" \
-  --dec_blocks "[(256, 3)] * 3" \
-  --emb_size 256 \
-  --pos_size 200 \
-  --drop_rate 0.1 \
-  --use_gpu False \
-  --trainer_count 1 \
-  --batch_size 32 \
-  --num_passes 20 \
-  >train.log 2>&1
-  ```
-
-# Inferring with a Trained Model
-- Infer with a trained model by running:
-
-  ```bash
-  python infer.py \
-  --infer_data_path ./data/infer_data \
-  --src_dict_path ./data/src_dict \
-  --trg_dict_path ./data/trg_dict \
-  --enc_blocks "[(256, 3)] * 5" \
-  --dec_blocks "[(256, 3)] * 3" \
-  --emb_size 256 \
-  --pos_size 200 \
-  --drop_rate 0.1 \
-  --use_gpu False \
-  --trainer_count 1 \
-  --max_len 100 \
-  --beam_size 1 \
-  --model_path ./params.pass-0.tar.gz \
-  1>infer_result 2>infer.log
-  ```
-
-# Notes
-
-Currently, beam search forwards the encoder multiple times when predicting each target word, which requires extra computation. This will be fixed later.
diff --git a/conv_seq_to_seq/beamsearch.py b/conv_seq_to_seq/beamsearch.py
deleted file mode 100644
index 45656e80..00000000
--- a/conv_seq_to_seq/beamsearch.py
+++ /dev/null
@@ -1,163 +0,0 @@
-#coding=utf-8
-
-import sys
-import time
-import numpy as np
-
-
-class BeamSearch(object):
-    """
-    Generate sequence by beam search
-    NOTE: this class only implements generating one sentence at a time.
-    """
-
-    def __init__(self,
-                 inferer,
-                 trg_dict,
-                 pos_size,
-                 padding_num,
-                 beam_size=1,
-                 max_len=100):
-        self.inferer = inferer
-        self.trg_dict = trg_dict
-        self.word_padding = trg_dict.__len__()
-        self.pos_size = pos_size
-        self.pos_padding = pos_size
-        self.padding_num = padding_num
-        self.win_len = padding_num + 1
-        self.max_len = max_len
-        self.beam_size = beam_size
-
-    def get_beam_input(self, pre_beam_list, infer_data):
-        """
-        Get input for generation at the current iteration.
- """ - beam_input = [] - - if len(pre_beam_list) == 0: - cur_trg = [self.word_padding - ] * self.padding_num + [self.trg_dict['']] - cur_trg_pos = [self.pos_padding] * self.padding_num + [0] - beam_input.append(infer_data + [cur_trg] + [cur_trg_pos]) - else: - for seq in pre_beam_list: - if len(seq) < self.win_len: - cur_trg = [self.word_padding] * ( - self.win_len - len(seq) - 1 - ) + [self.trg_dict['']] + seq - cur_trg_pos = [self.pos_padding] * ( - self.win_len - len(seq) - 1) + [0] + range(1, - len(seq) + 1) - else: - cur_trg = seq[-self.win_len:] - cur_trg_pos = range( - len(seq) + 1 - self.win_len, len(seq) + 1) - - beam_input.append(infer_data + [cur_trg] + [cur_trg_pos]) - return beam_input - - def get_prob(self, beam_input): - """ - Get the probabilities of all possible tokens. - """ - row_list = [j * self.win_len for j in range(len(beam_input))] - prob = self.inferer.infer(beam_input, field='value')[row_list, :] - return prob - - def get_candidate(self, pre_beam_list, pre_beam_score, prob): - """ - Get top beam_size tokens and their scores for each beam. - """ - if prob.ndim == 1: - candidate_id = prob.argsort()[-self.beam_size:][::-1] - candidate_log_prob = np.log(prob[candidate_id]) - else: - candidate_id = prob.argsort()[:, -self.beam_size:][:, ::-1] - candidate_log_prob = np.zeros_like(candidate_id).astype('float32') - for j in range(len(pre_beam_list)): - candidate_log_prob[j, :] = np.log(prob[j, candidate_id[j, :]]) - - if pre_beam_score.size > 0: - candidate_score = candidate_log_prob + pre_beam_score.reshape( - (pre_beam_score.size, 1)) - else: - candidate_score = candidate_log_prob - - return candidate_id, candidate_score - - def prune(self, candidate_id, candidate_score, pre_beam_list, - completed_seq_list, completed_seq_score, completed_seq_min_score): - """ - Pruning process of the beam search. During the process, beam_size most possible sequences - are selected for the beam in the next iteration. Besides, their scores and the minimum score - of the completed sequences are updated. - """ - candidate_id = candidate_id.flatten() - candidate_score = candidate_score.flatten() - - topk_idx = candidate_score.argsort()[-self.beam_size:][::-1].tolist() - topk_seq_idx = [idx / self.beam_size for idx in topk_idx] - - next_beam = [] - beam_score = [] - for j in range(len(topk_idx)): - if candidate_id[topk_idx[j]] == self.trg_dict['']: - if len( - completed_seq_list - ) < self.beam_size or completed_seq_min_score <= candidate_score[ - topk_idx[j]]: - completed_seq_list.append(pre_beam_list[topk_seq_idx[j]]) - completed_seq_score.append(candidate_score[topk_idx[j]]) - - if completed_seq_min_score is None or ( - completed_seq_min_score >= - candidate_score[topk_idx[j]] and - len(completed_seq_list) < self.beam_size): - completed_seq_min_score = candidate_score[topk_idx[j]] - else: - seq = pre_beam_list[topk_seq_idx[ - j]] + [candidate_id[topk_idx[j]]] - score = candidate_score[topk_idx[j]] - next_beam.append(seq) - beam_score.append(score) - - beam_score = np.array(beam_score) - return next_beam, beam_score, completed_seq_min_score - - def search_one_sample(self, infer_data): - """ - Beam search process for one sample. 
- """ - completed_seq_list = [] - completed_seq_score = [] - completed_seq_min_score = None - uncompleted_seq_list = [[]] - uncompleted_seq_score = np.zeros(0) - - for i in xrange(self.max_len): - beam_input = self.get_beam_input(uncompleted_seq_list, infer_data) - - prob = self.get_prob(beam_input) - - candidate_id, candidate_score = self.get_candidate( - uncompleted_seq_list, uncompleted_seq_score, prob) - - uncompleted_seq_list, uncompleted_seq_score, completed_seq_min_score = self.prune( - candidate_id, candidate_score, uncompleted_seq_list, - completed_seq_list, completed_seq_score, - completed_seq_min_score) - - if len(uncompleted_seq_list) == 0: - break - if len(completed_seq_list) >= self.beam_size: - seq_max_score = uncompleted_seq_score.max() - if seq_max_score < completed_seq_min_score: - uncompleted_seq_list = [] - break - - final_seq_list = completed_seq_list + uncompleted_seq_list - final_score = np.concatenate( - (np.array(completed_seq_score), uncompleted_seq_score)) - max_id = final_score.argmax() - top_seq = final_seq_list[max_id] - return top_seq diff --git a/conv_seq_to_seq/infer.py b/conv_seq_to_seq/infer.py deleted file mode 100644 index eb46df55..00000000 --- a/conv_seq_to_seq/infer.py +++ /dev/null @@ -1,199 +0,0 @@ -#coding=utf-8 - -import sys -import argparse -import distutils.util -import gzip - -import paddle.v2 as paddle -from model import conv_seq2seq -from beamsearch import BeamSearch -import reader - - -def parse_args(): - parser = argparse.ArgumentParser( - description="PaddlePaddle Convolutional Seq2Seq") - parser.add_argument( - '--infer_data_path', - type=str, - required=True, - help="Path of the dataset for inference") - parser.add_argument( - '--src_dict_path', - type=str, - required=True, - help='Path of the source dictionary') - parser.add_argument( - '--trg_dict_path', - type=str, - required=True, - help='path of the target dictionary') - parser.add_argument( - '--enc_blocks', type=str, help='Convolution blocks of the encoder') - parser.add_argument( - '--dec_blocks', type=str, help='Convolution blocks of the decoder') - parser.add_argument( - '--emb_size', - type=int, - default=512, - help='Dimension of word embedding. (default: %(default)s)') - parser.add_argument( - '--pos_size', - type=int, - default=200, - help='Total number of the position indexes. (default: %(default)s)') - parser.add_argument( - '--drop_rate', - type=float, - default=0., - help='Dropout rate. (default: %(default)s)') - parser.add_argument( - "--use_gpu", - default=False, - type=distutils.util.strtobool, - help="Use gpu or not. (default: %(default)s)") - parser.add_argument( - "--trainer_count", - default=1, - type=int, - help="Trainer number. (default: %(default)s)") - parser.add_argument( - '--max_len', - type=int, - default=100, - help="The maximum length of the sentence to be generated. (default: %(default)s)" - ) - parser.add_argument( - "--beam_size", - default=1, - type=int, - help="The width of beam expasion. (default: %(default)s)") - parser.add_argument( - "--model_path", - type=str, - required=True, - help="The path of trained model. (default: %(default)s)") - return parser.parse_args() - - -def to_sentence(seq, dictionary): - raw_sentence = [dictionary[id] for id in seq] - sentence = " ".join(raw_sentence) - return sentence - - -def infer(infer_data_path, - src_dict_path, - trg_dict_path, - model_path, - enc_conv_blocks, - dec_conv_blocks, - emb_dim=512, - pos_size=200, - drop_rate=0., - max_len=100, - beam_size=1): - """ - Inference. 
-
-    :param infer_data_path: The path of the data for inference.
-    :type infer_data_path: str
-    :param src_dict_path: The path of the source dictionary.
-    :type src_dict_path: str
-    :param trg_dict_path: The path of the target dictionary.
-    :type trg_dict_path: str
-    :param model_path: The path of a trained model.
-    :type model_path: str
-    :param enc_conv_blocks: The scale list of the encoder's convolution blocks. And each element of
-                            the list contains output dimension and context length of the corresponding
-                            convolution block.
-    :type enc_conv_blocks: list of tuple
-    :param dec_conv_blocks: The scale list of the decoder's convolution blocks. And each element of
-                            the list contains output dimension and context length of the corresponding
-                            convolution block.
-    :type dec_conv_blocks: list of tuple
-    :param emb_dim: The dimension of the embedding vector.
-    :type emb_dim: int
-    :param pos_size: The total number of the position indexes, which means
-                     the maximum value of the index is pos_size - 1.
-    :type pos_size: int
-    :param drop_rate: Dropout rate.
-    :type drop_rate: float
-    :param max_len: The maximum length of the sentence to be generated.
-    :type max_len: int
-    :param beam_size: The width of beam expansion.
-    :type beam_size: int
-    """
-    # load dict
-    src_dict = reader.load_dict(src_dict_path)
-    trg_dict = reader.load_dict(trg_dict_path)
-    src_dict_size = src_dict.__len__()
-    trg_dict_size = trg_dict.__len__()
-
-    prob = conv_seq2seq(
-        src_dict_size=src_dict_size,
-        trg_dict_size=trg_dict_size,
-        pos_size=pos_size,
-        emb_dim=emb_dim,
-        enc_conv_blocks=enc_conv_blocks,
-        dec_conv_blocks=dec_conv_blocks,
-        drop_rate=drop_rate,
-        is_infer=True)
-
-    # load parameters
-    parameters = paddle.parameters.Parameters.from_tar(gzip.open(model_path))
-
-    padding_list = [context_len - 1 for (size, context_len) in dec_conv_blocks]
-    padding_num = reduce(lambda x, y: x + y, padding_list)
-    infer_reader = reader.data_reader(
-        data_file=infer_data_path,
-        src_dict=src_dict,
-        trg_dict=trg_dict,
-        pos_size=pos_size,
-        padding_num=padding_num)
-
-    inferer = paddle.inference.Inference(
-        output_layer=prob, parameters=parameters)
-
-    searcher = BeamSearch(
-        inferer=inferer,
-        trg_dict=trg_dict,
-        pos_size=pos_size,
-        padding_num=padding_num,
-        max_len=max_len,
-        beam_size=beam_size)
-
-    reverse_trg_dict = reader.get_reverse_dict(trg_dict)
-    for i, raw_data in enumerate(infer_reader()):
-        infer_data = [raw_data[0], raw_data[1]]
-        result = searcher.search_one_sample(infer_data)
-        sentence = to_sentence(result, reverse_trg_dict)
-        print sentence
-        sys.stdout.flush()
-    return
-
-
-def main():
-    args = parse_args()
-    enc_conv_blocks = eval(args.enc_blocks)
-    dec_conv_blocks = eval(args.dec_blocks)
-
-    paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count)
-
-    infer(
-        infer_data_path=args.infer_data_path,
-        src_dict_path=args.src_dict_path,
-        trg_dict_path=args.trg_dict_path,
-        model_path=args.model_path,
-        enc_conv_blocks=enc_conv_blocks,
-        dec_conv_blocks=dec_conv_blocks,
-        emb_dim=args.emb_size,
-        pos_size=args.pos_size,
-        drop_rate=args.drop_rate,
-        max_len=args.max_len,
-        beam_size=args.beam_size)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/conv_seq_to_seq/model.py b/conv_seq_to_seq/model.py
deleted file mode 100644
index 01dd9428..00000000
--- a/conv_seq_to_seq/model.py
+++ /dev/null
@@ -1,417 +0,0 @@
-#coding=utf-8
-
-import math
-
-import paddle.v2 as paddle
-
-__all__ = ["conv_seq2seq"]
-
-
-def gated_conv_with_batchnorm(input,
-                              size,
-                              context_len,
-                              context_start=None,
-                              learning_rate=1.0,
-                              drop_rate=0.):
-    """
-    Definition of the convolution block.
-
-    :param input: The input of this block.
-    :type input: LayerOutput
-    :param size: The dimension of the block's output.
-    :type size: int
-    :param context_len: The context length of the convolution.
-    :type context_len: int
-    :param context_start: The start position of the context.
-    :type context_start: int
-    :param learning_rate: The learning rate factor of the parameters in the block.
-                          The actual learning rate is the product of the global
-                          learning rate and this factor.
-    :type learning_rate: float
-    :param drop_rate: Dropout rate.
-    :type drop_rate: float
-    :return: The output of the convolution block.
-    :rtype: LayerOutput
-    """
-    input = paddle.layer.dropout(input=input, dropout_rate=drop_rate)
-
-    context = paddle.layer.mixed(
-        size=input.size * context_len,
-        input=paddle.layer.context_projection(
-            input=input, context_len=context_len, context_start=context_start))
-
-    raw_conv = paddle.layer.fc(
-        input=context,
-        size=size * 2,
-        act=paddle.activation.Linear(),
-        param_attr=paddle.attr.Param(
-            initial_mean=0.,
-            initial_std=math.sqrt(4.0 * (1.0 - drop_rate) / context.size),
-            learning_rate=learning_rate),
-        bias_attr=False)
-
-    batch_norm_conv = paddle.layer.batch_norm(
-        input=raw_conv,
-        act=paddle.activation.Linear(),
-        param_attr=paddle.attr.Param(learning_rate=learning_rate))
-
-    with paddle.layer.mixed(size=size) as conv:
-        conv += paddle.layer.identity_projection(
-            batch_norm_conv, size=size, offset=0)
-
-    with paddle.layer.mixed(size=size, act=paddle.activation.Sigmoid()) as gate:
-        gate += paddle.layer.identity_projection(
-            batch_norm_conv, size=size, offset=size)
-
-    with paddle.layer.mixed(size=size) as gated_conv:
-        gated_conv += paddle.layer.dotmul_operator(conv, gate)
-
-    return gated_conv
-
-
-def encoder(token_emb,
-            pos_emb,
-            conv_blocks=[(256, 3)] * 5,
-            num_attention=3,
-            drop_rate=0.1):
-    """
-    Definition of the encoder.
-
-    :param token_emb: The embedding vector of the input token.
-    :type token_emb: LayerOutput
-    :param pos_emb: The embedding vector of the input token's position.
-    :type pos_emb: LayerOutput
-    :param conv_blocks: The scale list of the convolution blocks. Each element of
-                        the list contains output dimension and context length of
-                        the corresponding convolution block.
-    :type conv_blocks: list of tuple
-    :param num_attention: The total number of the attention modules used in the decoder.
-    :type num_attention: int
-    :param drop_rate: Dropout rate.
-    :type drop_rate: float
-    :return: The input token encoding.
-    :rtype: LayerOutput
-    """
-    embedding = paddle.layer.addto(
-        input=[token_emb, pos_emb],
-        layer_attr=paddle.attr.Extra(drop_rate=drop_rate))
-
-    proj_size = conv_blocks[0][0]
-    block_input = paddle.layer.fc(
-        input=embedding,
-        size=proj_size,
-        act=paddle.activation.Linear(),
-        param_attr=paddle.attr.Param(
-            initial_mean=0.,
-            initial_std=math.sqrt((1.0 - drop_rate) / embedding.size),
-            learning_rate=1.0 / (2.0 * num_attention)),
-        bias_attr=True, )
-
-    for (size, context_len) in conv_blocks:
-        if block_input.size == size:
-            residual = block_input
-        else:
-            residual = paddle.layer.fc(
-                input=block_input,
-                size=size,
-                act=paddle.activation.Linear(),
-                param_attr=paddle.attr.Param(learning_rate=1.0 /
-                                             (2.0 * num_attention)),
-                bias_attr=True)
-
-        gated_conv = gated_conv_with_batchnorm(
-            input=block_input,
-            size=size,
-            context_len=context_len,
-            learning_rate=1.0 / (2.0 * num_attention),
-            drop_rate=drop_rate)
-
-        with paddle.layer.mixed(size=size) as block_output:
-            block_output += paddle.layer.identity_projection(residual)
-            block_output += paddle.layer.identity_projection(gated_conv)
-
-        # halve the variance of the sum
-        block_output = paddle.layer.slope_intercept(
-            input=block_output, slope=math.sqrt(0.5))
-
-        block_input = block_output
-
-    emb_dim = embedding.size
-    encoded_vec = paddle.layer.fc(
-        input=block_output,
-        size=emb_dim,
-        act=paddle.activation.Linear(),
-        param_attr=paddle.attr.Param(learning_rate=1.0 / (2.0 * num_attention)),
-        bias_attr=True)
-
-    encoded_sum = paddle.layer.addto(input=[encoded_vec, embedding])
-
-    # halve the variance of the sum
-    encoded_sum = paddle.layer.slope_intercept(input=encoded_sum, slope=math.sqrt(0.5))
-
-    return encoded_vec, encoded_sum
-
-
-def attention(decoder_state, cur_embedding, encoded_vec, encoded_sum):
-    """
-    Definition of the attention.
-
-    :param decoder_state: The hidden state of the decoder.
-    :type decoder_state: LayerOutput
-    :param cur_embedding: The embedding vector of the current token.
-    :type cur_embedding: LayerOutput
-    :param encoded_vec: The source token encoding.
-    :type encoded_vec: LayerOutput
-    :param encoded_sum: The sum of the source token's encoding and embedding.
-    :type encoded_sum: LayerOutput
-    :return: A context vector.
-    :rtype: LayerOutput
-    """
-    residual = decoder_state
-
-    state_size = decoder_state.size
-    emb_dim = cur_embedding.size
-    with paddle.layer.mixed(size=emb_dim, bias_attr=True) as state_summary:
-        state_summary += paddle.layer.full_matrix_projection(decoder_state)
-        state_summary += paddle.layer.identity_projection(cur_embedding)
-
-    # halve the variance of the sum
-    state_summary = paddle.layer.slope_intercept(
-        input=state_summary, slope=math.sqrt(0.5))
-
-    expanded = paddle.layer.expand(input=state_summary, expand_as=encoded_vec)
-
-    m = paddle.layer.linear_comb(weights=expanded, vectors=encoded_vec)
-
-    attention_weight = paddle.layer.fc(
-        input=m,
-        size=1,
-        act=paddle.activation.SequenceSoftmax(),
-        bias_attr=False)
-
-    scaled = paddle.layer.scaling(weight=attention_weight, input=encoded_sum)
-
-    attended = paddle.layer.pooling(
-        input=scaled, pooling_type=paddle.pooling.Sum())
-
-    attended_proj = paddle.layer.fc(
-        input=attended,
-        size=state_size,
-        act=paddle.activation.Linear(),
-        bias_attr=True)
-
-    attention_result = paddle.layer.addto(input=[attended_proj, residual])
-
-    # halve the variance of the sum
-    attention_result = paddle.layer.slope_intercept(
-        input=attention_result, slope=math.sqrt(0.5))
-    return attention_result
-
-
-def decoder(token_emb,
-            pos_emb,
-            encoded_vec,
-            encoded_sum,
-            dict_size,
-            conv_blocks=[(256, 3)] * 3,
-            drop_rate=0.1):
-    """
-    Definition of the decoder.
-
-    :param token_emb: The embedding vector of the input token.
-    :type token_emb: LayerOutput
-    :param pos_emb: The embedding vector of the input token's position.
-    :type pos_emb: LayerOutput
-    :param encoded_vec: The source token encoding.
-    :type encoded_vec: LayerOutput
-    :param encoded_sum: The sum of the source token's encoding and embedding.
-    :type encoded_sum: LayerOutput
-    :param dict_size: The size of the target dictionary.
-    :type dict_size: int
-    :param conv_blocks: The scale list of the convolution blocks. Each element
-                        of the list contains output dimension and context length
-                        of the corresponding convolution block.
-    :type conv_blocks: list of tuple
-    :param drop_rate: Dropout rate.
-    :type drop_rate: float
-    :return: The probability of the predicted token.
-    :rtype: LayerOutput
-    """
-
-    def attention_step(decoder_state, cur_embedding, encoded_vec, encoded_sum):
-        conditional = attention(
-            decoder_state=decoder_state,
-            cur_embedding=cur_embedding,
-            encoded_vec=encoded_vec,
-            encoded_sum=encoded_sum)
-        return conditional
-
-    embedding = paddle.layer.addto(
-        input=[token_emb, pos_emb],
-        layer_attr=paddle.attr.Extra(drop_rate=drop_rate))
-
-    proj_size = conv_blocks[0][0]
-    block_input = paddle.layer.fc(
-        input=embedding,
-        size=proj_size,
-        act=paddle.activation.Linear(),
-        param_attr=paddle.attr.Param(
-            initial_mean=0.,
-            initial_std=math.sqrt((1.0 - drop_rate) / embedding.size)),
-        bias_attr=True, )
-
-    for (size, context_len) in conv_blocks:
-        if block_input.size == size:
-            residual = block_input
-        else:
-            residual = paddle.layer.fc(
-                input=block_input,
-                size=size,
-                act=paddle.activation.Linear(),
-                bias_attr=True)
-
-        decoder_state = gated_conv_with_batchnorm(
-            input=block_input,
-            size=size,
-            context_len=context_len,
-            context_start=0,
-            drop_rate=drop_rate)
-
-        group_inputs = [
-            decoder_state,
-            embedding,
-            paddle.layer.StaticInput(input=encoded_vec),
-            paddle.layer.StaticInput(input=encoded_sum),
-        ]
-
-        conditional = paddle.layer.recurrent_group(
-            step=attention_step, input=group_inputs)
-
-        block_output = paddle.layer.addto(input=[conditional, residual])
-
-        # halve the variance of the sum
-        block_output = paddle.layer.slope_intercept(
-            input=block_output, slope=math.sqrt(0.5))
-
-        block_input = block_output
-
-    out_emb_dim = embedding.size
-    block_output = paddle.layer.fc(
-        input=block_output,
-        size=out_emb_dim,
-        act=paddle.activation.Linear(),
-        layer_attr=paddle.attr.Extra(drop_rate=drop_rate))
-
-    decoder_out = paddle.layer.fc(
-        input=block_output,
-        size=dict_size,
-        act=paddle.activation.Softmax(),
-        param_attr=paddle.attr.Param(
-            initial_mean=0.,
-            initial_std=math.sqrt((1.0 - drop_rate) / block_output.size)),
-        bias_attr=True)
-
-    return decoder_out
-
-
-def conv_seq2seq(src_dict_size,
-                 trg_dict_size,
-                 pos_size,
-                 emb_dim,
-                 enc_conv_blocks=[(256, 3)] * 5,
-                 dec_conv_blocks=[(256, 3)] * 3,
-                 drop_rate=0.1,
-                 is_infer=False):
-    """
-    Definition of convolutional sequence-to-sequence network.
-
-    :param src_dict_size: The size of the source dictionary.
-    :type src_dict_size: int
-    :param trg_dict_size: The size of the target dictionary.
-    :type trg_dict_size: int
-    :param pos_size: The total number of the position indexes, which means
-                     the maximum value of the index is pos_size - 1.
-    :type pos_size: int
-    :param emb_dim: The dimension of the embedding vector.
-    :type emb_dim: int
-    :param enc_conv_blocks: The scale list of the encoder's convolution blocks. Each element
-                            of the list contains output dimension and context length of the
-                            corresponding convolution block.
-    :type enc_conv_blocks: list of tuple
-    :param dec_conv_blocks: The scale list of the decoder's convolution blocks. Each element
-                            of the list contains output dimension and context length of the
-                            corresponding convolution block.
-    :type dec_conv_blocks: list of tuple
-    :param drop_rate: Dropout rate.
-    :type drop_rate: float
-    :param is_infer: Whether infer or not.
-    :type is_infer: bool
-    :return: Cost or output layer.
-    :rtype: LayerOutput
-    """
-    src = paddle.layer.data(
-        name='src_word',
-        type=paddle.data_type.integer_value_sequence(src_dict_size))
-    src_pos = paddle.layer.data(
-        name='src_word_pos',
-        type=paddle.data_type.integer_value_sequence(pos_size +
-                                                     1))  # one for padding
-
-    src_emb = paddle.layer.embedding(
-        input=src,
-        size=emb_dim,
-        name='src_word_emb',
-        param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1))
-    src_pos_emb = paddle.layer.embedding(
-        input=src_pos,
-        size=emb_dim,
-        name='src_pos_emb',
-        param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1))
-
-    num_attention = len(dec_conv_blocks)
-    encoded_vec, encoded_sum = encoder(
-        token_emb=src_emb,
-        pos_emb=src_pos_emb,
-        conv_blocks=enc_conv_blocks,
-        num_attention=num_attention,
-        drop_rate=drop_rate)
-
-    trg = paddle.layer.data(
-        name='trg_word',
-        type=paddle.data_type.integer_value_sequence(trg_dict_size +
-                                                     1))  # one for padding
-    trg_pos = paddle.layer.data(
-        name='trg_word_pos',
-        type=paddle.data_type.integer_value_sequence(pos_size +
-                                                     1))  # one for padding
-
-    trg_emb = paddle.layer.embedding(
-        input=trg,
-        size=emb_dim,
-        name='trg_word_emb',
-        param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1))
-    trg_pos_emb = paddle.layer.embedding(
-        input=trg_pos,
-        size=emb_dim,
-        name='trg_pos_emb',
-        param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1))
-
-    decoder_out = decoder(
-        token_emb=trg_emb,
-        pos_emb=trg_pos_emb,
-        encoded_vec=encoded_vec,
-        encoded_sum=encoded_sum,
-        dict_size=trg_dict_size,
-        conv_blocks=dec_conv_blocks,
-        drop_rate=drop_rate)
-
-    if is_infer:
-        return decoder_out
-
-    trg_next_word = paddle.layer.data(
-        name='trg_next_word',
-        type=paddle.data_type.integer_value_sequence(trg_dict_size))
-    cost = paddle.layer.classification_cost(
-        input=decoder_out, label=trg_next_word)
-
-    return cost
diff --git a/conv_seq_to_seq/reader.py b/conv_seq_to_seq/reader.py
deleted file mode 100644
index 6d4db49f..00000000
--- a/conv_seq_to_seq/reader.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#coding=utf-8
-
-import random
-
-
-def load_dict(dict_file):
-    word_dict = dict()
-    with open(dict_file, 'r') as f:
-        for i, line in enumerate(f):
-            w = line.strip().split()[0]
-            word_dict[w] = i
-    return word_dict
-
-
-def get_reverse_dict(dictionary):
-    reverse_dict = {dictionary[k]: k for k in dictionary.keys()}
-    return reverse_dict
-
-
-def load_data(data_file, src_dict, trg_dict):
-    UNK_IDX = src_dict['<unk>']
-    with open(data_file, 'r') as f:
-        for line in f:
-            line_split = line.strip().split('\t')
-            if len(line_split) < 2:
-                continue
-            src, trg = line_split
-            src_words = src.strip().split()
-            trg_words = trg.strip().split()
-            src_seq = [src_dict.get(w, UNK_IDX) for w in src_words]
-            trg_seq = [trg_dict.get(w, UNK_IDX) for w in trg_words]
-            yield src_seq, trg_seq
-
-
-def data_reader(data_file, src_dict, trg_dict, pos_size, padding_num):
-    def reader():
-        UNK_IDX = src_dict['<unk>']
-        word_padding = trg_dict.__len__()
-        pos_padding = pos_size
-
-        def _get_pos(pos_list, pos_size, pos_padding):
-            return [pos if pos < pos_size else pos_padding for pos in pos_list]
-
-        with open(data_file, 'r') as f:
-            for line in f:
-                line_split = line.strip().split('\t')
-                if len(line_split) != 2:
-                    continue
-                src, trg = line_split
-                src = src.strip().split()
-                src_word = [src_dict.get(w, UNK_IDX) for w in src]
-                src_word_pos = range(len(src_word))
-                src_word_pos = _get_pos(src_word_pos, pos_size, pos_padding)
-
-                trg = trg.strip().split()
-                trg_word = [trg_dict['<s>']
-                            ] + [trg_dict.get(w, UNK_IDX) for w in trg]
-                trg_word_pos = range(len(trg_word))
-                trg_word_pos = _get_pos(trg_word_pos, pos_size, pos_padding)
-
-                trg_next_word = trg_word[1:] + [trg_dict['<e>']]
-                trg_word = [word_padding] * padding_num + trg_word
-                trg_word_pos = [pos_padding] * padding_num + trg_word_pos
-                trg_next_word = trg_next_word + [trg_dict['<e>']] * padding_num
-                yield src_word, src_word_pos, trg_word, trg_word_pos, trg_next_word
-
-    return reader
diff --git a/conv_seq_to_seq/train.py b/conv_seq_to_seq/train.py
deleted file mode 100644
index c6ce0dff..00000000
--- a/conv_seq_to_seq/train.py
+++ /dev/null
@@ -1,252 +0,0 @@
-#coding=utf-8
-
-import os
-import sys
-import time
-import argparse
-import distutils.util
-import gzip
-import numpy as np
-
-import paddle.v2 as paddle
-from model import conv_seq2seq
-import reader
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(
-        description="PaddlePaddle Convolutional Seq2Seq")
-    parser.add_argument(
-        '--train_data_path',
-        type=str,
-        required=True,
-        help="Path of the training set")
-    parser.add_argument(
-        '--test_data_path', type=str, help='Path of the test set')
-    parser.add_argument(
-        '--src_dict_path',
-        type=str,
-        required=True,
-        help='Path of source dictionary')
-    parser.add_argument(
-        '--trg_dict_path',
-        type=str,
-        required=True,
-        help='Path of target dictionary')
-    parser.add_argument(
-        '--enc_blocks', type=str, help='Convolution blocks of the encoder')
-    parser.add_argument(
-        '--dec_blocks', type=str, help='Convolution blocks of the decoder')
-    parser.add_argument(
-        '--emb_size',
-        type=int,
-        default=512,
-        help='Dimension of word embedding. (default: %(default)s)')
-    parser.add_argument(
-        '--pos_size',
-        type=int,
-        default=200,
-        help='Total number of the position indexes. (default: %(default)s)')
-    parser.add_argument(
-        '--drop_rate',
-        type=float,
-        default=0.,
-        help='Dropout rate. (default: %(default)s)')
-    parser.add_argument(
-        "--use_gpu",
-        default=False,
-        type=distutils.util.strtobool,
-        help="Use gpu or not. (default: %(default)s)")
-    parser.add_argument(
-        "--trainer_count",
-        default=1,
-        type=int,
-        help="Trainer number. (default: %(default)s)")
-    parser.add_argument(
-        '--batch_size',
-        type=int,
-        default=32,
-        help="Size of a mini-batch. (default: %(default)s)")
-    parser.add_argument(
-        '--num_passes',
-        type=int,
-        default=15,
-        help="Number of passes to train. (default: %(default)s)")
-    return parser.parse_args()
-
-
-def create_reader(padding_num,
-                  train_data_path,
-                  test_data_path=None,
-                  src_dict=None,
-                  trg_dict=None,
-                  pos_size=200,
-                  batch_size=32):
-
-    train_reader = paddle.batch(
-        reader=paddle.reader.shuffle(
-            reader=reader.data_reader(
-                data_file=train_data_path,
-                src_dict=src_dict,
-                trg_dict=trg_dict,
-                pos_size=pos_size,
-                padding_num=padding_num),
-            buf_size=10240),
-        batch_size=batch_size)
-
-    test_reader = None
-    if test_data_path:
-        test_reader = paddle.batch(
-            reader=paddle.reader.shuffle(
-                reader=reader.data_reader(
-                    data_file=test_data_path,
-                    src_dict=src_dict,
-                    trg_dict=trg_dict,
-                    pos_size=pos_size,
-                    padding_num=padding_num),
-                buf_size=10240),
-            batch_size=batch_size)
-
-    return train_reader, test_reader
-
-
-def train(train_data_path,
-          test_data_path,
-          src_dict_path,
-          trg_dict_path,
-          enc_conv_blocks,
-          dec_conv_blocks,
-          emb_dim=512,
-          pos_size=200,
-          drop_rate=0.,
-          batch_size=32,
-          num_passes=15):
-    """
-    Train the convolutional sequence-to-sequence model.
-
-    :param train_data_path: The path of the training set.
-    :type train_data_path: str
-    :param test_data_path: The path of the test set.
-    :type test_data_path: str
-    :param src_dict_path: The path of the source dictionary.
-    :type src_dict_path: str
-    :param trg_dict_path: The path of the target dictionary.
-    :type trg_dict_path: str
-    :param enc_conv_blocks: The scale list of the encoder's convolution blocks. And each element of
-                            the list contains output dimension and context length of the corresponding
-                            convolution block.
-    :type enc_conv_blocks: list of tuple
-    :param dec_conv_blocks: The scale list of the decoder's convolution blocks. And each element of
-                            the list contains output dimension and context length of the corresponding
-                            convolution block.
-    :type dec_conv_blocks: list of tuple
-    :param emb_dim: The dimension of the embedding vector.
-    :type emb_dim: int
-    :param pos_size: The total number of the position indexes, which means
-                     the maximum value of the index is pos_size - 1.
-    :type pos_size: int
-    :param drop_rate: Dropout rate.
-    :type drop_rate: float
-    :param batch_size: The size of a mini-batch.
-    :type batch_size: int
-    :param num_passes: The total number of the passes to train.
-    :type num_passes: int
-    """
-    # load dict
-    src_dict = reader.load_dict(src_dict_path)
-    trg_dict = reader.load_dict(trg_dict_path)
-    src_dict_size = src_dict.__len__()
-    trg_dict_size = trg_dict.__len__()
-
-    optimizer = paddle.optimizer.Adam(
-        learning_rate=1e-3, )
-
-    cost = conv_seq2seq(
-        src_dict_size=src_dict_size,
-        trg_dict_size=trg_dict_size,
-        pos_size=pos_size,
-        emb_dim=emb_dim,
-        enc_conv_blocks=enc_conv_blocks,
-        dec_conv_blocks=dec_conv_blocks,
-        drop_rate=drop_rate,
-        is_infer=False)
-
-    # create parameters and trainer
-    parameters = paddle.parameters.create(cost)
-    trainer = paddle.trainer.SGD(
-        cost=cost, parameters=parameters, update_equation=optimizer)
-
-    padding_list = [context_len - 1 for (size, context_len) in dec_conv_blocks]
-    padding_num = reduce(lambda x, y: x + y, padding_list)
-    train_reader, test_reader = create_reader(
-        padding_num=padding_num,
-        train_data_path=train_data_path,
-        test_data_path=test_data_path,
-        src_dict=src_dict,
-        trg_dict=trg_dict,
-        pos_size=pos_size,
-        batch_size=batch_size)
-
-    feeding = {
-        'src_word': 0,
-        'src_word_pos': 1,
-        'trg_word': 2,
-        'trg_word_pos': 3,
-        'trg_next_word': 4
-    }
-
-    # create event handler
-    def event_handler(event):
-        if isinstance(event, paddle.event.EndIteration):
-            if event.batch_id % 20 == 0:
-                cur_time = time.strftime('%Y.%m.%d %H:%M:%S', time.localtime())
-                print "[%s]: Pass: %d, Batch: %d, TrainCost: %f, %s" % (
-                    cur_time, event.pass_id, event.batch_id, event.cost,
-                    event.metrics)
-            else:
-                sys.stdout.flush()
-
-        if isinstance(event, paddle.event.EndPass):
-            if test_reader is not None:
-                cur_time = time.strftime('%Y.%m.%d %H:%M:%S', time.localtime())
-                result = trainer.test(reader=test_reader, feeding=feeding)
-                print "[%s]: Pass: %d, TestCost: %f, %s" % (
-                    cur_time, event.pass_id, result.cost, result.metrics)
-                sys.stdout.flush()
-            with gzip.open("output/params.pass-%d.tar.gz" % event.pass_id,
-                           'w') as f:
-                trainer.save_parameter_to_tar(f)
-
-    if not os.path.exists('output'):
-        os.mkdir('output')
-
-    trainer.train(
-        reader=train_reader,
-        event_handler=event_handler,
-        num_passes=num_passes,
-        feeding=feeding)
-
-
-def main():
-    args = parse_args()
-    enc_conv_blocks = eval(args.enc_blocks)
-    dec_conv_blocks = eval(args.dec_blocks)
-
-    paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count)
-
-    train(
-        train_data_path=args.train_data_path,
-        test_data_path=args.test_data_path,
-        src_dict_path=args.src_dict_path,
-        trg_dict_path=args.trg_dict_path,
-        enc_conv_blocks=enc_conv_blocks,
-        dec_conv_blocks=dec_conv_blocks,
-        emb_dim=args.emb_size,
-        pos_size=args.pos_size,
-        drop_rate=args.drop_rate,
-        batch_size=args.batch_size,
-        num_passes=args.num_passes)
-
-
-if __name__ == '__main__':
-    main()
--
GitLab
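The core building block removed in model.py above is a gated convolution: each block projects a context window to `2 * size` channels, treats the first half as the candidate output and the sigmoid of the second half as a gate, and multiplies the two. Below is a minimal NumPy sketch of just that gating step, given after the patch for reference; the function name, array names, and shapes are illustrative assumptions and are not part of the deleted code.

```python
import numpy as np


def gated_linear_unit(context_proj):
    """Split a (seq_len, 2 * size) projection into value and gate halves
    and combine them as value * sigmoid(gate), as in Gehring et al. (2017)."""
    size = context_proj.shape[-1] // 2
    value, gate = context_proj[:, :size], context_proj[:, size:]
    return value * (1.0 / (1.0 + np.exp(-gate)))


# Illustrative usage: 5 time steps, block width 256 -> projection width 512.
proj = np.random.randn(5, 512).astype("float32")
out = gated_linear_unit(proj)  # shape (5, 256)
```

In the deleted implementation this product is what `gated_conv_with_batchnorm` returns for each encoder and decoder block, computed after batch normalization of the projection.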