diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8ff36e098ba9ea25faec99ef2bf5ced768483975..ede1c53a4aefc9cc144ea20f57193bc5dbb886ec 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,11 +33,3 @@ entry: bash .clang_format.hook -i language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$ -- repo: local - hooks: - - id: convert-markdown-into-html - name: convert-markdown-into-html - description: Convert README.md into index.html - entry: python .pre-commit-hooks/convert_markdown_into_html.py - language: system - files: .+README\.md$ diff --git a/.pre-commit-hooks/convert_markdown_into_html.py b/.pre-commit-hooks/convert_markdown_into_html.py deleted file mode 100644 index 66f44ef23c5d9a82436dfbe4b6bcdfc4e69ab55a..0000000000000000000000000000000000000000 --- a/.pre-commit-hooks/convert_markdown_into_html.py +++ /dev/null @@ -1,95 +0,0 @@ -import argparse -import re -import sys - -HEAD = """ - - - - - - - - - - - - - - - - -
-
- - - - - - - -""" - - -def convert_markdown_into_html(argv=None): - parser = argparse.ArgumentParser() - parser.add_argument('filenames', nargs='*', help='Filenames to fix') - args = parser.parse_args(argv) - - retv = 0 - - for filename in args.filenames: - with open( - re.sub(r"README", "index", re.sub(r"\.md$", ".html", filename)), - "w") as output: - output.write(HEAD) - with open(filename) as input: - for line in input: - output.write(line) - output.write(TAIL) - - return retv - - -if __name__ == '__main__': - sys.exit(convert_markdown_into_html()) diff --git a/.travis.yml b/.travis.yml index 0f67f656fde89e087d1324c2a19db2f506e930d2..52bfd5a1ba02b8ff32ef4248e00530fdd1319174 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,20 +17,26 @@ addons: - python-pip - python2.7-dev ssh_known_hosts: 52.76.173.135 + before_install: - sudo pip install -U virtualenv pre-commit pip - docker pull paddlepaddle/paddle:latest + script: - - .travis/precommit.sh - - docker run -i --rm -v "$PWD:/py_unittest" paddlepaddle/paddle:latest /bin/bash -c - 'cd /py_unittest; sh .travis/unittest.sh' + - exit_code=0 + - .travis/precommit.sh || exit_code=$(( exit_code | $? )) + - docker run -i --rm -v "$PWD:/py_unittest" paddlepaddle/paddle:latest /bin/bash -c + 'cd /py_unittest; sh .travis/unittest.sh' || exit_code=$(( exit_code | $? )) - | - if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi; - if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then echo "not develop branch, no deploy"; exit 0; fi; + if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit $exit_code; fi; + if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then echo "not develop branch, no deploy"; exit $exit_code; fi; export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/master/scripts/deploy/deploy_docs.sh export MODELS_DIR=`pwd` cd .. 
curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH $MODELS_DIR + exit_code=$(( exit_code | $? )) + exit $exit_code + notifications: email: on_success: change diff --git a/README.md b/README.md index 178d76de7a01d13f44da1b94569689a7527119bc..3b2da82aeebb5fc0535144940965598c8d9dfc02 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ The word embedding expresses words with a real vector. Each dimension of the vec In the example of word vectors, we show how to use Hierarchical-Sigmoid and Noise Contrastive Estimation (NCE) to accelerate word-vector learning. - 1.1 [Hsigmoid Accelerated Word Vector Training](https://github.com/PaddlePaddle/models/tree/develop/hsigmoid) -- 1.2 [Noise Contrast Estimation Accelerated Word Vector Training](https://github.com/PaddlePaddle/models/tree/develop/nce_cost) +- 1.2 [Noise Contrastive Estimation Accelerated Word Vector Training](https://github.com/PaddlePaddle/models/tree/develop/nce_cost) ## 2. RNN language model diff --git a/conv_seq2seq/README.md b/conv_seq2seq/README.md index 817c464a3a08d5f6626f432c3dd6f8f81327d66f..75ea8770266cc277843608de8320d74d54d1e8e4 100644 --- a/conv_seq2seq/README.md +++ b/conv_seq2seq/README.md @@ -3,47 +3,55 @@ This model implements the work in the following paper: Jonas Gehring, Micheal Auli, David Grangier, et al. Convolutional Sequence to Sequence Learning. Association for Computational Linguistics (ACL), 2017 +# Data Preparation + +- In this tutorial, each line in a data file contains one sample and each sample consists of a source sentence and a target sentence. And the two sentences are separated by '\t'. 
So, to use your own data, it should be organized as follows: + + ``` + <source sentence>\t<target sentence> + ``` + # Training a Model - Modify the following script if needed and then run: - ```bash - python train.py \ - --train_data_path ./data/train_data \ - --test_data_path ./data/test_data \ - --src_dict_path ./data/src_dict \ - --trg_dict_path ./data/trg_dict \ - --enc_blocks "[(256, 3)] * 5" \ - --dec_blocks "[(256, 3)] * 3" \ - --emb_size 256 \ - --pos_size 200 \ - --drop_rate 0.1 \ - --use_gpu False \ - --trainer_count 1 \ - --batch_size 32 \ - --num_passes 20 \ - >train.log 2>&1 - ``` + ```bash + python train.py \ + --train_data_path ./data/train_data \ + --test_data_path ./data/test_data \ + --src_dict_path ./data/src_dict \ + --trg_dict_path ./data/trg_dict \ + --enc_blocks "[(256, 3)] * 5" \ + --dec_blocks "[(256, 3)] * 3" \ + --emb_size 256 \ + --pos_size 200 \ + --drop_rate 0.1 \ + --use_gpu False \ + --trainer_count 1 \ + --batch_size 32 \ + --num_passes 20 \ + >train.log 2>&1 + ``` # Inferring by a Trained Model - Infer by a trained model by running: - ```bash - python infer.py \ - --infer_data_path ./data/infer_data \ - --src_dict_path ./data/src_dict \ - --trg_dict_path ./data/trg_dict \ - --enc_blocks "[(256, 3)] * 5" \ - --dec_blocks "[(256, 3)] * 3" \ - --emb_size 256 \ - --pos_size 200 \ - --drop_rate 0.1 \ - --use_gpu False \ - --trainer_count 1 \ - --max_len 100 \ - --beam_size 1 \ - --model_path ./params.pass-0.tar.gz \ - 1>infer_result 2>infer.log - ``` + ```bash + python infer.py \ + --infer_data_path ./data/infer_data \ + --src_dict_path ./data/src_dict \ + --trg_dict_path ./data/trg_dict \ + --enc_blocks "[(256, 3)] * 5" \ + --dec_blocks "[(256, 3)] * 3" \ + --emb_size 256 \ + --pos_size 200 \ + --drop_rate 0.1 \ + --use_gpu False \ + --trainer_count 1 \ + --max_len 100 \ + --beam_size 1 \ + --model_path ./params.pass-0.tar.gz \ + 1>infer_result 2>infer.log + ``` # Notes diff --git a/conv_seq2seq/model.py b/conv_seq2seq/model.py index 
01dd94288b4bbee2c4099a029ac042cec0fdc53d..85f23862ce53871edc37f2c0a617f0130798a66b 100644 --- a/conv_seq2seq/model.py +++ b/conv_seq2seq/model.py @@ -147,7 +147,8 @@ def encoder(token_emb, encoded_sum = paddle.layer.addto(input=[encoded_vec, embedding]) # halve the variance of the sum - encoded_sum = paddle.layer.slope_intercept(input=encoded_sum, slope=math.sqrt(0.5)) + encoded_sum = paddle.layer.slope_intercept( + input=encoded_sum, slope=math.sqrt(0.5)) return encoded_vec, encoded_sum diff --git a/conv_seq_to_seq/README.md b/conv_seq_to_seq/README.md deleted file mode 100644 index 817c464a3a08d5f6626f432c3dd6f8f81327d66f..0000000000000000000000000000000000000000 --- a/conv_seq_to_seq/README.md +++ /dev/null @@ -1,50 +0,0 @@ -# Convolutional Sequence to Sequence Learning -This model implements the work in the following paper: - -Jonas Gehring, Micheal Auli, David Grangier, et al. Convolutional Sequence to Sequence Learning. Association for Computational Linguistics (ACL), 2017 - -# Training a Model -- Modify the following script if needed and then run: - - ```bash - python train.py \ - --train_data_path ./data/train_data \ - --test_data_path ./data/test_data \ - --src_dict_path ./data/src_dict \ - --trg_dict_path ./data/trg_dict \ - --enc_blocks "[(256, 3)] * 5" \ - --dec_blocks "[(256, 3)] * 3" \ - --emb_size 256 \ - --pos_size 200 \ - --drop_rate 0.1 \ - --use_gpu False \ - --trainer_count 1 \ - --batch_size 32 \ - --num_passes 20 \ - >train.log 2>&1 - ``` - -# Inferring by a Trained Model -- Infer by a trained model by running: - - ```bash - python infer.py \ - --infer_data_path ./data/infer_data \ - --src_dict_path ./data/src_dict \ - --trg_dict_path ./data/trg_dict \ - --enc_blocks "[(256, 3)] * 5" \ - --dec_blocks "[(256, 3)] * 3" \ - --emb_size 256 \ - --pos_size 200 \ - --drop_rate 0.1 \ - --use_gpu False \ - --trainer_count 1 \ - --max_len 100 \ - --beam_size 1 \ - --model_path ./params.pass-0.tar.gz \ - 1>infer_result 2>infer.log - ``` - -# Notes - 
-Currently, beam search will forward the encoder multiple times when predicting each target word, which requires extra computations. And we will fix it later. diff --git a/conv_seq_to_seq/beamsearch.py b/conv_seq_to_seq/beamsearch.py deleted file mode 100644 index 45656e809b4000af4b502bda157a4864ad3fff3a..0000000000000000000000000000000000000000 --- a/conv_seq_to_seq/beamsearch.py +++ /dev/null @@ -1,163 +0,0 @@ -#coding=utf-8 - -import sys -import time -import numpy as np - - -class BeamSearch(object): - """ - Generate sequence by beam search - NOTE: this class only implements generating one sentence at a time. - """ - - def __init__(self, - inferer, - trg_dict, - pos_size, - padding_num, - beam_size=1, - max_len=100): - self.inferer = inferer - self.trg_dict = trg_dict - self.word_padding = trg_dict.__len__() - self.pos_size = pos_size - self.pos_padding = pos_size - self.padding_num = padding_num - self.win_len = padding_num + 1 - self.max_len = max_len - self.beam_size = beam_size - - def get_beam_input(self, pre_beam_list, infer_data): - """ - Get input for generation at the current iteration. - """ - beam_input = [] - - if len(pre_beam_list) == 0: - cur_trg = [self.word_padding - ] * self.padding_num + [self.trg_dict['']] - cur_trg_pos = [self.pos_padding] * self.padding_num + [0] - beam_input.append(infer_data + [cur_trg] + [cur_trg_pos]) - else: - for seq in pre_beam_list: - if len(seq) < self.win_len: - cur_trg = [self.word_padding] * ( - self.win_len - len(seq) - 1 - ) + [self.trg_dict['']] + seq - cur_trg_pos = [self.pos_padding] * ( - self.win_len - len(seq) - 1) + [0] + range(1, - len(seq) + 1) - else: - cur_trg = seq[-self.win_len:] - cur_trg_pos = range( - len(seq) + 1 - self.win_len, len(seq) + 1) - - beam_input.append(infer_data + [cur_trg] + [cur_trg_pos]) - return beam_input - - def get_prob(self, beam_input): - """ - Get the probabilities of all possible tokens. 
- """ - row_list = [j * self.win_len for j in range(len(beam_input))] - prob = self.inferer.infer(beam_input, field='value')[row_list, :] - return prob - - def get_candidate(self, pre_beam_list, pre_beam_score, prob): - """ - Get top beam_size tokens and their scores for each beam. - """ - if prob.ndim == 1: - candidate_id = prob.argsort()[-self.beam_size:][::-1] - candidate_log_prob = np.log(prob[candidate_id]) - else: - candidate_id = prob.argsort()[:, -self.beam_size:][:, ::-1] - candidate_log_prob = np.zeros_like(candidate_id).astype('float32') - for j in range(len(pre_beam_list)): - candidate_log_prob[j, :] = np.log(prob[j, candidate_id[j, :]]) - - if pre_beam_score.size > 0: - candidate_score = candidate_log_prob + pre_beam_score.reshape( - (pre_beam_score.size, 1)) - else: - candidate_score = candidate_log_prob - - return candidate_id, candidate_score - - def prune(self, candidate_id, candidate_score, pre_beam_list, - completed_seq_list, completed_seq_score, completed_seq_min_score): - """ - Pruning process of the beam search. During the process, beam_size most possible sequences - are selected for the beam in the next iteration. Besides, their scores and the minimum score - of the completed sequences are updated. 
- """ - candidate_id = candidate_id.flatten() - candidate_score = candidate_score.flatten() - - topk_idx = candidate_score.argsort()[-self.beam_size:][::-1].tolist() - topk_seq_idx = [idx / self.beam_size for idx in topk_idx] - - next_beam = [] - beam_score = [] - for j in range(len(topk_idx)): - if candidate_id[topk_idx[j]] == self.trg_dict['']: - if len( - completed_seq_list - ) < self.beam_size or completed_seq_min_score <= candidate_score[ - topk_idx[j]]: - completed_seq_list.append(pre_beam_list[topk_seq_idx[j]]) - completed_seq_score.append(candidate_score[topk_idx[j]]) - - if completed_seq_min_score is None or ( - completed_seq_min_score >= - candidate_score[topk_idx[j]] and - len(completed_seq_list) < self.beam_size): - completed_seq_min_score = candidate_score[topk_idx[j]] - else: - seq = pre_beam_list[topk_seq_idx[ - j]] + [candidate_id[topk_idx[j]]] - score = candidate_score[topk_idx[j]] - next_beam.append(seq) - beam_score.append(score) - - beam_score = np.array(beam_score) - return next_beam, beam_score, completed_seq_min_score - - def search_one_sample(self, infer_data): - """ - Beam search process for one sample. 
- """ - completed_seq_list = [] - completed_seq_score = [] - completed_seq_min_score = None - uncompleted_seq_list = [[]] - uncompleted_seq_score = np.zeros(0) - - for i in xrange(self.max_len): - beam_input = self.get_beam_input(uncompleted_seq_list, infer_data) - - prob = self.get_prob(beam_input) - - candidate_id, candidate_score = self.get_candidate( - uncompleted_seq_list, uncompleted_seq_score, prob) - - uncompleted_seq_list, uncompleted_seq_score, completed_seq_min_score = self.prune( - candidate_id, candidate_score, uncompleted_seq_list, - completed_seq_list, completed_seq_score, - completed_seq_min_score) - - if len(uncompleted_seq_list) == 0: - break - if len(completed_seq_list) >= self.beam_size: - seq_max_score = uncompleted_seq_score.max() - if seq_max_score < completed_seq_min_score: - uncompleted_seq_list = [] - break - - final_seq_list = completed_seq_list + uncompleted_seq_list - final_score = np.concatenate( - (np.array(completed_seq_score), uncompleted_seq_score)) - max_id = final_score.argmax() - top_seq = final_seq_list[max_id] - return top_seq diff --git a/conv_seq_to_seq/infer.py b/conv_seq_to_seq/infer.py deleted file mode 100644 index eb46df5549f11217fa2881b8657b2b6add4bd7ce..0000000000000000000000000000000000000000 --- a/conv_seq_to_seq/infer.py +++ /dev/null @@ -1,199 +0,0 @@ -#coding=utf-8 - -import sys -import argparse -import distutils.util -import gzip - -import paddle.v2 as paddle -from model import conv_seq2seq -from beamsearch import BeamSearch -import reader - - -def parse_args(): - parser = argparse.ArgumentParser( - description="PaddlePaddle Convolutional Seq2Seq") - parser.add_argument( - '--infer_data_path', - type=str, - required=True, - help="Path of the dataset for inference") - parser.add_argument( - '--src_dict_path', - type=str, - required=True, - help='Path of the source dictionary') - parser.add_argument( - '--trg_dict_path', - type=str, - required=True, - help='path of the target dictionary') - parser.add_argument( - 
'--enc_blocks', type=str, help='Convolution blocks of the encoder') - parser.add_argument( - '--dec_blocks', type=str, help='Convolution blocks of the decoder') - parser.add_argument( - '--emb_size', - type=int, - default=512, - help='Dimension of word embedding. (default: %(default)s)') - parser.add_argument( - '--pos_size', - type=int, - default=200, - help='Total number of the position indexes. (default: %(default)s)') - parser.add_argument( - '--drop_rate', - type=float, - default=0., - help='Dropout rate. (default: %(default)s)') - parser.add_argument( - "--use_gpu", - default=False, - type=distutils.util.strtobool, - help="Use gpu or not. (default: %(default)s)") - parser.add_argument( - "--trainer_count", - default=1, - type=int, - help="Trainer number. (default: %(default)s)") - parser.add_argument( - '--max_len', - type=int, - default=100, - help="The maximum length of the sentence to be generated. (default: %(default)s)" - ) - parser.add_argument( - "--beam_size", - default=1, - type=int, - help="The width of beam expasion. (default: %(default)s)") - parser.add_argument( - "--model_path", - type=str, - required=True, - help="The path of trained model. (default: %(default)s)") - return parser.parse_args() - - -def to_sentence(seq, dictionary): - raw_sentence = [dictionary[id] for id in seq] - sentence = " ".join(raw_sentence) - return sentence - - -def infer(infer_data_path, - src_dict_path, - trg_dict_path, - model_path, - enc_conv_blocks, - dec_conv_blocks, - emb_dim=512, - pos_size=200, - drop_rate=0., - max_len=100, - beam_size=1): - """ - Inference. - - :param infer_data_path: The path of the data for inference. - :type infer_data_path: str - :param src_dict_path: The path of the source dictionary. - :type src_dict_path: str - :param trg_dict_path: The path of the target dictionary. - :type trg_dict_path: str - :param model_path: The path of a trained model. 
- :type model_path: str - :param enc_conv_blocks: The scale list of the encoder's convolution blocks. And each element of - the list contains output dimension and context length of the corresponding - convolution block. - :type enc_conv_blocks: list of tuple - :param dec_conv_blocks: The scale list of the decoder's convolution blocks. And each element of - the list contains output dimension and context length of the corresponding - convolution block. - :type dec_conv_blocks: list of tuple - :param emb_dim: The dimension of the embedding vector. - :type emb_dim: int - :param pos_size: The total number of the position indexes, which means - the maximum value of the index is pos_size - 1. - :type pos_size: int - :param drop_rate: Dropout rate. - :type drop_rate: float - :param max_len: The maximum length of the sentence to be generated. - :type max_len: int - :param beam_size: The width of beam expansion. - :type beam_size: int - """ - # load dict - src_dict = reader.load_dict(src_dict_path) - trg_dict = reader.load_dict(trg_dict_path) - src_dict_size = src_dict.__len__() - trg_dict_size = trg_dict.__len__() - - prob = conv_seq2seq( - src_dict_size=src_dict_size, - trg_dict_size=trg_dict_size, - pos_size=pos_size, - emb_dim=emb_dim, - enc_conv_blocks=enc_conv_blocks, - dec_conv_blocks=dec_conv_blocks, - drop_rate=drop_rate, - is_infer=True) - - # load parameters - parameters = paddle.parameters.Parameters.from_tar(gzip.open(model_path)) - - padding_list = [context_len - 1 for (size, context_len) in dec_conv_blocks] - padding_num = reduce(lambda x, y: x + y, padding_list) - infer_reader = reader.data_reader( - data_file=infer_data_path, - src_dict=src_dict, - trg_dict=trg_dict, - pos_size=pos_size, - padding_num=padding_num) - - inferer = paddle.inference.Inference( - output_layer=prob, parameters=parameters) - - searcher = BeamSearch( - inferer=inferer, - trg_dict=trg_dict, - pos_size=pos_size, - padding_num=padding_num, - max_len=max_len, - beam_size=beam_size) - - 
reverse_trg_dict = reader.get_reverse_dict(trg_dict) - for i, raw_data in enumerate(infer_reader()): - infer_data = [raw_data[0], raw_data[1]] - result = searcher.search_one_sample(infer_data) - sentence = to_sentence(result, reverse_trg_dict) - print sentence - sys.stdout.flush() - return - - -def main(): - args = parse_args() - enc_conv_blocks = eval(args.enc_blocks) - dec_conv_blocks = eval(args.dec_blocks) - - paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) - - infer( - infer_data_path=args.infer_data_path, - src_dict_path=args.src_dict_path, - trg_dict_path=args.trg_dict_path, - model_path=args.model_path, - enc_conv_blocks=enc_conv_blocks, - dec_conv_blocks=dec_conv_blocks, - emb_dim=args.emb_size, - pos_size=args.pos_size, - drop_rate=args.drop_rate, - max_len=args.max_len, - beam_size=args.beam_size) - - -if __name__ == '__main__': - main() diff --git a/conv_seq_to_seq/model.py b/conv_seq_to_seq/model.py deleted file mode 100644 index 01dd94288b4bbee2c4099a029ac042cec0fdc53d..0000000000000000000000000000000000000000 --- a/conv_seq_to_seq/model.py +++ /dev/null @@ -1,417 +0,0 @@ -#coding=utf-8 - -import math - -import paddle.v2 as paddle - -__all__ = ["conv_seq2seq"] - - -def gated_conv_with_batchnorm(input, - size, - context_len, - context_start=None, - learning_rate=1.0, - drop_rate=0.): - """ - Definition of the convolution block. - - :param input: The input of this block. - :type input: LayerOutput - :param size: The dimension of the block's output. - :type size: int - :param context_len: The context length of the convolution. - :type context_len: int - :param context_start: The start position of the context. - :type context_start: int - :param learning_rate: The learning rate factor of the parameters in the block. - The actual learning rate is the product of the global - learning rate and this factor. - :type learning_rate: float - :param drop_rate: Dropout rate. 
- :type drop_rate: float - :return: The output of the convolution block. - :rtype: LayerOutput - """ - input = paddle.layer.dropout(input=input, dropout_rate=drop_rate) - - context = paddle.layer.mixed( - size=input.size * context_len, - input=paddle.layer.context_projection( - input=input, context_len=context_len, context_start=context_start)) - - raw_conv = paddle.layer.fc( - input=context, - size=size * 2, - act=paddle.activation.Linear(), - param_attr=paddle.attr.Param( - initial_mean=0., - initial_std=math.sqrt(4.0 * (1.0 - drop_rate) / context.size), - learning_rate=learning_rate), - bias_attr=False) - - batch_norm_conv = paddle.layer.batch_norm( - input=raw_conv, - act=paddle.activation.Linear(), - param_attr=paddle.attr.Param(learning_rate=learning_rate)) - - with paddle.layer.mixed(size=size) as conv: - conv += paddle.layer.identity_projection( - batch_norm_conv, size=size, offset=0) - - with paddle.layer.mixed(size=size, act=paddle.activation.Sigmoid()) as gate: - gate += paddle.layer.identity_projection( - batch_norm_conv, size=size, offset=size) - - with paddle.layer.mixed(size=size) as gated_conv: - gated_conv += paddle.layer.dotmul_operator(conv, gate) - - return gated_conv - - -def encoder(token_emb, - pos_emb, - conv_blocks=[(256, 3)] * 5, - num_attention=3, - drop_rate=0.1): - """ - Definition of the encoder. - - :param token_emb: The embedding vector of the input token. - :type token_emb: LayerOutput - :param pos_emb: The embedding vector of the input token's position. - :type pos_emb: LayerOutput - :param conv_blocks: The scale list of the convolution blocks. Each element of - the list contains output dimension and context length of - the corresponding convolution block. - :type conv_blocks: list of tuple - :param num_attention: The total number of the attention modules used in the decoder. - :type num_attention: int - :param drop_rate: Dropout rate. - :type drop_rate: float - :return: The input token encoding. 
- :rtype: LayerOutput - """ - embedding = paddle.layer.addto( - input=[token_emb, pos_emb], - layer_attr=paddle.attr.Extra(drop_rate=drop_rate)) - - proj_size = conv_blocks[0][0] - block_input = paddle.layer.fc( - input=embedding, - size=proj_size, - act=paddle.activation.Linear(), - param_attr=paddle.attr.Param( - initial_mean=0., - initial_std=math.sqrt((1.0 - drop_rate) / embedding.size), - learning_rate=1.0 / (2.0 * num_attention)), - bias_attr=True, ) - - for (size, context_len) in conv_blocks: - if block_input.size == size: - residual = block_input - else: - residual = paddle.layer.fc( - input=block_input, - size=size, - act=paddle.activation.Linear(), - param_attr=paddle.attr.Param(learning_rate=1.0 / - (2.0 * num_attention)), - bias_attr=True) - - gated_conv = gated_conv_with_batchnorm( - input=block_input, - size=size, - context_len=context_len, - learning_rate=1.0 / (2.0 * num_attention), - drop_rate=drop_rate) - - with paddle.layer.mixed(size=size) as block_output: - block_output += paddle.layer.identity_projection(residual) - block_output += paddle.layer.identity_projection(gated_conv) - - # halve the variance of the sum - block_output = paddle.layer.slope_intercept( - input=block_output, slope=math.sqrt(0.5)) - - block_input = block_output - - emb_dim = embedding.size - encoded_vec = paddle.layer.fc( - input=block_output, - size=emb_dim, - act=paddle.activation.Linear(), - param_attr=paddle.attr.Param(learning_rate=1.0 / (2.0 * num_attention)), - bias_attr=True) - - encoded_sum = paddle.layer.addto(input=[encoded_vec, embedding]) - - # halve the variance of the sum - encoded_sum = paddle.layer.slope_intercept(input=encoded_sum, slope=math.sqrt(0.5)) - - return encoded_vec, encoded_sum - - -def attention(decoder_state, cur_embedding, encoded_vec, encoded_sum): - """ - Definition of the attention. - - :param decoder_state: The hidden state of the decoder. 
- :type decoder_state: LayerOutput - :param cur_embedding: The embedding vector of the current token. - :type cur_embedding: LayerOutput - :param encoded_vec: The source token encoding. - :type encoded_vec: LayerOutput - :param encoded_sum: The sum of the source token's encoding and embedding. - :type encoded_sum: LayerOutput - :return: A context vector. - :rtype: LayerOutput - """ - residual = decoder_state - - state_size = decoder_state.size - emb_dim = cur_embedding.size - with paddle.layer.mixed(size=emb_dim, bias_attr=True) as state_summary: - state_summary += paddle.layer.full_matrix_projection(decoder_state) - state_summary += paddle.layer.identity_projection(cur_embedding) - - # halve the variance of the sum - state_summary = paddle.layer.slope_intercept( - input=state_summary, slope=math.sqrt(0.5)) - - expanded = paddle.layer.expand(input=state_summary, expand_as=encoded_vec) - - m = paddle.layer.linear_comb(weights=expanded, vectors=encoded_vec) - - attention_weight = paddle.layer.fc( - input=m, - size=1, - act=paddle.activation.SequenceSoftmax(), - bias_attr=False) - - scaled = paddle.layer.scaling(weight=attention_weight, input=encoded_sum) - - attended = paddle.layer.pooling( - input=scaled, pooling_type=paddle.pooling.Sum()) - - attended_proj = paddle.layer.fc( - input=attended, - size=state_size, - act=paddle.activation.Linear(), - bias_attr=True) - - attention_result = paddle.layer.addto(input=[attended_proj, residual]) - - # halve the variance of the sum - attention_result = paddle.layer.slope_intercept( - input=attention_result, slope=math.sqrt(0.5)) - return attention_result - - -def decoder(token_emb, - pos_emb, - encoded_vec, - encoded_sum, - dict_size, - conv_blocks=[(256, 3)] * 3, - drop_rate=0.1): - """ - Definition of the decoder. - - :param token_emb: The embedding vector of the input token. - :type token_emb: LayerOutput - :param pos_emb: The embedding vector of the input token's position. 
- :type pos_emb: LayerOutput - :param encoded_vec: The source token encoding. - :type encoded_vec: LayerOutput - :param encoded_sum: The sum of the source token's encoding and embedding. - :type encoded_sum: LayerOutput - :param dict_size: The size of the target dictionary. - :type dict_size: int - :param conv_blocks: The scale list of the convolution blocks. Each element - of the list contains output dimension and context length - of the corresponding convolution block. - :type conv_blocks: list of tuple - :param drop_rate: Dropout rate. - :type drop_rate: float - :return: The probability of the predicted token. - :rtype: LayerOutput - """ - - def attention_step(decoder_state, cur_embedding, encoded_vec, encoded_sum): - conditional = attention( - decoder_state=decoder_state, - cur_embedding=cur_embedding, - encoded_vec=encoded_vec, - encoded_sum=encoded_sum) - return conditional - - embedding = paddle.layer.addto( - input=[token_emb, pos_emb], - layer_attr=paddle.attr.Extra(drop_rate=drop_rate)) - - proj_size = conv_blocks[0][0] - block_input = paddle.layer.fc( - input=embedding, - size=proj_size, - act=paddle.activation.Linear(), - param_attr=paddle.attr.Param( - initial_mean=0., - initial_std=math.sqrt((1.0 - drop_rate) / embedding.size)), - bias_attr=True, ) - - for (size, context_len) in conv_blocks: - if block_input.size == size: - residual = block_input - else: - residual = paddle.layer.fc( - input=block_input, - size=size, - act=paddle.activation.Linear(), - bias_attr=True) - - decoder_state = gated_conv_with_batchnorm( - input=block_input, - size=size, - context_len=context_len, - context_start=0, - drop_rate=drop_rate) - - group_inputs = [ - decoder_state, - embedding, - paddle.layer.StaticInput(input=encoded_vec), - paddle.layer.StaticInput(input=encoded_sum), - ] - - conditional = paddle.layer.recurrent_group( - step=attention_step, input=group_inputs) - - block_output = paddle.layer.addto(input=[conditional, residual]) - - # halve the variance of the 
sum - block_output = paddle.layer.slope_intercept( - input=block_output, slope=math.sqrt(0.5)) - - block_input = block_output - - out_emb_dim = embedding.size - block_output = paddle.layer.fc( - input=block_output, - size=out_emb_dim, - act=paddle.activation.Linear(), - layer_attr=paddle.attr.Extra(drop_rate=drop_rate)) - - decoder_out = paddle.layer.fc( - input=block_output, - size=dict_size, - act=paddle.activation.Softmax(), - param_attr=paddle.attr.Param( - initial_mean=0., - initial_std=math.sqrt((1.0 - drop_rate) / block_output.size)), - bias_attr=True) - - return decoder_out - - -def conv_seq2seq(src_dict_size, - trg_dict_size, - pos_size, - emb_dim, - enc_conv_blocks=[(256, 3)] * 5, - dec_conv_blocks=[(256, 3)] * 3, - drop_rate=0.1, - is_infer=False): - """ - Definition of convolutional sequence-to-sequence network. - - :param src_dict_size: The size of the source dictionary. - :type src_dict_size: int - :param trg_dict_size: The size of the target dictionary. - :type trg_dict_size: int - :param pos_size: The total number of the position indexes, which means - the maximum value of the index is pos_size - 1. - :type pos_size: int - :param emb_dim: The dimension of the embedding vector. - :type emb_dim: int - :param enc_conv_blocks: The scale list of the encoder's convolution blocks. Each element - of the list contains output dimension and context length of the - corresponding convolution block. - :type enc_conv_blocks: list of tuple - :param dec_conv_blocks: The scale list of the decoder's convolution blocks. Each element - of the list contains output dimension and context length of the - corresponding convolution block. - :type dec_conv_blocks: list of tuple - :param drop_rate: Dropout rate. - :type drop_rate: float - :param is_infer: Whether infer or not. - :type is_infer: bool - :return: Cost or output layer. 
- :rtype: LayerOutput - """ - src = paddle.layer.data( - name='src_word', - type=paddle.data_type.integer_value_sequence(src_dict_size)) - src_pos = paddle.layer.data( - name='src_word_pos', - type=paddle.data_type.integer_value_sequence(pos_size + - 1)) # one for padding - - src_emb = paddle.layer.embedding( - input=src, - size=emb_dim, - name='src_word_emb', - param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1)) - src_pos_emb = paddle.layer.embedding( - input=src_pos, - size=emb_dim, - name='src_pos_emb', - param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1)) - - num_attention = len(dec_conv_blocks) - encoded_vec, encoded_sum = encoder( - token_emb=src_emb, - pos_emb=src_pos_emb, - conv_blocks=enc_conv_blocks, - num_attention=num_attention, - drop_rate=drop_rate) - - trg = paddle.layer.data( - name='trg_word', - type=paddle.data_type.integer_value_sequence(trg_dict_size + - 1)) # one for padding - trg_pos = paddle.layer.data( - name='trg_word_pos', - type=paddle.data_type.integer_value_sequence(pos_size + - 1)) # one for padding - - trg_emb = paddle.layer.embedding( - input=trg, - size=emb_dim, - name='trg_word_emb', - param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1)) - trg_pos_emb = paddle.layer.embedding( - input=trg_pos, - size=emb_dim, - name='trg_pos_emb', - param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1)) - - decoder_out = decoder( - token_emb=trg_emb, - pos_emb=trg_pos_emb, - encoded_vec=encoded_vec, - encoded_sum=encoded_sum, - dict_size=trg_dict_size, - conv_blocks=dec_conv_blocks, - drop_rate=drop_rate) - - if is_infer: - return decoder_out - - trg_next_word = paddle.layer.data( - name='trg_next_word', - type=paddle.data_type.integer_value_sequence(trg_dict_size)) - cost = paddle.layer.classification_cost( - input=decoder_out, label=trg_next_word) - - return cost diff --git a/conv_seq_to_seq/reader.py b/conv_seq_to_seq/reader.py deleted file mode 100644 index 
6d4db49f2d34152f4e178034c6e60d287352fe38..0000000000000000000000000000000000000000 --- a/conv_seq_to_seq/reader.py +++ /dev/null @@ -1,67 +0,0 @@ -#coding=utf-8 - -import random - - -def load_dict(dict_file): - word_dict = dict() - with open(dict_file, 'r') as f: - for i, line in enumerate(f): - w = line.strip().split()[0] - word_dict[w] = i - return word_dict - - -def get_reverse_dict(dictionary): - reverse_dict = {dictionary[k]: k for k in dictionary.keys()} - return reverse_dict - - -def load_data(data_file, src_dict, trg_dict): - UNK_IDX = src_dict[''] - with open(data_file, 'r') as f: - for line in f: - line_split = line.strip().split('\t') - if len(line_split) < 2: - continue - src, trg = line_split - src_words = src.strip().split() - trg_words = trg.strip().split() - src_seq = [src_dict.get(w, UNK_IDX) for w in src_words] - trg_seq = [trg_dict.get(w, UNK_IDX) for w in trg_words] - yield src_seq, trg_seq - - -def data_reader(data_file, src_dict, trg_dict, pos_size, padding_num): - def reader(): - UNK_IDX = src_dict[''] - word_padding = trg_dict.__len__() - pos_padding = pos_size - - def _get_pos(pos_list, pos_size, pos_padding): - return [pos if pos < pos_size else pos_padding for pos in pos_list] - - with open(data_file, 'r') as f: - for line in f: - line_split = line.strip().split('\t') - if len(line_split) != 2: - continue - src, trg = line_split - src = src.strip().split() - src_word = [src_dict.get(w, UNK_IDX) for w in src] - src_word_pos = range(len(src_word)) - src_word_pos = _get_pos(src_word_pos, pos_size, pos_padding) - - trg = trg.strip().split() - trg_word = [trg_dict[''] - ] + [trg_dict.get(w, UNK_IDX) for w in trg] - trg_word_pos = range(len(trg_word)) - trg_word_pos = _get_pos(trg_word_pos, pos_size, pos_padding) - - trg_next_word = trg_word[1:] + [trg_dict['']] - trg_word = [word_padding] * padding_num + trg_word - trg_word_pos = [pos_padding] * padding_num + trg_word_pos - trg_next_word = trg_next_word + [trg_dict['']] * padding_num - yield 
src_word, src_word_pos, trg_word, trg_word_pos, trg_next_word - - return reader diff --git a/conv_seq_to_seq/train.py b/conv_seq_to_seq/train.py deleted file mode 100644 index c6ce0dff12f7b8c3066e791f13c957ef695f1cf5..0000000000000000000000000000000000000000 --- a/conv_seq_to_seq/train.py +++ /dev/null @@ -1,252 +0,0 @@ -#coding=utf-8 - -import os -import sys -import time -import argparse -import distutils.util -import gzip -import numpy as np - -import paddle.v2 as paddle -from model import conv_seq2seq -import reader - - -def parse_args(): - parser = argparse.ArgumentParser( - description="PaddlePaddle Convolutional Seq2Seq") - parser.add_argument( - '--train_data_path', - type=str, - required=True, - help="Path of the training set") - parser.add_argument( - '--test_data_path', type=str, help='Path of the test set') - parser.add_argument( - '--src_dict_path', - type=str, - required=True, - help='Path of source dictionary') - parser.add_argument( - '--trg_dict_path', - type=str, - required=True, - help='Path of target dictionary') - parser.add_argument( - '--enc_blocks', type=str, help='Convolution blocks of the encoder') - parser.add_argument( - '--dec_blocks', type=str, help='Convolution blocks of the decoder') - parser.add_argument( - '--emb_size', - type=int, - default=512, - help='Dimension of word embedding. (default: %(default)s)') - parser.add_argument( - '--pos_size', - type=int, - default=200, - help='Total number of the position indexes. (default: %(default)s)') - parser.add_argument( - '--drop_rate', - type=float, - default=0., - help='Dropout rate. (default: %(default)s)') - parser.add_argument( - "--use_gpu", - default=False, - type=distutils.util.strtobool, - help="Use gpu or not. (default: %(default)s)") - parser.add_argument( - "--trainer_count", - default=1, - type=int, - help="Trainer number. (default: %(default)s)") - parser.add_argument( - '--batch_size', - type=int, - default=32, - help="Size of a mini-batch. 
(default: %(default)s)") - parser.add_argument( - '--num_passes', - type=int, - default=15, - help="Number of passes to train. (default: %(default)s)") - return parser.parse_args() - - -def create_reader(padding_num, - train_data_path, - test_data_path=None, - src_dict=None, - trg_dict=None, - pos_size=200, - batch_size=32): - - train_reader = paddle.batch( - reader=paddle.reader.shuffle( - reader=reader.data_reader( - data_file=train_data_path, - src_dict=src_dict, - trg_dict=trg_dict, - pos_size=pos_size, - padding_num=padding_num), - buf_size=10240), - batch_size=batch_size) - - test_reader = None - if test_data_path: - test_reader = paddle.batch( - reader=paddle.reader.shuffle( - reader=reader.data_reader( - data_file=test_data_path, - src_dict=src_dict, - trg_dict=trg_dict, - pos_size=pos_size, - padding_num=padding_num), - buf_size=10240), - batch_size=batch_size) - - return train_reader, test_reader - - -def train(train_data_path, - test_data_path, - src_dict_path, - trg_dict_path, - enc_conv_blocks, - dec_conv_blocks, - emb_dim=512, - pos_size=200, - drop_rate=0., - batch_size=32, - num_passes=15): - """ - Train the convolution sequence-to-sequence model. - - :param train_data_path: The path of the training set. - :type train_data_path: str - :param test_data_path: The path of the test set. - :type test_data_path: str - :param src_dict_path: The path of the source dictionary. - :type src_dict_path: str - :param trg_dict_path: The path of the target dictionary. - :type trg_dict_path: str - :param enc_conv_blocks: The scale list of the encoder's convolution blocks. And each element of - the list contains output dimension and context length of the corresponding - convolution block. - :type enc_conv_blocks: list of tuple - :param dec_conv_blocks: The scale list of the decoder's convolution blocks. And each element of - the list contains output dimension and context length of the corresponding - convolution block. 
- :type dec_conv_blocks: list of tuple - :param emb_dim: The dimension of the embedding vector. - :type emb_dim: int - :param pos_size: The total number of the position indexes, which means - the maximum value of the index is pos_size - 1. - :type pos_size: int - :param drop_rate: Dropout rate. - :type drop_rate: float - :param batch_size: The size of a mini-batch. - :type batch_size: int - :param num_passes: The total number of the passes to train. - :type num_passes: int - """ - # load dict - src_dict = reader.load_dict(src_dict_path) - trg_dict = reader.load_dict(trg_dict_path) - src_dict_size = src_dict.__len__() - trg_dict_size = trg_dict.__len__() - - optimizer = paddle.optimizer.Adam( - learning_rate=1e-3, ) - - cost = conv_seq2seq( - src_dict_size=src_dict_size, - trg_dict_size=trg_dict_size, - pos_size=pos_size, - emb_dim=emb_dim, - enc_conv_blocks=enc_conv_blocks, - dec_conv_blocks=dec_conv_blocks, - drop_rate=drop_rate, - is_infer=False) - - # create parameters and trainer - parameters = paddle.parameters.create(cost) - trainer = paddle.trainer.SGD( - cost=cost, parameters=parameters, update_equation=optimizer) - - padding_list = [context_len - 1 for (size, context_len) in dec_conv_blocks] - padding_num = reduce(lambda x, y: x + y, padding_list) - train_reader, test_reader = create_reader( - padding_num=padding_num, - train_data_path=train_data_path, - test_data_path=test_data_path, - src_dict=src_dict, - trg_dict=trg_dict, - pos_size=pos_size, - batch_size=batch_size) - - feeding = { - 'src_word': 0, - 'src_word_pos': 1, - 'trg_word': 2, - 'trg_word_pos': 3, - 'trg_next_word': 4 - } - - # create event handler - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 20 == 0: - cur_time = time.strftime('%Y.%m.%d %H:%M:%S', time.localtime()) - print "[%s]: Pass: %d, Batch: %d, TrainCost: %f, %s" % ( - cur_time, event.pass_id, event.batch_id, event.cost, - event.metrics) - else: - sys.stdout.flush() - - if 
isinstance(event, paddle.event.EndPass): - if test_reader is not None: - cur_time = time.strftime('%Y.%m.%d %H:%M:%S', time.localtime()) - result = trainer.test(reader=test_reader, feeding=feeding) - print "[%s]: Pass: %d, TestCost: %f, %s" % ( - cur_time, event.pass_id, result.cost, result.metrics) - sys.stdout.flush() - with gzip.open("output/params.pass-%d.tar.gz" % event.pass_id, - 'w') as f: - trainer.save_parameter_to_tar(f) - - if not os.path.exists('output'): - os.mkdir('output') - - trainer.train( - reader=train_reader, - event_handler=event_handler, - num_passes=num_passes, - feeding=feeding) - - -def main(): - args = parse_args() - enc_conv_blocks = eval(args.enc_blocks) - dec_conv_blocks = eval(args.dec_blocks) - - paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count) - - train( - train_data_path=args.train_data_path, - test_data_path=args.test_data_path, - src_dict_path=args.src_dict_path, - trg_dict_path=args.trg_dict_path, - enc_conv_blocks=enc_conv_blocks, - dec_conv_blocks=dec_conv_blocks, - emb_dim=args.emb_size, - pos_size=args.pos_size, - drop_rate=args.drop_rate, - batch_size=args.batch_size, - num_passes=args.num_passes) - - -if __name__ == '__main__': - main() diff --git a/ctr/avazu_data_processer.py b/ctr/avazu_data_processer.py index dd148adc244efc64021446b17488ec7f2b1c9bd9..18aa85330ae765a236416ab1826092c1fe6a133a 100644 --- a/ctr/avazu_data_processer.py +++ b/ctr/avazu_data_processer.py @@ -2,6 +2,7 @@ import sys import csv import cPickle import argparse +import os import numpy as np from utils import logger, TaskMode diff --git a/ctr/index.html b/ctr/index.html deleted file mode 100644 index 78dc8e825928780f6998fbd9f8178479c4c2aa04..0000000000000000000000000000000000000000 --- a/ctr/index.html +++ /dev/null @@ -1,403 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/ctr/network_conf.py b/ctr/network_conf.py index b01e4872f108896f60f5d662fe6e1d57295de3f2..f6f4e4a59e17114b279149048c3bf144f2987faa 100644 --- a/ctr/network_conf.py +++ b/ctr/network_conf.py @@ -50,7 +50,7 @@ class CTRmodel(object): self.lr_merged_input = layer.data( name='lr_input', - type=paddle.data_type.sparse_vector(self.lr_input_dim)) + type=paddle.data_type.sparse_float_vector(self.lr_input_dim)) if not self.is_infer: self.click = paddle.layer.data( diff --git a/deep_speech_2/README.md b/deep_speech_2/README.md index 543af0ad108acce896a944ff0da3775262d9c886..427331fcc7bde2a277b6d2cd8314c0d8dd9997df 100644 --- a/deep_speech_2/README.md +++ b/deep_speech_2/README.md @@ -1,3 +1,5 @@ +Deprecated: please check out the new repository [DeepSpeech](https://github.com/PaddlePaddle/DeepSpeech). + # DeepSpeech2 on PaddlePaddle *DeepSpeech2 on PaddlePaddle* is an open-source implementation of end-to-end Automatic Speech Recognition (ASR) engine, based on [Baidu's Deep Speech 2 paper](http://proceedings.mlr.press/v48/amodei16.pdf), with [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform. Our vision is to empower both industrial application and academic research on speech recognition, via an easy-to-use, efficient and scalable implementation, including training, inference & testing module, distributed [PaddleCloud](https://github.com/PaddlePaddle/cloud) training, and demo deployment. Besides, several pre-trained models for both English and Mandarin are also released. @@ -187,7 +189,7 @@ Six optional augmentation components are provided to be selected, configured and - Noise Perturbation (need background noise audio files) - Impulse Response (need impulse audio files) -In order to inform the trainer of what augmentation components are needed and what their processing orders are, it is required to prepare in advance a *augmentation configuration file* in [JSON](http://www.json.org/) format. 
For example: +In order to inform the trainer of what augmentation components are needed and what their processing orders are, it is required to prepare in advance an *augmentation configuration file* in [JSON](http://www.json.org/) format. For example: ``` [{ @@ -226,7 +228,7 @@ If you wish to train your own better language model, please refer to [KenLM](htt #### English LM -The English corpus is from the [Common Crawl Repository](http://commoncrawl.org) and you can download it from [statmt](http://data.statmt.org/ngrams/deduped_en). We use part en.00 to train our English languge model. There are some preprocessing steps before training: +The English corpus is from the [Common Crawl Repository](http://commoncrawl.org) and you can download it from [statmt](http://data.statmt.org/ngrams/deduped_en). We use part en.00 to train our English language model. There are some preprocessing steps before training: * Characters not in \[A-Za-z0-9\s'\] (\s represents whitespace characters) are removed and Arabic numbers are converted to English numbers like 1000 to one thousand. * Repeated whitespace characters are squeezed to one and the beginning whitespace characters are removed. Notice that all transcriptions are lowercase, so all characters are converted to lowercase. diff --git a/dssm/index.html b/dssm/index.html deleted file mode 100644 index 5c4a1a9d316821f25bbf204c3ba7698573722b94..0000000000000000000000000000000000000000 --- a/dssm/index.html +++ /dev/null @@ -1,328 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/generate_chinese_poetry/index.html b/generate_chinese_poetry/index.html deleted file mode 100644 index c1ccd322d6109c69b7d72f26f25abee671cb6148..0000000000000000000000000000000000000000 --- a/generate_chinese_poetry/index.html +++ /dev/null @@ -1,175 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/generate_sequence_by_rnn_lm/index.html b/generate_sequence_by_rnn_lm/index.html deleted file mode 100644 index 17e81d9e52494efc18948f8e6e70c5d253614fbc..0000000000000000000000000000000000000000 --- a/generate_sequence_by_rnn_lm/index.html +++ /dev/null @@ -1,226 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/globally_normalized_reader/index.html b/globally_normalized_reader/index.html deleted file mode 100644 index c8c23c5bad340c7f043f662e4334e9c0f772e55e..0000000000000000000000000000000000000000 --- a/globally_normalized_reader/index.html +++ /dev/null @@ -1,119 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/hsigmoid/index.html b/hsigmoid/index.html deleted file mode 100644 index 738e81f25a01cfce2ef948f1bef70a38932de6fa..0000000000000000000000000000000000000000 --- a/hsigmoid/index.html +++ /dev/null @@ -1,226 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/image_classification/index.html b/image_classification/index.html deleted file mode 100644 index 48009093f9505fa425890d3103bf0c8e21073b63..0000000000000000000000000000000000000000 --- a/image_classification/index.html +++ /dev/null @@ -1,302 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/ltr/index.html b/ltr/index.html deleted file mode 100644 index 59fb2d3260e7d88841ae099fac96b00890463fdf..0000000000000000000000000000000000000000 --- a/ltr/index.html +++ /dev/null @@ -1,422 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/nce_cost/index.html b/nce_cost/index.html deleted file mode 100644 index 1a7d8271a0f8ee3a951359eda0609044f3cf5542..0000000000000000000000000000000000000000 --- a/nce_cost/index.html +++ /dev/null @@ -1,217 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/nested_sequence/index.html b/nested_sequence/index.html deleted file mode 100644 index 51f66f93f70eeb7a52e848f190b8e33ead14bbb0..0000000000000000000000000000000000000000 --- a/nested_sequence/index.html +++ /dev/null @@ -1,73 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/nested_sequence/text_classification/index.html b/nested_sequence/text_classification/index.html deleted file mode 100644 index 1838415cecd63ec95c4966d31bf0f3d0bf6beb63..0000000000000000000000000000000000000000 --- a/nested_sequence/text_classification/index.html +++ /dev/null @@ -1,302 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/neural_qa/index.html b/neural_qa/index.html deleted file mode 100644 index 53786d97abb674d298d151437035e4bdfc6b9321..0000000000000000000000000000000000000000 --- a/neural_qa/index.html +++ /dev/null @@ -1,188 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/nmt_without_attention/index.html b/nmt_without_attention/index.html deleted file mode 100644 index cf610710ddec3b6510d1fa48ce414c4a441aa59a..0000000000000000000000000000000000000000 --- a/nmt_without_attention/index.html +++ /dev/null @@ -1,412 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/scene_text_recognition/index.html b/scene_text_recognition/index.html deleted file mode 100644 index 4331b2b9636c159aedc1e96f0731e44cca9889cf..0000000000000000000000000000000000000000 --- a/scene_text_recognition/index.html +++ /dev/null @@ -1,192 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/scheduled_sampling/index.html b/scheduled_sampling/index.html deleted file mode 100644 index 8d327c4b1ad687ef18ca81302cf6dccf049cab51..0000000000000000000000000000000000000000 --- a/scheduled_sampling/index.html +++ /dev/null @@ -1,277 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/sequence_tagging_for_ner/data/vocab.txt b/sequence_tagging_for_ner/data/vocab.txt index 3c8c45a2ddf432c95a694087a51364647d81ebaa..19d518f85ccf1d6237142fba003599039f9a9905 100644 --- a/sequence_tagging_for_ner/data/vocab.txt +++ b/sequence_tagging_for_ner/data/vocab.txt @@ -1,4 +1,4 @@ - +UUUNKKK the , . diff --git a/sequence_tagging_for_ner/index.html b/sequence_tagging_for_ner/index.html deleted file mode 100644 index 389f4d502c94af9f46a5e5af0a7e3c812dca8d2f..0000000000000000000000000000000000000000 --- a/sequence_tagging_for_ner/index.html +++ /dev/null @@ -1,236 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/sequence_tagging_for_ner/network_conf.py b/sequence_tagging_for_ner/network_conf.py index 7074ae96d85ef71d0d168a421b7b9aeba54df2ec..cc101f9bc60474ef614fc4ecf0a98fec185e9566 100644 --- a/sequence_tagging_for_ner/network_conf.py +++ b/sequence_tagging_for_ner/network_conf.py @@ -11,16 +11,16 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True): hidden_dim = 128 word = paddle.layer.data( - name='word', + name="word", type=paddle.data_type.integer_value_sequence(word_dict_len)) word_embedding = paddle.layer.embedding( input=word, size=word_dim, param_attr=paddle.attr.Param( - name='emb', initial_std=math.sqrt(1. / word_dim), is_static=True)) + name="emb", initial_std=math.sqrt(1. / word_dim), is_static=True)) mark = paddle.layer.data( - name='mark', + name="mark", type=paddle.data_type.integer_value_sequence(mark_dict_len)) mark_embedding = paddle.layer.embedding( input=mark, @@ -35,7 +35,8 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True): hidden_para_attr = paddle.attr.Param( initial_std=1 / math.sqrt(hidden_dim), learning_rate=mix_hidden_lr) - # the first rnn layer shares the input-to-hidden mappings. + # the first forward and backward rnn layer share the + # input-to-hidden mappings. hidden = paddle.layer.fc( name="__hidden00__", size=hidden_dim, @@ -72,32 +73,40 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True): input=fea, param_attr=[hidden_para_attr, rnn_para_attr] * 2) + # NOTE: This fully connected layer calculates the emission feature for + # the CRF layer. Because the paddle.layer.crf performs global normalization + # over all possible sequences internally, it expects UNSCALED emission + # feature weights. + # Please do not add any nonlinear activation to this fully connected layer. + # The default activation for paddle.layer.fc is tanh, so it must be set + # to linear explicitly. 
emission = paddle.layer.fc( size=label_dict_len, bias_attr=False, input=rnn_fea, + act=paddle.activation.Linear(), param_attr=rnn_para_attr) if is_train: target = paddle.layer.data( - name='target', + name="target", type=paddle.data_type.integer_value_sequence(label_dict_len)) crf = paddle.layer.crf( size=label_dict_len, input=emission, label=target, - param_attr=paddle.attr.Param(name='crfw', initial_std=1e-3)) + param_attr=paddle.attr.Param(name="crfw", initial_std=1e-3)) crf_dec = paddle.layer.crf_decoding( size=label_dict_len, input=emission, label=target, - param_attr=paddle.attr.Param(name='crfw')) + param_attr=paddle.attr.Param(name="crfw")) return crf, crf_dec, target else: predict = paddle.layer.crf_decoding( size=label_dict_len, input=emission, - param_attr=paddle.attr.Param(name='crfw')) + param_attr=paddle.attr.Param(name="crfw")) return predict diff --git a/sequence_tagging_for_ner/train.py b/sequence_tagging_for_ner/train.py index 398f526e25877046e500cf36775547c8b40b7a11..1f9351f5628059ea64d09f3f4c72b6d5877a207a 100644 --- a/sequence_tagging_for_ner/train.py +++ b/sequence_tagging_for_ner/train.py @@ -1,3 +1,4 @@ +import os import gzip import numpy as np diff --git a/ssd/index.html b/ssd/index.html deleted file mode 100644 index 6a2788901657e770a0e2f320637bdf3c705e19d9..0000000000000000000000000000000000000000 --- a/ssd/index.html +++ /dev/null @@ -1,293 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/text_classification/index.html b/text_classification/index.html deleted file mode 100644 index 21e4e14dacb01f9ed373798d27e3f8482f2cd13c..0000000000000000000000000000000000000000 --- a/text_classification/index.html +++ /dev/null @@ -1,262 +0,0 @@ - - - - - - - - - - - - - - - - - -
-
- - - - - - - diff --git a/text_classification/train.py b/text_classification/train.py index cda04bfc6a33ee9e39298910d724bc716f1b53df..888fde356f3aec1addb5e5fcf35e17d0c82f37c3 100644 --- a/text_classification/train.py +++ b/text_classification/train.py @@ -46,10 +46,10 @@ def train(topology, word_dict = paddle.dataset.imdb.word_dict() train_reader = paddle.batch( paddle.reader.shuffle( - lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000), + lambda: paddle.dataset.imdb.train(word_dict)(), buf_size=1000), batch_size=100) test_reader = paddle.batch( - lambda: paddle.dataset.imdb.test(word_dict), batch_size=100) + lambda: paddle.dataset.imdb.test(word_dict)(), batch_size=100) class_num = 2 else: