Merge pull request #3 from PaddlePaddle/develop

update

Merge pull request #3 from PaddlePaddle/develop
update
20abbf31 · zhengya01 · GitHub · c0b6a1d1 · 022c0899 · 20abbf31
20 changed files
--- a/fluid/PaddleCV/deeplabv3+/.run_ce.sh
+++ b/fluid/PaddleCV/deeplabv3+/.run_ce.sh
+#!/bin/bash
+# Continuous-evaluation (CE) driver: runs the same short training job twice,
+# first with the single-card device list and then with the four-card one, and
+# pipes each run's output into _ce.py.
+
+# Limit MKL and OpenMP to one thread each.
+export MKL_NUM_THREADS=1
+export OMP_NUM_THREADS=1
+
+# Quoted so that a $HOME containing spaces or glob characters stays one word.
+DATASET_PATH="${HOME}/.cache/paddle/dataset/cityscape/"
+
+# Run 1: device list comes from $deeplabv3plus.
+cudaid=${deeplabv3plus:=0} # use 0-th card as default
+export CUDA_VISIBLE_DEVICES="$cudaid"
+
+FLAGS_benchmark=true  python train.py \
+--batch_size=2 \
+--train_crop_size=769 \
+--total_step=50 \
+--save_weights_path=output1 \
+--dataset_path="$DATASET_PATH" \
+--enable_ce | python _ce.py
+
+# Run 2: device list comes from $deeplabv3plus_m.
+cudaid=${deeplabv3plus_m:=0,1,2,3} # use 0,1,2,3 card as default
+export CUDA_VISIBLE_DEVICES="$cudaid"
+
+FLAGS_benchmark=true  python train.py \
+--batch_size=2 \
+--train_crop_size=769 \
+--total_step=50 \
+--save_weights_path=output4 \
+--dataset_path="$DATASET_PATH" \
+--enable_ce | python _ce.py
--- a/fluid/PaddleCV/deeplabv3+/__init__.py
+++ b/fluid/PaddleCV/deeplabv3+/__init__.py
--- a/fluid/PaddleCV/deeplabv3+/_ce.py
+++ b/fluid/PaddleCV/deeplabv3+/_ce.py
+# this file is only used for continuous evaluation test!
+
+import os
+import sys
+sys.path.append(os.environ['ceroot'])
+from kpi import CostKpi
+from kpi import DurationKpi
+
+# KPI objects for the card1 and card4 runs. The classes come from the `kpi`
+# module imported above; the positional arguments after the name are passed
+# through unchanged (their meaning is defined by that module).
+each_pass_duration_card1_kpi = DurationKpi(
+    'each_pass_duration_card1', 0.1, 0, actived=True)
+train_loss_card1_kpi = CostKpi(
+    'train_loss_card1', 0.05, 0)
+each_pass_duration_card4_kpi = DurationKpi(
+    'each_pass_duration_card4', 0.1, 0, actived=True)
+train_loss_card4_kpi = CostKpi(
+    'train_loss_card4', 0.05, 0)
+
+# Every KPI that log_to_ce() is able to record, looked up by its `name`.
+tracking_kpis = [
+    each_pass_duration_card1_kpi,
+    train_loss_card1_kpi,
+    each_pass_duration_card4_kpi,
+    train_loss_card4_kpi,
+]
+
+
+def parse_log(log):
+    '''
+    Extract KPI records from a training log.
+
+    The log is split into lines; each line is stripped and split on tabs,
+    and the resulting field list is printed. A line is a KPI record when it
+    has exactly three tab-separated fields and the first one is "kpis":
+
+        kpis<TAB>train_loss_card1<TAB>0.25
+
+    For every such record a (name, value) pair is yielded, with the value
+    converted to float. All other lines are ignored.
+    '''
+    for raw_line in log.split('\n'):
+        fields = raw_line.strip().split('\t')
+        # Echo every line: stdout of the training script is piped into this
+        # file, so this print is what makes the log visible.
+        print(fields)
+        if len(fields) != 3 or fields[0] != 'kpis':
+            continue
+        _, name, raw_value = fields
+        yield name, float(raw_value)
+
+
+def log_to_ce(log):
+    '''
+    Record every KPI found in `log` on its tracker and persist it.
+
+    Trackers are looked up by name among `tracking_kpis`; a KPI name in the
+    log that has no tracker raises KeyError. Each (name, value) pair is
+    printed before it is recorded, and the tracker is persisted after every
+    record.
+    '''
+    trackers = {kpi.name: kpi for kpi in tracking_kpis}
+
+    for name, value in parse_log(log):
+        print(name, value)
+        tracker = trackers[name]
+        tracker.add_record(value)
+        tracker.persist()
+
+
+if __name__ == '__main__':
+    # The whole log is read from stdin before any KPI is recorded.
+    log_to_ce(sys.stdin.read())
--- a/fluid/PaddleCV/deeplabv3+/train.py
+++ b/fluid/PaddleCV/deeplabv3+/train.py
@@ -34,6 +34,10 @@ def add_arguments():
    add_argument('parallel', bool, False, "using ParallelExecutor.")
    add_argument('use_gpu', bool, True, "Whether use GPU or CPU.")
    add_argument('num_classes', int, 19, "Number of classes.")
+    parser.add_argument(
+        '--enable_ce',
+        action='store_true',
+        help='If set, run the task with continuous evaluation logs.')


 def load_model():
@@ -51,7 +55,10 @@ def load_model():
    else:
        if args.num_classes == 19:
            fluid.io.load_params(
-                exe, dirname=args.init_weights_path, main_program=tp)
+                exe,
+                dirname="",
+                filename=args.init_weights_path,
+                main_program=tp)
        else:
            fluid.io.load_vars(
                exe, dirname="", filename=args.init_weights_path, vars=myvars)
@@ -84,6 +91,15 @@ def loss(logit, label):
    return loss, label_nignore


+def get_cards(args):
+    """Return the number of devices used for this run.
+
+    In CE mode (args.enable_ce is true) the count is the number of
+    comma-separated entries in the CUDA_VISIBLE_DEVICES environment
+    variable; otherwise args.num_devices is returned unchanged.
+
+    Raises:
+        ValueError: CE mode is enabled but CUDA_VISIBLE_DEVICES is not set.
+    """
+    if not args.enable_ce:
+        return args.num_devices
+
+    cards = os.environ.get('CUDA_VISIBLE_DEVICES')
+    if cards is None:
+        # Without this guard the split below fails with an opaque
+        # "'NoneType' object has no attribute 'split'".
+        raise ValueError("CUDA_VISIBLE_DEVICES must be set when --enable_ce "
+                         "is used; the card count is derived from it.")
+    return len(cards.split(","))
+
+
 CityscapeDataset = reader.CityscapeDataset
 parser = argparse.ArgumentParser()

@@ -99,6 +115,13 @@ deeplabv3p = models.deeplabv3p

 sp = fluid.Program()
 tp = fluid.Program()
+
+# only for ce
+if args.enable_ce:
+    SEED = 102
+    sp.random_seed = SEED
+    tp.random_seed = SEED
+
 crop_size = args.train_crop_size
 batch_size = args.batch_size
 image_shape = [crop_size, crop_size]
@@ -155,7 +178,13 @@ if args.parallel:

 batches = dataset.get_batch_generator(batch_size, total_step)

+total_time = 0.0
+epoch_idx = 0
+train_loss = 0
+
 for i, imgs, labels, names in batches:
+    epoch_idx += 1
+    begin_time = time.time()
    prev_start_time = time.time()
    if args.parallel:
        retv = exe_p.run(fetch_list=[pred.name, loss_mean.name],
@@ -167,11 +196,21 @@ for i, imgs, labels, names in batches:
                             'label': labels},
                       fetch_list=[pred, loss_mean])
    end_time = time.time()
+    total_time += end_time - begin_time
    if i % 100 == 0:
        print("Model is saved to", args.save_weights_path)
        save_model()
    print("step {:d}, loss: {:.6f}, step_time_cost: {:.3f}".format(
        i, np.mean(retv[1]), end_time - prev_start_time))

+    # only for ce
+    train_loss = np.mean(retv[1])
+
+if args.enable_ce:
+    gpu_num = get_cards(args)
+    print("kpis\teach_pass_duration_card%s\t%s" %
+          (gpu_num, total_time / epoch_idx))
+    print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, train_loss))
+
 print("Training done. Model is saved to", args.save_weights_path)
 save_model()
--- a/fluid/PaddleCV/image_classification/.run_ce.sh
+++ b/fluid/PaddleCV/image_classification/.run_ce.sh
@@ -7,6 +7,7 @@ cudaid=${object_detection_cudaid:=0}
 export CUDA_VISIBLE_DEVICES=$cudaid
 python train.py --batch_size=${BATCH_SIZE} --num_epochs=5 --enable_ce=True --lr_strategy=cosine_decay | python _ce.py

+BATCH_SIZE=224
 cudaid=${object_detection_cudaid_m:=0, 1, 2, 3}
 export CUDA_VISIBLE_DEVICES=$cudaid
 python train.py --batch_size=${BATCH_SIZE} --num_epochs=5 --enable_ce=True --lr_strategy=cosine_decay | python _ce.py
--- a/fluid/PaddleCV/image_classification/train.py
+++ b/fluid/PaddleCV/image_classification/train.py
@@ -242,7 +242,7 @@ def train(args):
        device_num = subprocess.check_output(['nvidia-smi', '-L']).decode().count('\n')

    train_batch_size = args.batch_size / device_num
-    test_batch_size = 8
+    test_batch_size = 16
    if not args.enable_ce:
        train_reader = paddle.batch(
            reader.train(), batch_size=train_batch_size, drop_last=True)

--- a/fluid/PaddleCV/object_detection/data_util.py
+++ b/fluid/PaddleCV/object_detection/data_util.py
@@ -68,6 +68,7 @@ class GeneratorEnqueuer(object):
                        try:
                            task()
                        except Exception:
+                            traceback.print_exc()
                            self._stop_event.set()
                            break
            else:
@@ -75,6 +76,7 @@ class GeneratorEnqueuer(object):
                    try:
                        task()
                    except Exception:
+                        traceback.print_exc()
                        self._stop_event.set()
                        break


--- a/fluid/PaddleCV/object_detection/reader.py
+++ b/fluid/PaddleCV/object_detection/reader.py
@@ -176,10 +176,17 @@ def coco(settings, file_list, mode, batch_size, shuffle):
        if mode == 'train' and shuffle:
            np.random.shuffle(images)
        batch_out = []
+        # Choose the image sub-directory from the year that appears in
+        # file_list and from the reader mode.
+        if '2014' in file_list:
+            sub_dir = "train2014" if mode == "train" else "val2014"
+        elif '2017' in file_list:
+            sub_dir = "train2017" if mode == "train" else "val2017"
+        else:
+            # Otherwise sub_dir would be unbound on the join below.
+            raise ValueError("%s does not contain '2014' or '2017', cannot "
+                             "determine the image sub-directory." % file_list)
+        data_dir = os.path.join(settings.data_dir, sub_dir)
        for image in images:
            image_name = image['file_name']
-            image_path = os.path.join(settings.data_dir, image_name)
-
+            image_path = os.path.join(data_dir, image_name)
+            if not os.path.exists(image_path):
+                raise ValueError("%s is not exist, you should specify "
+                                 "data path correctly." % image_path)
            im = Image.open(image_path)
            if im.mode == 'L':
                im = im.convert('RGB')
@@ -242,7 +249,9 @@ def pascalvoc(settings, file_list, mode, batch_size, shuffle):
            image_path, label_path = image.split()
            image_path = os.path.join(settings.data_dir, image_path)
            label_path = os.path.join(settings.data_dir, label_path)
-
+            if not os.path.exists(image_path):
+                raise ValueError("%s is not exist, you should specify "
+                                 "data path correctly." % image_path)
            im = Image.open(image_path)
            if im.mode == 'L':
                im = im.convert('RGB')
@@ -295,7 +304,6 @@ def train(settings,
          max_queue=24,
          enable_ce=False):
    file_list = os.path.join(settings.data_dir, file_list)
-
    if 'coco' in settings.dataset:
        generator = coco(settings, file_list, "train", batch_size, shuffle)
    else:
@@ -341,6 +349,9 @@ def test(settings, file_list, batch_size):

 def infer(settings, image_path):
    def reader():
+        if not os.path.exists(image_path):
+            raise ValueError("%s is not exist, you should specify "
+                             "data path correctly." % image_path)
        img = Image.open(image_path)
        if img.mode == 'L':
            img = im.convert('RGB')

--- a/fluid/PaddleNLP/chinese_ner/infer.py
+++ b/fluid/PaddleNLP/chinese_ner/infer.py
@@ -52,7 +52,7 @@ def parse_args():

 def print_arguments(args):
    print('-----------  Configuration Arguments -----------')
-    for arg, value in sorted(vars(args).iteritems()):
+    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')

@@ -61,6 +61,7 @@ def load_reverse_dict(dict_path):
    return dict((idx, line.strip().split("\t")[0])
                for idx, line in enumerate(open(dict_path, "r").readlines()))

+
 def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
@@ -76,7 +77,6 @@ def to_lodtensor(data, place):
    return res


-
 def infer(args):
    word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
    mention = fluid.layers.data(
@@ -108,8 +108,8 @@ def infer(args):
                profiler.reset_profiler()
            iters = 0
            for data in test_data():
-                word = to_lodtensor(map(lambda x: x[0], data), place)
-                mention = to_lodtensor(map(lambda x: x[1], data), place)
+                word = to_lodtensor(list(map(lambda x: x[0], data)), place)
+                mention = to_lodtensor(list(map(lambda x: x[1], data)), place)

                start = time.time()
                crf_decode = exe.run(inference_program,
@@ -122,12 +122,12 @@ def infer(args):
                np_data = np.array(crf_decode[0])
                word_count = 0
                assert len(data) == len(lod_info) - 1
-                for sen_index in xrange(len(data)):
+                for sen_index in range(len(data)):
                    assert len(data[sen_index][0]) == lod_info[
                        sen_index + 1] - lod_info[sen_index]
                    word_index = 0
-                    for tag_index in xrange(lod_info[sen_index],
-                                            lod_info[sen_index + 1]):
+                    for tag_index in range(lod_info[sen_index],
+                                           lod_info[sen_index + 1]):
                        word = str(data[sen_index][0][word_index])
                        gold_tag = label_reverse_dict[data[sen_index][2][
                            word_index]]

--- a/fluid/PaddleNLP/chinese_ner/train.py
+++ b/fluid/PaddleNLP/chinese_ner/train.py
@@ -65,7 +65,7 @@ def parse_args():

 def print_arguments(args):
    print('-----------  Configuration Arguments -----------')
-    for arg, value in sorted(vars(args).iteritems()):
+    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')

@@ -220,9 +220,9 @@ def test2(exe, chunk_evaluator, inference_program, test_data, place,
          cur_fetch_list):
    chunk_evaluator.reset()
    for data in test_data():
-        word = to_lodtensor(map(lambda x: x[0], data), place)
-        mention = to_lodtensor(map(lambda x: x[1], data), place)
-        target = to_lodtensor(map(lambda x: x[2], data), place)
+        word = to_lodtensor(list(map(lambda x: x[0], data)), place)
+        mention = to_lodtensor(list(map(lambda x: x[1], data)), place)
+        target = to_lodtensor(list(map(lambda x: x[2], data)), place)
        result_list = exe.run(
            inference_program,
            feed={"word": word,
@@ -232,8 +232,9 @@ def test2(exe, chunk_evaluator, inference_program, test_data, place,
        number_infer = np.array(result_list[0])
        number_label = np.array(result_list[1])
        number_correct = np.array(result_list[2])
-        chunk_evaluator.update(number_infer[0], number_label[0],
-                               number_correct[0])
+        chunk_evaluator.update(number_infer[0].astype('int64'),
+                               number_label[0].astype('int64'),
+                               number_correct[0].astype('int64'))
    return chunk_evaluator.eval()


@@ -241,9 +242,9 @@ def test(test_exe, chunk_evaluator, inference_program, test_data, place,
         cur_fetch_list):
    chunk_evaluator.reset()
    for data in test_data():
-        word = to_lodtensor(map(lambda x: x[0], data), place)
-        mention = to_lodtensor(map(lambda x: x[1], data), place)
-        target = to_lodtensor(map(lambda x: x[2], data), place)
+        word = to_lodtensor(list(map(lambda x: x[0], data)), place)
+        mention = to_lodtensor(list(map(lambda x: x[1], data)), place)
+        target = to_lodtensor(list(map(lambda x: x[2], data)), place)
        result_list = test_exe.run(
            fetch_list=cur_fetch_list,
            feed={"word": word,
@@ -252,8 +253,9 @@ def test(test_exe, chunk_evaluator, inference_program, test_data, place,
        number_infer = np.array(result_list[0])
        number_label = np.array(result_list[1])
        number_correct = np.array(result_list[2])
-        chunk_evaluator.update(number_infer.sum(),
-                               number_label.sum(), number_correct.sum())
+        chunk_evaluator.update(number_infer.sum().astype('int64'),
+                               number_label.sum().astype('int64'),
+                               number_correct.sum().astype('int64'))
    return chunk_evaluator.eval()


@@ -270,11 +272,6 @@ def main(args):
        crf_decode = fluid.layers.crf_decoding(
            input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))

-        inference_program = fluid.default_main_program().clone(for_test=True)
-
-        sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
-        sgd_optimizer.minimize(avg_cost)
-
        (precision, recall, f1_score, num_infer_chunks, num_label_chunks,
         num_correct_chunks) = fluid.layers.chunk_eval(
             input=crf_decode,
@@ -282,6 +279,11 @@ def main(args):
             chunk_scheme="IOB",
             num_chunk_types=int(math.ceil((args.label_dict_len - 1) / 2.0)))

+        inference_program = fluid.default_main_program().clone(for_test=True)
+
+        sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
+        sgd_optimizer.minimize(avg_cost)
+
        chunk_evaluator = fluid.metrics.ChunkEvaluator()

        train_reader = paddle.batch(
@@ -312,7 +314,7 @@ def main(args):
            test_exe = exe

        batch_id = 0
-        for pass_id in xrange(args.num_passes):
+        for pass_id in range(args.num_passes):
            chunk_evaluator.reset()
            train_reader_iter = train_reader()
            start_time = time.time()
@@ -326,9 +328,9 @@ def main(args):
                        ],
                        feed=feeder.feed(cur_batch))
                    chunk_evaluator.update(
-                        np.array(nums_infer).sum(),
-                        np.array(nums_label).sum(),
-                        np.array(nums_correct).sum())
+                        np.array(nums_infer).sum().astype("int64"),
+                        np.array(nums_label).sum().astype("int64"),
+                        np.array(nums_correct).sum().astype("int64"))
                    cost_list = np.array(cost)
                    batch_id += 1
                except StopIteration:

--- a/fluid/PaddleNLP/deep_attention_matching_net/_ce.py
+++ b/fluid/PaddleNLP/deep_attention_matching_net/_ce.py
@@ -7,8 +7,8 @@ from kpi import CostKpi, DurationKpi, AccKpi

 #### NOTE kpi.py should shared in models in some way!!!!

-train_cost_kpi = CostKpi('train_cost', 0.02, actived=True)
-train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True)
+train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True)
+train_duration_kpi = DurationKpi('train_duration', 0.05, 0, actived=True)

 tracking_kpis = [
    train_cost_kpi,

--- a/fluid/PaddleNLP/machine_reading_comprehension/_ce.py
+++ b/fluid/PaddleNLP/machine_reading_comprehension/_ce.py
@@ -3,6 +3,7 @@
 import os
 import sys
 #sys.path.insert(0, os.environ['ceroot'])
+sys.path.append(os.environ['ceroot'])
 from kpi import CostKpi, DurationKpi, AccKpi

 #### NOTE kpi.py should shared in models in some way!!!!

--- a/fluid/PaddleNLP/machine_reading_comprehension/dataset.py
+++ b/fluid/PaddleNLP/machine_reading_comprehension/dataset.py
@@ -23,6 +23,7 @@ import json
 import logging
 import numpy as np
 from collections import Counter
+import io


 class BRCDataset(object):
@@ -67,7 +68,7 @@ class BRCDataset(object):
        Args:
            data_path: the data file to load
        """
-        with open(data_path) as fin:
+        with io.open(data_path, 'r', encoding='utf-8') as fin:
            data_set = []
            for lidx, line in enumerate(fin):
                sample = json.loads(line.strip())

--- a/fluid/PaddleNLP/machine_reading_comprehension/run.py
+++ b/fluid/PaddleNLP/machine_reading_comprehension/run.py
@@ -22,6 +22,7 @@ import os
 import random
 import json
 import six
+import multiprocessing

 import paddle
 import paddle.fluid as fluid

--- a/fluid/PaddleNLP/neural_machine_translation/transformer/train.py
+++ b/fluid/PaddleNLP/neural_machine_translation/transformer/train.py
@@ -469,7 +469,7 @@ def train_loop(exe,
    # For faster executor
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.use_experimental_executor = True
-    # exec_strategy.num_iteration_per_drop_scope = 5
+    exec_strategy.num_iteration_per_drop_scope = int(args.fetch_steps)
    build_strategy = fluid.BuildStrategy()
    # Since the token number differs among devices, customize gradient scale to
    # use token average cost among multi-devices. and the gradient scale is

--- a/fluid/PaddleNLP/text_classification/train.py
+++ b/fluid/PaddleNLP/text_classification/train.py
@@ -89,7 +89,7 @@ def train(train_reader,

 def train_net():
    word_dict, train_reader, test_reader = utils.prepare_data(
-        "imdb", self_dict=False, batch_size=4, buf_size=50000)
+        "imdb", self_dict=False, batch_size=128, buf_size=50000)

    if sys.argv[1] == "bow":
        train(

--- a/fluid/PaddleNLP/text_matching_on_quora/.run_ce.sh
+++ b/fluid/PaddleNLP/text_matching_on_quora/.run_ce.sh
@@ -6,9 +6,9 @@ export OMP_NUM_THREADS=1
 cudaid=${text_matching_on_quora:=0} # use 0-th card as default
 export CUDA_VISIBLE_DEVICES=$cudaid

-FLAGS_benchmark=true  python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce | python _ce.py
+FLAGS_benchmark=true  python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce --epoch_num=5 | python _ce.py

 cudaid=${text_matching_on_quora_m:=0,1,2,3} # use 0,1,2,3 card as default
 export CUDA_VISIBLE_DEVICES=$cudaid

-FLAGS_benchmark=true  python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce | python _ce.py
+FLAGS_benchmark=true  python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce --epoch_num=5 | python _ce.py
--- a/fluid/PaddleNLP/text_matching_on_quora/_ce.py
+++ b/fluid/PaddleNLP/text_matching_on_quora/_ce.py
@@ -7,11 +7,11 @@ from kpi import CostKpi
 from kpi import DurationKpi


-each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.05, 0, actived=True)
-train_avg_cost_card1_kpi = CostKpi('train_avg_cost_card1', 0.2, 0)
+each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.08, 0, actived=True)
+train_avg_cost_card1_kpi = CostKpi('train_avg_cost_card1', 0.08, 0)
 train_avg_acc_card1_kpi = CostKpi('train_avg_acc_card1', 0.02, 0)
-each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.05, 0, actived=True)
-train_avg_cost_card4_kpi = CostKpi('train_avg_cost_card4', 0.2, 0)
+each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.08, 0, actived=True)
+train_avg_cost_card4_kpi = CostKpi('train_avg_cost_card4', 0.08, 0)
 train_avg_acc_card4_kpi = CostKpi('train_avg_acc_card4', 0.02, 0)

 tracking_kpis = [

--- a/fluid/PaddleNLP/text_matching_on_quora/train_and_evaluate.py
+++ b/fluid/PaddleNLP/text_matching_on_quora/train_and_evaluate.py
@@ -34,6 +34,7 @@ parser = argparse.ArgumentParser(description=__doc__)
 parser.add_argument('--model_name',       type=str,   default='cdssmNet',                  help="Which model to train")
 parser.add_argument('--config',           type=str,   default='cdssm_base',       help="The global config setting")
 parser.add_argument('--enable_ce', action='store_true', help='If set, run the task with continuous evaluation logs.')
+parser.add_argument('--epoch_num', type=int, help='Number of epoch')

 DATA_DIR = os.path.join(os.path.expanduser('~'), '.cache/paddle/dataset')

@@ -241,6 +242,9 @@ def main():
    args = parser.parse_args()
    global_config = configs.__dict__[args.config]()

+    if args.epoch_num != None:
+        global_config.epoch_num = args.epoch_num
+
    print("net_name: ", args.model_name)
    net = models.__dict__[args.model_name](global_config)


--- a/fluid/PaddleRec/multiview_simnet/nets.py
+++ b/fluid/PaddleRec/multiview_simnet/nets.py
@@ -33,7 +33,7 @@ class CNNEncoder(object):
    """ cnn-encoder"""

    def __init__(self,
-                 param_name="cnn.w",
+                 param_name="cnn",
                 win_size=3,
                 ksize=128,
                 act='tanh',
@@ -51,13 +51,15 @@ class CNNEncoder(object):
            filter_size=self.win_size,
            act=self.act,
            pool_type=self.pool_type,
-            param_attr=str(self.param_name))
+            param_attr=self.param_name + ".param",
+            bias_attr=self.param_name + ".bias")
+        


 class GrnnEncoder(object):
    """ grnn-encoder """

-    def __init__(self, param_name="grnn.w", hidden_size=128):
+    def __init__(self, param_name="grnn", hidden_size=128):
        self.param_name = param_name
        self.hidden_size = hidden_size

@@ -65,13 +67,15 @@ class GrnnEncoder(object):
        fc0 = nn.fc(
            input=emb, 
            size=self.hidden_size * 3, 
-            param_attr=str(str(self.param_name) + "_fc")
-        )
+            param_attr=self.param_name + "_fc.w",
+            bias_attr=False)
+        
        gru_h = nn.dynamic_gru(
            input=fc0,
            size=self.hidden_size,
            is_reverse=False,
-            param_attr=str(self.param_name))
+            param_attr=self.param_name + ".param",
+            bias_attr=self.param_name + ".bias")
        return nn.sequence_pool(input=gru_h, pool_type='max')


@@ -139,17 +143,17 @@ class MultiviewSimnet(object):
        # lookup embedding for each slot
        q_embs = [
            nn.embedding(
-                input=query, size=self.emb_shape, param_attr="emb.w")
+                input=query, size=self.emb_shape, param_attr="emb")
            for query in q_slots
        ]
        pt_embs = [
            nn.embedding(
-                input=title, size=self.emb_shape, param_attr="emb.w")
+                input=title, size=self.emb_shape, param_attr="emb")
            for title in pt_slots
        ]
        nt_embs = [
            nn.embedding(
-                input=title, size=self.emb_shape, param_attr="emb.w")
+                input=title, size=self.emb_shape, param_attr="emb")
            for title in nt_slots
        ]

@@ -170,9 +174,9 @@ class MultiviewSimnet(object):
        nt_concat = nn.concat(nt_encodes)

        # projection of hidden layer
-        q_hid = nn.fc(q_concat, size=self.hidden_size, param_attr='q_fc.w')
-        pt_hid = nn.fc(pt_concat, size=self.hidden_size, param_attr='t_fc.w')
-        nt_hid = nn.fc(nt_concat, size=self.hidden_size, param_attr='t_fc.w')
+        q_hid = nn.fc(q_concat, size=self.hidden_size, param_attr='q_fc.w', bias_attr='q_fc.b')
+        pt_hid = nn.fc(pt_concat, size=self.hidden_size, param_attr='t_fc.w', bias_attr='t_fc.b')
+        nt_hid = nn.fc(nt_concat, size=self.hidden_size, param_attr='t_fc.w', bias_attr='t_fc.b')

        # cosine of hidden layers
        cos_pos = nn.cos_sim(q_hid, pt_hid)
@@ -213,12 +217,12 @@ class MultiviewSimnet(object):
        # lookup embedding for each slot
        q_embs = [
            nn.embedding(
-                input=query, size=self.emb_shape, param_attr="emb.w")
+                input=query, size=self.emb_shape, param_attr="emb")
            for query in q_slots
        ]
        pt_embs = [
            nn.embedding(
-                input=title, size=self.emb_shape, param_attr="emb.w")
+                input=title, size=self.emb_shape, param_attr="emb")
            for title in pt_slots
        ]
        # encode each embedding field with encoder
@@ -232,8 +236,8 @@ class MultiviewSimnet(object):
        q_concat = nn.concat(q_encodes)
        pt_concat = nn.concat(pt_encodes)
        # projection of hidden layer
-        q_hid = nn.fc(q_concat, size=self.hidden_size, param_attr='q_fc.w')
-        pt_hid = nn.fc(pt_concat, size=self.hidden_size, param_attr='t_fc.w')
+        q_hid = nn.fc(q_concat, size=self.hidden_size, param_attr='q_fc.w', bias_attr='q_fc.b')
+        pt_hid = nn.fc(pt_concat, size=self.hidden_size, param_attr='t_fc.w', bias_attr='t_fc.b')
        # cosine of hidden layers
        cos = nn.cos_sim(q_hid, pt_hid)
        return cos