Commit 9a42ec08 authored by Yibing Liu

Upgrade label semantic roles api

Parent 1508acd3
@@ -262,32 +262,32 @@ dict_size = len(word_dict)
```
A larger `BATCH_SIZE` makes training converge faster, but it also consumes more memory. The word-vector computation is fairly large, so if your environment allows it, enable GPU training to get results faster.
-Unlike the earlier PaddlePaddle v2 release, the new Fluid version no longer requires computing word vectors by hand. PaddlePaddle provides the built-in method `fluid.layers.embedding`, which we can use directly to build the N-gram neural network.
+Unlike the earlier PaddlePaddle v2 release, the new Fluid version no longer requires computing word vectors by hand. PaddlePaddle provides the built-in method `fluid.embedding`, which we can use directly to build the N-gram neural network.
- Let's define the N-gram neural network structure. This structure is used in both training and inference. Because word vectors are sparse, we pass `is_sparse == True` to speed up updates of the sparse parameter matrix.
```python
def inference_program(words, is_sparse):
-    embed_first = fluid.layers.embedding(
+    embed_first = fluid.embedding(
        input=words[0],
        size=[dict_size, EMBED_SIZE],
        dtype='float32',
        is_sparse=is_sparse,
        param_attr='shared_w')
-    embed_second = fluid.layers.embedding(
+    embed_second = fluid.embedding(
        input=words[1],
        size=[dict_size, EMBED_SIZE],
        dtype='float32',
        is_sparse=is_sparse,
        param_attr='shared_w')
-    embed_third = fluid.layers.embedding(
+    embed_third = fluid.embedding(
        input=words[2],
        size=[dict_size, EMBED_SIZE],
        dtype='float32',
        is_sparse=is_sparse,
        param_attr='shared_w')
-    embed_fourth = fluid.layers.embedding(
+    embed_fourth = fluid.embedding(
        input=words[3],
        size=[dict_size, EMBED_SIZE],
        dtype='float32',
@@ -310,7 +310,7 @@ def train_program(predict_word):
    # 'next_word' must be defined after the declaration of inference_program;
    # otherwise the train program's input order becomes
    # [next_word, firstw, secondw, thirdw, fourthw], which is incorrect.
-    next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
+    next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64')
    cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
    avg_cost = fluid.layers.mean(cost)
    return avg_cost
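One behavioral difference worth calling out in this hunk: with `fluid.data` the `shape` argument covers the full tensor, so the batch dimension is written explicitly as `None` instead of being appended implicitly as it was with `fluid.layers.data`. Below is a minimal standalone sketch of the label/cost pattern above, assuming a Fluid 1.6-era install; the `logits` input is a made-up stand-in for the N-gram network's softmax output, used only to keep the snippet self-contained:

```python
import numpy as np
import paddle.fluid as fluid

DICT_SIZE = 100  # hypothetical vocabulary size, for illustration only

# Old style: fluid.layers.data(name='nextw', shape=[1], dtype='int64')
# New style: the batch dimension is explicit and left variable via None.
next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64')
# Stand-in for predict_word, the softmax output of the real network.
logits = fluid.data(name='logits', shape=[None, DICT_SIZE], dtype='float32')
predict_word = fluid.layers.softmax(logits)

# cross_entropy pairs a [batch, DICT_SIZE] distribution with an int64
# [batch, 1] label; mean reduces the per-example costs to a scalar.
cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
avg_cost = fluid.layers.mean(cost)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
out, = exe.run(
    feed={
        'nextw': np.random.randint(0, DICT_SIZE, (4, 1)).astype('int64'),
        'logits': np.random.rand(4, DICT_SIZE).astype('float32'),
    },
    fetch_list=[avg_cost])
print(out)  # a single averaged cross-entropy value
```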
@@ -335,11 +335,11 @@ def train(if_use_cuda, params_dirname, is_sparse=True):
    test_reader = paddle.batch(
        paddle.dataset.imikolov.test(word_dict, N), BATCH_SIZE)
-    first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
-    second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
-    third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
-    forth_word = fluid.layers.data(name='fourthw', shape=[1], dtype='int64')
-    next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
+    first_word = fluid.data(name='firstw', shape=[None, 1], dtype='int64')
+    second_word = fluid.data(name='secondw', shape=[None, 1], dtype='int64')
+    third_word = fluid.data(name='thirdw', shape=[None, 1], dtype='int64')
+    forth_word = fluid.data(name='fourthw', shape=[None, 1], dtype='int64')
+    next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64')
    word_list = [first_word, second_word, third_word, forth_word, next_word]
    feed_order = ['firstw', 'secondw', 'thirdw', 'fourthw', 'nextw']
...
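The renamed data layers keep their `name` fields, so feeding is unchanged: `feed_order` still maps reader columns to layers by name. A sketch of wiring the five slots to the imikolov reader with a `DataFeeder`, assuming a Fluid 1.6-era install:

```python
import paddle
import paddle.fluid as fluid

N = 5            # 4 context words plus the next word
BATCH_SIZE = 64
word_dict = paddle.dataset.imikolov.build_dict()

feed_order = ['firstw', 'secondw', 'thirdw', 'fourthw', 'nextw']
word_list = [
    fluid.data(name=name, shape=[None, 1], dtype='int64')
    for name in feed_order
]

train_reader = paddle.batch(
    paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)

# DataFeeder turns a minibatch of plain Python tuples into the feed
# dict keyed by the data layers' names.
feeder = fluid.DataFeeder(feed_list=word_list, place=fluid.CPUPlace())
first_batch = next(train_reader())
feed_dict = feeder.feed(first_batch)  # e.g. exe.run(feed=feed_dict, ...)
```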
@@ -270,42 +270,42 @@ is_local = True
```python
# sentence sequence
-word = fluid.layers.data(
-    name='word_data', shape=[1], dtype='int64', lod_level=1)
+word = fluid.data(
+    name='word_data', shape=[None, 1], dtype='int64', lod_level=1)
# predicate
-predicate = fluid.layers.data(
-    name='verb_data', shape=[1], dtype='int64', lod_level=1)
+predicate = fluid.data(
+    name='verb_data', shape=[None, 1], dtype='int64', lod_level=1)
# five context features of the predicate
-ctx_n2 = fluid.layers.data(
-    name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
+ctx_n2 = fluid.data(
+    name='ctx_n2_data', shape=[None, 1], dtype='int64', lod_level=1)
-ctx_n1 = fluid.layers.data(
-    name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
+ctx_n1 = fluid.data(
+    name='ctx_n1_data', shape=[None, 1], dtype='int64', lod_level=1)
-ctx_0 = fluid.layers.data(
-    name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
+ctx_0 = fluid.data(
+    name='ctx_0_data', shape=[None, 1], dtype='int64', lod_level=1)
-ctx_p1 = fluid.layers.data(
-    name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
+ctx_p1 = fluid.data(
+    name='ctx_p1_data', shape=[None, 1], dtype='int64', lod_level=1)
-ctx_p2 = fluid.layers.data(
-    name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
+ctx_p2 = fluid.data(
+    name='ctx_p2_data', shape=[None, 1], dtype='int64', lod_level=1)
# predicate context region flag
-mark = fluid.layers.data(
-    name='mark_data', shape=[1], dtype='int64', lod_level=1)
+mark = fluid.data(
+    name='mark_data', shape=[None, 1], dtype='int64', lod_level=1)
```
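Since every slot above declares `lod_level=1`, each feed is a batch of variable-length sequences carried in a LoD tensor rather than a fixed-shape array. A small sketch of building one such input by hand, assuming Fluid 1.6; the word ids are invented:

```python
import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()

# Two sentences, of lengths 3 and 2, flattened into one [5, 1] int64 array.
ids = np.array([[1], [4], [7], [2], [9]], dtype='int64')
# The second argument gives the recursive sequence lengths: one LoD level,
# matching lod_level=1 in the data layer declaration.
word_tensor = fluid.create_lod_tensor(ids, [[3, 2]], place)
# The tensor can then be fed as {'word_data': word_tensor} to an Executor.
```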
### Defining the network structure
First, define the model's input layer on top of the pre-trained embeddings.
```python
# pre-trained embeddings for the predicate and the predicate context region flag
-predicate_embedding = fluid.layers.embedding(
+predicate_embedding = fluid.embedding(
    input=predicate,
    size=[pred_dict_len, word_dim],
    dtype='float32',
    is_sparse=IS_SPARSE,
    param_attr='vemb')
-mark_embedding = fluid.layers.embedding(
+mark_embedding = fluid.embedding(
    input=mark,
    size=[mark_dict_len, mark_dim],
    dtype='float32',
@@ -316,7 +316,7 @@ word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
# The word vectors are pre-trained, so the embedding table is not trained
# again here; setting the parameter attribute trainable to False prevents
# the table from being updated during training.
emb_layers = [
-    fluid.layers.embedding(
+    fluid.embedding(
        size=[word_dict_len, word_dim],
        input=x,
        param_attr=fluid.ParamAttr(
@@ -374,8 +374,8 @@ feature_out = fluid.layers.sums(input=[
])
# label sequence
-target = fluid.layers.data(
-    name='target', shape=[1], dtype='int64', lod_level=1)
+target = fluid.data(
+    name='target', shape=[None, 1], dtype='int64', lod_level=1)
# learn the CRF transition features
crf_cost = fluid.layers.linear_chain_crf(
...
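What actually freezes the pre-trained table is `trainable=False` in the `ParamAttr`; the pre-trained values themselves are copied into the parameter after the startup program has initialized it. A hedged sketch of that load step, assuming Fluid 1.6; the parameter name `'emb'` and the random matrix stand in for the real `embedding_name` and the downloaded word-vector table:

```python
import numpy as np
import paddle.fluid as fluid

VOCAB, DIM = 1000, 32  # toy sizes, for illustration only

word = fluid.data(name='word_data', shape=[None, 1], dtype='int64', lod_level=1)
emb = fluid.embedding(
    input=word,
    size=[VOCAB, DIM],
    dtype='float32',
    # trainable=False: the table receives no gradient updates in training.
    param_attr=fluid.ParamAttr(name='emb', trainable=False))

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

# Overwrite the freshly initialized parameter with "pre-trained" values.
pretrained = np.random.rand(VOCAB, DIM).astype('float32')
fluid.global_scope().find_var('emb').get_tensor().set(pretrained, place)
```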
@@ -53,14 +53,14 @@ def load_parameter(file_name, h, w):
def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
            **ignored):
    # 8 features
-    predicate_embedding = fluid.layers.embedding(
+    predicate_embedding = fluid.embedding(
        input=predicate,
        size=[pred_dict_len, word_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr='vemb')
-    mark_embedding = fluid.layers.embedding(
+    mark_embedding = fluid.embedding(
        input=mark,
        size=[mark_dict_len, mark_dim],
        dtype='float32',
@@ -68,7 +68,7 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
-        fluid.layers.embedding(
+        fluid.embedding(
            size=[word_dict_len, word_dim],
            input=x,
            param_attr=fluid.ParamAttr(name=embedding_name, trainable=False))
@@ -120,22 +120,22 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
def train(use_cuda, save_dirname=None, is_local=True):
    # define data layers
-    word = fluid.layers.data(
-        name='word_data', shape=[1], dtype='int64', lod_level=1)
+    word = fluid.data(
+        name='word_data', shape=[None, 1], dtype='int64', lod_level=1)
-    predicate = fluid.layers.data(
-        name='verb_data', shape=[1], dtype='int64', lod_level=1)
+    predicate = fluid.data(
+        name='verb_data', shape=[None, 1], dtype='int64', lod_level=1)
-    ctx_n2 = fluid.layers.data(
-        name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
+    ctx_n2 = fluid.data(
+        name='ctx_n2_data', shape=[None, 1], dtype='int64', lod_level=1)
-    ctx_n1 = fluid.layers.data(
-        name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
+    ctx_n1 = fluid.data(
+        name='ctx_n1_data', shape=[None, 1], dtype='int64', lod_level=1)
-    ctx_0 = fluid.layers.data(
-        name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
+    ctx_0 = fluid.data(
+        name='ctx_0_data', shape=[None, 1], dtype='int64', lod_level=1)
-    ctx_p1 = fluid.layers.data(
-        name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
+    ctx_p1 = fluid.data(
+        name='ctx_p1_data', shape=[None, 1], dtype='int64', lod_level=1)
-    ctx_p2 = fluid.layers.data(
-        name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
+    ctx_p2 = fluid.data(
+        name='ctx_p2_data', shape=[None, 1], dtype='int64', lod_level=1)
-    mark = fluid.layers.data(
-        name='mark_data', shape=[1], dtype='int64', lod_level=1)
+    mark = fluid.data(
+        name='mark_data', shape=[None, 1], dtype='int64', lod_level=1)
    if args.enable_ce:
        fluid.default_startup_program().random_seed = 90
...
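To sanity-check the upgraded input pipeline end to end, one can declare a single sequence slot, embed it, and push one hand-built LoD tensor through an Executor. A standalone sketch under Fluid 1.6 assumptions, not the project's actual training loop:

```python
import numpy as np
import paddle.fluid as fluid

VOCAB, DIM = 100, 8  # toy sizes, for the sketch only

word = fluid.data(name='word_data', shape=[None, 1], dtype='int64', lod_level=1)
emb = fluid.embedding(input=word, size=[VOCAB, DIM], dtype='float32')

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

# One sentence of length 4, fed as a LoD tensor.
ids = np.array([[3], [14], [15], [9]], dtype='int64')
tensor = fluid.create_lod_tensor(ids, [[4]], place)
# return_numpy=False because the fetched embedding output carries LoD info.
out, = exe.run(feed={'word_data': tensor},
               fetch_list=[emb],
               return_numpy=False)
print(np.array(out).shape)  # (4, 1, DIM): one embedding row per word id
```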