From 9a42ec08429928b8b4182013af7136d036829040 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 16 Oct 2019 08:26:58 +0000 Subject: [PATCH] Upgrade label semantic roles api --- 04.word2vec/README.cn.md | 22 +++++++------- 04.word2vec/README.md | 4 +-- 04.word2vec/index.cn.html | 22 +++++++------- 04.word2vec/index.html | 4 +-- 07.label_semantic_roles/README.cn.md | 42 +++++++++++++-------------- 07.label_semantic_roles/README.md | 40 ++++++++++++------------- 07.label_semantic_roles/index.cn.html | 42 +++++++++++++-------------- 07.label_semantic_roles/index.html | 40 ++++++++++++------------- 07.label_semantic_roles/train.py | 38 ++++++++++++------------ 9 files changed, 127 insertions(+), 127 deletions(-) diff --git a/04.word2vec/README.cn.md b/04.word2vec/README.cn.md index c7d9654..912e211 100644 --- a/04.word2vec/README.cn.md +++ b/04.word2vec/README.cn.md @@ -262,32 +262,32 @@ dict_size = len(word_dict) ``` 更大的`BATCH_SIZE`将使得训练更快收敛,但也会消耗更多内存。由于词向量计算规模较大,如果环境允许,请开启使用GPU进行训练,能更快得到结果。 -不同于之前的PaddlePaddle v2版本,在新的Fluid版本里,我们不必再手动计算词向量。PaddlePaddle提供了一个内置的方法`fluid.layers.embedding`,我们就可以直接用它来构造 N-gram 神经网络。 +不同于之前的PaddlePaddle v2版本,在新的Fluid版本里,我们不必再手动计算词向量。PaddlePaddle提供了一个内置的方法`fluid.embedding`,我们就可以直接用它来构造 N-gram 神经网络。 - 我们来定义我们的 N-gram 神经网络结构。这个结构在训练和预测中都会使用到。因为词向量比较稀疏,我们传入参数 `is_sparse == True`, 可以加速稀疏矩阵的更新。 ```python def inference_program(words, is_sparse): - embed_first = fluid.layers.embedding( + embed_first = fluid.embedding( input=words[0], size=[dict_size, EMBED_SIZE], dtype='float32', is_sparse=is_sparse, param_attr='shared_w') - embed_second = fluid.layers.embedding( + embed_second = fluid.embedding( input=words[1], size=[dict_size, EMBED_SIZE], dtype='float32', is_sparse=is_sparse, param_attr='shared_w') - embed_third = fluid.layers.embedding( + embed_third = fluid.embedding( input=words[2], size=[dict_size, EMBED_SIZE], dtype='float32', is_sparse=is_sparse, param_attr='shared_w') - embed_fourth = fluid.layers.embedding( + embed_fourth = fluid.embedding( input=words[3], size=[dict_size, EMBED_SIZE], dtype='float32', @@ -310,7 +310,7 @@ def train_program(predict_word): # 'next_word'的定义必须要在inference_program的声明之后, # 否则train program输入数据的顺序就变成了[next_word, firstw, secondw, # thirdw, fourthw], 这是不正确的. 
- next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') + next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64') cost = fluid.layers.cross_entropy(input=predict_word, label=next_word) avg_cost = fluid.layers.mean(cost) return avg_cost @@ -335,11 +335,11 @@ def train(if_use_cuda, params_dirname, is_sparse=True): test_reader = paddle.batch( paddle.dataset.imikolov.test(word_dict, N), BATCH_SIZE) - first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64') - second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64') - third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64') - forth_word = fluid.layers.data(name='fourthw', shape=[1], dtype='int64') - next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') + first_word = fluid.data(name='firstw', shape=[None, 1], dtype='int64') + second_word = fluid.data(name='secondw', shape=[None, 1], dtype='int64') + third_word = fluid.data(name='thirdw', shape=[None, 1], dtype='int64') + forth_word = fluid.data(name='fourthw', shape=[None, 1], dtype='int64') + next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64') word_list = [first_word, second_word, third_word, forth_word, next_word] feed_order = ['firstw', 'secondw', 'thirdw', 'fourthw', 'nextw'] diff --git a/04.word2vec/README.md b/04.word2vec/README.md index 2813d61..2a54af5 100644 --- a/04.word2vec/README.md +++ b/04.word2vec/README.md @@ -227,7 +227,7 @@ dict_size = len(word_dict) ``` A larger `BATCH_SIZE` will make the training converge faster, but it will also consume more memory. Since the word vector calculation is large, if the environment allows, please turn on the GPU for training, and get results faster. -Unlike the previous PaddlePaddle v2 version, in the new Fluid version, we don't have to manually calculate the word vector. PaddlePaddle provides a built-in method `fluid.layers.embedding`, which we can use directly to construct an N-gram neural network. +Unlike the previous PaddlePaddle v2 version, in the new Fluid version, we don't have to manually calculate the word vector. PaddlePaddle provides a built-in method `fluid.embedding`, which we can use directly to construct an N-gram neural network. - Let's define our N-gram neural network structure. This structure is used in both training and predicting. Because the word vector is sparse, we pass the parameter `is_sparse == True` to speed up the update of the sparse matrix. @@ -275,7 +275,7 @@ def train_program(predict_word): # The definition of'next_word' must be after the declaration of inference_program. # Otherwise the sequence of the train program input data becomes [next_word, firstw, secondw, #thirdw, fourthw], This is not true. 
- next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') + next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64') cost = fluid.layers.cross_entropy(input=predict_word, label=next_word) avg_cost = fluid.layers.mean(cost) return avg_cost diff --git a/04.word2vec/index.cn.html b/04.word2vec/index.cn.html index 10374b9..7f329cc 100644 --- a/04.word2vec/index.cn.html +++ b/04.word2vec/index.cn.html @@ -304,32 +304,32 @@ dict_size = len(word_dict) ``` 更大的`BATCH_SIZE`将使得训练更快收敛,但也会消耗更多内存。由于词向量计算规模较大,如果环境允许,请开启使用GPU进行训练,能更快得到结果。 -不同于之前的PaddlePaddle v2版本,在新的Fluid版本里,我们不必再手动计算词向量。PaddlePaddle提供了一个内置的方法`fluid.layers.embedding`,我们就可以直接用它来构造 N-gram 神经网络。 +不同于之前的PaddlePaddle v2版本,在新的Fluid版本里,我们不必再手动计算词向量。PaddlePaddle提供了一个内置的方法`fluid.embedding`,我们就可以直接用它来构造 N-gram 神经网络。 - 我们来定义我们的 N-gram 神经网络结构。这个结构在训练和预测中都会使用到。因为词向量比较稀疏,我们传入参数 `is_sparse == True`, 可以加速稀疏矩阵的更新。 ```python def inference_program(words, is_sparse): - embed_first = fluid.layers.embedding( + embed_first = fluid.embedding( input=words[0], size=[dict_size, EMBED_SIZE], dtype='float32', is_sparse=is_sparse, param_attr='shared_w') - embed_second = fluid.layers.embedding( + embed_second = fluid.embedding( input=words[1], size=[dict_size, EMBED_SIZE], dtype='float32', is_sparse=is_sparse, param_attr='shared_w') - embed_third = fluid.layers.embedding( + embed_third = fluid.embedding( input=words[2], size=[dict_size, EMBED_SIZE], dtype='float32', is_sparse=is_sparse, param_attr='shared_w') - embed_fourth = fluid.layers.embedding( + embed_fourth = fluid.embedding( input=words[3], size=[dict_size, EMBED_SIZE], dtype='float32', @@ -352,7 +352,7 @@ def train_program(predict_word): # 'next_word'的定义必须要在inference_program的声明之后, # 否则train program输入数据的顺序就变成了[next_word, firstw, secondw, # thirdw, fourthw], 这是不正确的. - next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') + next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64') cost = fluid.layers.cross_entropy(input=predict_word, label=next_word) avg_cost = fluid.layers.mean(cost) return avg_cost @@ -377,11 +377,11 @@ def train(if_use_cuda, params_dirname, is_sparse=True): test_reader = paddle.batch( paddle.dataset.imikolov.test(word_dict, N), BATCH_SIZE) - first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64') - second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64') - third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64') - forth_word = fluid.layers.data(name='fourthw', shape=[1], dtype='int64') - next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') + first_word = fluid.data(name='firstw', shape=[None, 1], dtype='int64') + second_word = fluid.data(name='secondw', shape=[None, 1], dtype='int64') + third_word = fluid.data(name='thirdw', shape=[None, 1], dtype='int64') + forth_word = fluid.data(name='fourthw', shape=[None, 1], dtype='int64') + next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64') word_list = [first_word, second_word, third_word, forth_word, next_word] feed_order = ['firstw', 'secondw', 'thirdw', 'fourthw', 'nextw'] diff --git a/04.word2vec/index.html b/04.word2vec/index.html index 9831c3a..a19e0af 100644 --- a/04.word2vec/index.html +++ b/04.word2vec/index.html @@ -269,7 +269,7 @@ dict_size = len(word_dict) ``` A larger `BATCH_SIZE` will make the training converge faster, but it will also consume more memory. Since the word vector calculation is large, if the environment allows, please turn on the GPU for training, and get results faster. 
-Unlike the previous PaddlePaddle v2 version, in the new Fluid version, we don't have to manually calculate the word vector. PaddlePaddle provides a built-in method `fluid.layers.embedding`, which we can use directly to construct an N-gram neural network. +Unlike the previous PaddlePaddle v2 version, in the new Fluid version, we don't have to manually calculate the word vector. PaddlePaddle provides a built-in method `fluid.embedding`, which we can use directly to construct an N-gram neural network. - Let's define our N-gram neural network structure. This structure is used in both training and predicting. Because the word vector is sparse, we pass the parameter `is_sparse == True` to speed up the update of the sparse matrix. @@ -317,7 +317,7 @@ def train_program(predict_word): # The definition of'next_word' must be after the declaration of inference_program. # Otherwise the sequence of the train program input data becomes [next_word, firstw, secondw, #thirdw, fourthw], This is not true. - next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') + next_word = fluid.data(name='nextw', shape=[None, 1], dtype='int64') cost = fluid.layers.cross_entropy(input=predict_word, label=next_word) avg_cost = fluid.layers.mean(cost) return avg_cost diff --git a/07.label_semantic_roles/README.cn.md b/07.label_semantic_roles/README.cn.md index 076a703..9fe04d2 100644 --- a/07.label_semantic_roles/README.cn.md +++ b/07.label_semantic_roles/README.cn.md @@ -270,42 +270,42 @@ is_local = True ```python # 句子序列 -word = fluid.layers.data( - name='word_data', shape=[1], dtype='int64', lod_level=1) +word = fluid.data( + name='word_data', shape=[None, 1], dtype='int64', lod_level=1) # 谓词 -predicate = fluid.layers.data( - name='verb_data', shape=[1], dtype='int64', lod_level=1) +predicate = fluid.data( + name='verb_data', shape=[None, 1], dtype='int64', lod_level=1) # 谓词上下文5个特征 -ctx_n2 = fluid.layers.data( - name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1) -ctx_n1 = fluid.layers.data( - name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1) -ctx_0 = fluid.layers.data( - name='ctx_0_data', shape=[1], dtype='int64', lod_level=1) -ctx_p1 = fluid.layers.data( - name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1) -ctx_p2 = fluid.layers.data( - name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1) +ctx_n2 = fluid.data( + name='ctx_n2_data', shape=[None, 1], dtype='int64', lod_level=1) +ctx_n1 = fluid.data( + name='ctx_n1_data', shape=[None, 1], dtype='int64', lod_level=1) +ctx_0 = fluid.data( + name='ctx_0_data', shape=[None, 1], dtype='int64', lod_level=1) +ctx_p1 = fluid.data( + name='ctx_p1_data', shape=[None, 1], dtype='int64', lod_level=1) +ctx_p2 = fluid.data( + name='ctx_p2_data', shape=[None, 1], dtype='int64', lod_level=1) # 谓词上下区域标志 -mark = fluid.layers.data( - name='mark_data', shape=[1], dtype='int64', lod_level=1) +mark = fluid.data( + name='mark_data', shape=[None, 1], dtype='int64', lod_level=1) ``` ### 定义网络结构 首先预训练并定义模型输入层 ```python #预训练谓词和谓词上下区域标志 -predicate_embedding = fluid.layers.embedding( +predicate_embedding = fluid.embedding( input=predicate, size=[pred_dict_len, word_dim], dtype='float32', is_sparse=IS_SPARSE, param_attr='vemb') -mark_embedding = fluid.layers.embedding( +mark_embedding = fluid.embedding( input=mark, size=[mark_dict_len, mark_dim], dtype='float32', @@ -316,7 +316,7 @@ word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] # 因词向量是预训练好的,这里不再训练embedding表, # 参数属性trainable设置成False阻止了embedding表在训练过程中被更新 emb_layers = [ - fluid.layers.embedding( + 
fluid.embedding( size=[word_dict_len, word_dim], input=x, param_attr=fluid.ParamAttr( @@ -374,8 +374,8 @@ feature_out = fluid.layers.sums(input=[ ]) # 标注序列 -target = fluid.layers.data( - name='target', shape=[1], dtype='int64', lod_level=1) +target = fluid.data( + name='target', shape=[None, 1], dtype='int64', lod_level=1) # 学习 CRF 的转移特征 crf_cost = fluid.layers.linear_chain_crf( diff --git a/07.label_semantic_roles/README.md b/07.label_semantic_roles/README.md index d84440f..d0dd676 100644 --- a/07.label_semantic_roles/README.md +++ b/07.label_semantic_roles/README.md @@ -252,42 +252,42 @@ Defines the format of the model input features, including the sentence sequence, ```python # Sentence sequences -word = fluid.layers.data( - name='word_data', shape=[1], dtype='int64', lod_level=1) +word = fluid.data( + name='word_data', shape=[None, 1], dtype='int64', lod_level=1) # predicate -predicate = fluid.layers.data( - name='verb_data', shape=[1], dtype='int64', lod_level=1) +predicate = fluid.data( + name='verb_data', shape=[None, 1], dtype='int64', lod_level=1) # predicate context's 5 features -ctx_n2 = fluid.layers.data( - name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1) -ctx_n1 = fluid.layers.data( - name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1) -ctx_0 = fluid.layers.data( - name='ctx_0_data', shape=[1], dtype='int64', lod_level=1) -ctx_p1 = fluid.layers.data( - name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1) -ctx_p2 = fluid.layers.data( - name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1) +ctx_n2 = fluid.data( + name='ctx_n2_data', shape=[None, 1], dtype='int64', lod_level=1) +ctx_n1 = fluid.data( + name='ctx_n1_data', shape=[None, 1], dtype='int64', lod_level=1) +ctx_0 = fluid.data( + name='ctx_0_data', shape=[None, 1], dtype='int64', lod_level=1) +ctx_p1 = fluid.data( + name='ctx_p1_data', shape=[None, 1], dtype='int64', lod_level=1) +ctx_p2 = fluid.data( + name='ctx_p2_data', shape=[None, 1], dtype='int64', lod_level=1) # Predicate conotext area flag -mark = fluid.layers.data( - name='mark_data', shape=[1], dtype='int64', lod_level=1) +mark = fluid.data( + name='mark_data', shape=[None, 1], dtype='int64', lod_level=1) ``` ### Defining the network structure First pre-train and define the model input layer ```python #pre-training predicate and predicate context area flags -predicate_embedding = fluid.layers.embedding( +predicate_embedding = fluid.embedding( input=predicate, size=[pred_dict_len, word_dim], dtype='float32', is_sparse=IS_SPARSE, param_attr='vemb') -mark_embedding = fluid.layers.embedding( +mark_embedding = fluid.embedding( input=mark, size=[mark_dict_len, mark_dim], dtype='float32', @@ -298,7 +298,7 @@ word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] #Because word vector is pre-trained, no longer training embedding table, # The trainable's parameter attribute set to False prevents the embedding table from being updated during training emb_layers = [ - fluid.layers.embedding( + fluid.embedding( size=[word_dict_len, word_dim], input=x, param_attr=fluid.ParamAttr( @@ -356,7 +356,7 @@ feature_out = fluid.layers.sums(input=[ ]) # tag/label sequence -target = fluid.layers.data( +target = fluid.data( name='target', shape=[1], dtype='int64', lod_level=1) # Learning CRF transfer features diff --git a/07.label_semantic_roles/index.cn.html b/07.label_semantic_roles/index.cn.html index 8ee60a9..c9deb3a 100644 --- a/07.label_semantic_roles/index.cn.html +++ b/07.label_semantic_roles/index.cn.html @@ -312,42 +312,42 @@ is_local = True 
```python # 句子序列 -word = fluid.layers.data( - name='word_data', shape=[1], dtype='int64', lod_level=1) +word = fluid.data( + name='word_data', shape=[None, 1], dtype='int64', lod_level=1) # 谓词 -predicate = fluid.layers.data( - name='verb_data', shape=[1], dtype='int64', lod_level=1) +predicate = fluid.data( + name='verb_data', shape=[None, 1], dtype='int64', lod_level=1) # 谓词上下文5个特征 -ctx_n2 = fluid.layers.data( - name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1) -ctx_n1 = fluid.layers.data( - name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1) -ctx_0 = fluid.layers.data( - name='ctx_0_data', shape=[1], dtype='int64', lod_level=1) -ctx_p1 = fluid.layers.data( - name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1) -ctx_p2 = fluid.layers.data( - name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1) +ctx_n2 = fluid.data( + name='ctx_n2_data', shape=[None, 1], dtype='int64', lod_level=1) +ctx_n1 = fluid.data( + name='ctx_n1_data', shape=[None, 1], dtype='int64', lod_level=1) +ctx_0 = fluid.data( + name='ctx_0_data', shape=[None, 1], dtype='int64', lod_level=1) +ctx_p1 = fluid.data( + name='ctx_p1_data', shape=[None, 1], dtype='int64', lod_level=1) +ctx_p2 = fluid.data( + name='ctx_p2_data', shape=[None, 1], dtype='int64', lod_level=1) # 谓词上下区域标志 -mark = fluid.layers.data( - name='mark_data', shape=[1], dtype='int64', lod_level=1) +mark = fluid.data( + name='mark_data', shape=[None, 1], dtype='int64', lod_level=1) ``` ### 定义网络结构 首先预训练并定义模型输入层 ```python #预训练谓词和谓词上下区域标志 -predicate_embedding = fluid.layers.embedding( +predicate_embedding = fluid.embedding( input=predicate, size=[pred_dict_len, word_dim], dtype='float32', is_sparse=IS_SPARSE, param_attr='vemb') -mark_embedding = fluid.layers.embedding( +mark_embedding = fluid.embedding( input=mark, size=[mark_dict_len, mark_dim], dtype='float32', @@ -358,7 +358,7 @@ word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] # 因词向量是预训练好的,这里不再训练embedding表, # 参数属性trainable设置成False阻止了embedding表在训练过程中被更新 emb_layers = [ - fluid.layers.embedding( + fluid.embedding( size=[word_dict_len, word_dim], input=x, param_attr=fluid.ParamAttr( @@ -416,8 +416,8 @@ feature_out = fluid.layers.sums(input=[ ]) # 标注序列 -target = fluid.layers.data( - name='target', shape=[1], dtype='int64', lod_level=1) +target = fluid.data( + name='target', shape=[None, 1], dtype='int64', lod_level=1) # 学习 CRF 的转移特征 crf_cost = fluid.layers.linear_chain_crf( diff --git a/07.label_semantic_roles/index.html b/07.label_semantic_roles/index.html index 151ffb6..8896b0b 100644 --- a/07.label_semantic_roles/index.html +++ b/07.label_semantic_roles/index.html @@ -294,42 +294,42 @@ Defines the format of the model input features, including the sentence sequence, ```python # Sentence sequences -word = fluid.layers.data( - name='word_data', shape=[1], dtype='int64', lod_level=1) +word = fluid.data( + name='word_data', shape=[None, 1], dtype='int64', lod_level=1) # predicate -predicate = fluid.layers.data( - name='verb_data', shape=[1], dtype='int64', lod_level=1) +predicate = fluid.data( + name='verb_data', shape=[None, 1], dtype='int64', lod_level=1) # predicate context's 5 features -ctx_n2 = fluid.layers.data( - name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1) -ctx_n1 = fluid.layers.data( - name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1) -ctx_0 = fluid.layers.data( - name='ctx_0_data', shape=[1], dtype='int64', lod_level=1) -ctx_p1 = fluid.layers.data( - name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1) -ctx_p2 = fluid.layers.data( - 
name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1) +ctx_n2 = fluid.data( + name='ctx_n2_data', shape=[None, 1], dtype='int64', lod_level=1) +ctx_n1 = fluid.data( + name='ctx_n1_data', shape=[None, 1], dtype='int64', lod_level=1) +ctx_0 = fluid.data( + name='ctx_0_data', shape=[None, 1], dtype='int64', lod_level=1) +ctx_p1 = fluid.data( + name='ctx_p1_data', shape=[None, 1], dtype='int64', lod_level=1) +ctx_p2 = fluid.data( + name='ctx_p2_data', shape=[None, 1], dtype='int64', lod_level=1) # Predicate conotext area flag -mark = fluid.layers.data( - name='mark_data', shape=[1], dtype='int64', lod_level=1) +mark = fluid.data( + name='mark_data', shape=[None, 1], dtype='int64', lod_level=1) ``` ### Defining the network structure First pre-train and define the model input layer ```python #pre-training predicate and predicate context area flags -predicate_embedding = fluid.layers.embedding( +predicate_embedding = fluid.embedding( input=predicate, size=[pred_dict_len, word_dim], dtype='float32', is_sparse=IS_SPARSE, param_attr='vemb') -mark_embedding = fluid.layers.embedding( +mark_embedding = fluid.embedding( input=mark, size=[mark_dict_len, mark_dim], dtype='float32', @@ -340,7 +340,7 @@ word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] #Because word vector is pre-trained, no longer training embedding table, # The trainable's parameter attribute set to False prevents the embedding table from being updated during training emb_layers = [ - fluid.layers.embedding( + fluid.embedding( size=[word_dict_len, word_dim], input=x, param_attr=fluid.ParamAttr( @@ -398,7 +398,7 @@ feature_out = fluid.layers.sums(input=[ ]) # tag/label sequence -target = fluid.layers.data( +target = fluid.data( name='target', shape=[1], dtype='int64', lod_level=1) # Learning CRF transfer features diff --git a/07.label_semantic_roles/train.py b/07.label_semantic_roles/train.py index 12af7f7..7ef075d 100644 --- a/07.label_semantic_roles/train.py +++ b/07.label_semantic_roles/train.py @@ -53,14 +53,14 @@ def load_parameter(file_name, h, w): def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, **ignored): # 8 features - predicate_embedding = fluid.layers.embedding( + predicate_embedding = fluid.embedding( input=predicate, size=[pred_dict_len, word_dim], dtype='float32', is_sparse=IS_SPARSE, param_attr='vemb') - mark_embedding = fluid.layers.embedding( + mark_embedding = fluid.embedding( input=mark, size=[mark_dict_len, mark_dim], dtype='float32', @@ -68,7 +68,7 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] emb_layers = [ - fluid.layers.embedding( + fluid.embedding( size=[word_dict_len, word_dim], input=x, param_attr=fluid.ParamAttr(name=embedding_name, trainable=False)) @@ -120,22 +120,22 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, def train(use_cuda, save_dirname=None, is_local=True): # define data layers - word = fluid.layers.data( - name='word_data', shape=[1], dtype='int64', lod_level=1) - predicate = fluid.layers.data( - name='verb_data', shape=[1], dtype='int64', lod_level=1) - ctx_n2 = fluid.layers.data( - name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1) - ctx_n1 = fluid.layers.data( - name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1) - ctx_0 = fluid.layers.data( - name='ctx_0_data', shape=[1], dtype='int64', lod_level=1) - ctx_p1 = fluid.layers.data( - name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1) - ctx_p2 = fluid.layers.data( - 
name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1) - mark = fluid.layers.data( - name='mark_data', shape=[1], dtype='int64', lod_level=1) + word = fluid.data( + name='word_data', shape=[None, 1], dtype='int64', lod_level=1) + predicate = fluid.data( + name='verb_data', shape=[None, 1], dtype='int64', lod_level=1) + ctx_n2 = fluid.data( + name='ctx_n2_data', shape=[None, 1], dtype='int64', lod_level=1) + ctx_n1 = fluid.data( + name='ctx_n1_data', shape=[None, 1], dtype='int64', lod_level=1) + ctx_0 = fluid.data( + name='ctx_0_data', shape=[None, 1], dtype='int64', lod_level=1) + ctx_p1 = fluid.data( + name='ctx_p1_data', shape=[None, 1], dtype='int64', lod_level=1) + ctx_p2 = fluid.data( + name='ctx_p2_data', shape=[None, 1], dtype='int64', lod_level=1) + mark = fluid.data( + name='mark_data', shape=[None, 1], dtype='int64', lod_level=1) if args.enable_ce: fluid.default_startup_program().random_seed = 90 -- GitLab
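
For quick reference, here is a minimal, self-contained sketch of the input/embedding pattern this patch migrates the tutorials to, assuming PaddlePaddle Fluid 1.6+ where `fluid.data` and `fluid.embedding` are available. The vocabulary size, embedding width, and variable names below are illustrative only and are not taken from the tutorials.

```python
import paddle.fluid as fluid

DICT_SIZE = 10000  # hypothetical vocabulary size
EMB_DIM = 32       # hypothetical embedding width

# fluid.data declares the full tensor shape, with None standing in for the
# variable batch dimension, unlike the older fluid.layers.data which omitted
# the batch dimension entirely.
word = fluid.data(name='word', shape=[None, 1], dtype='int64', lod_level=1)

# fluid.embedding replaces fluid.layers.embedding; size is still
# [vocabulary_size, embedding_dim], and is_sparse=True speeds up sparse
# gradient updates of the embedding table.
word_emb = fluid.embedding(
    input=word,
    size=[DICT_SIZE, EMB_DIM],
    dtype='float32',
    is_sparse=True)
```

Compared with the old API, the only structural change in the data layers is that the batch dimension is now written explicitly as `None`, which is why every `shape=[1]` in the tutorials becomes `shape=[None, 1]` in this patch.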