From 94fa736fb23979e2a060b17fb14eb0d632f2a83e Mon Sep 17 00:00:00 2001
From: caoying03 <caoying03@baidu.com>
Date: Mon, 17 Apr 2017 17:18:14 +0800
Subject: [PATCH] refine the tutorial doc.

---
 06.label_semantic_roles/README.en.md  | 8 ++++----
 06.label_semantic_roles/README.md     | 4 +++-
 06.label_semantic_roles/index.en.html | 8 ++++----
 06.label_semantic_roles/index.html    | 4 +++-
 4 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/06.label_semantic_roles/README.en.md b/06.label_semantic_roles/README.en.md
index f18464f..079b961 100644
--- a/06.label_semantic_roles/README.en.md
+++ b/06.label_semantic_roles/README.en.md
@@ -346,10 +346,10 @@ for i in range(1, depth):
 - In PaddlePaddle, state features and transition features of a CRF are implemented by a fully connected layer and a CRF layer seperately. The fully connected layer with linear activation learns the state features, here we use paddle.layer.mixed (paddle.layer.fc can be uesed as well), and the CRF layer in PaddlePaddle: paddle.layer.crf only learns the transition features, which is a cost layer and is the last layer of the network. paddle.layer.crf outputs the log probability of true tag sequence as the cost by given the input sequence and it requires the true tag sequence as target in the learning process.
 
 ```python
-# the fully connected layer learns the state features
-# The output of the top LSTM unit and its input are concatenated
-# and then is feed into a fully connected layer,
+
+# The output of the top LSTM unit and its input are fed into a fully connected layer,
 # size of which equals to size of tag labels.
+# The fully connected layer learns the state features
 
 feature_out = paddle.layer.mixed(
     size=label_dict_len,
@@ -370,7 +370,7 @@ crf_cost = paddle.layer.crf(
         learning_rate=mix_hidden_lr))
 ```
 
-- The CRF decoding layer is used for evaluation and inference. It shares weights with CRF layer.  The sharing of parameters among multiple layers is specified by using the same parameter name in these layers.
+- The CRF decoding layer is used for evaluation and inference. It shares weights with the CRF layer. The sharing of parameters among multiple layers is specified by using the same parameter name in these layers. If the true tag sequence is provided during training, `paddle.layer.crf_decoding` calculates the labelling error for each input token and `evaluator.sum` sums the errors over the entire sequence. Otherwise, `paddle.layer.crf_decoding` generates the labelling tags.
 
 ```python
 crf_dec = paddle.layer.crf_decoding(
diff --git a/06.label_semantic_roles/README.md b/06.label_semantic_roles/README.md
index 3f1c243..a0e8b42 100644
--- a/06.label_semantic_roles/README.md
+++ b/06.label_semantic_roles/README.md
@@ -324,7 +324,9 @@ for i in range(1, depth):
 
 ```python
 
-# 学习 CRF 的状态特征
+# 取最后一个栈式LSTM的输出和这个LSTM单元的输入到隐层映射，
+# 经过一个全连接层映射到标记字典的维度，来学习 CRF 的状态特征
+
 feature_out = paddle.layer.mixed(
 size=label_dict_len,
 bias_attr=std_default,
diff --git a/06.label_semantic_roles/index.en.html b/06.label_semantic_roles/index.en.html
index e5eef7d..5cd490d 100644
--- a/06.label_semantic_roles/index.en.html
+++ b/06.label_semantic_roles/index.en.html
@@ -388,10 +388,10 @@ for i in range(1, depth):
 - In PaddlePaddle, state features and transition features of a CRF are implemented by a fully connected layer and a CRF layer seperately. The fully connected layer with linear activation learns the state features, here we use paddle.layer.mixed (paddle.layer.fc can be uesed as well), and the CRF layer in PaddlePaddle: paddle.layer.crf only learns the transition features, which is a cost layer and is the last layer of the network. paddle.layer.crf outputs the log probability of true tag sequence as the cost by given the input sequence and it requires the true tag sequence as target in the learning process.
 
 ```python
-# the fully connected layer learns the state features
-# The output of the top LSTM unit and its input are concatenated
-# and then is feed into a fully connected layer,
+
+# The output of the top LSTM unit and its input are fed into a fully connected layer,
 # size of which equals to size of tag labels.
+# The fully connected layer learns the state features
 
 feature_out = paddle.layer.mixed(
     size=label_dict_len,
@@ -412,7 +412,7 @@ crf_cost = paddle.layer.crf(
         learning_rate=mix_hidden_lr))
 ```
 
-- The CRF decoding layer is used for evaluation and inference. It shares weights with CRF layer.  The sharing of parameters among multiple layers is specified by using the same parameter name in these layers.
+- The CRF decoding layer is used for evaluation and inference. It shares weights with the CRF layer. The sharing of parameters among multiple layers is specified by using the same parameter name in these layers. If the true tag sequence is provided during training, `paddle.layer.crf_decoding` calculates the labelling error for each input token and `evaluator.sum` sums the errors over the entire sequence. Otherwise, `paddle.layer.crf_decoding` generates the labelling tags.
 
 ```python
 crf_dec = paddle.layer.crf_decoding(
diff --git a/06.label_semantic_roles/index.html b/06.label_semantic_roles/index.html
index 082e4ca..5f78688 100644
--- a/06.label_semantic_roles/index.html
+++ b/06.label_semantic_roles/index.html
@@ -366,7 +366,9 @@ for i in range(1, depth):
 
 ```python
 
-# 学习 CRF 的状态特征
+# 取最后一个栈式LSTM的输出和这个LSTM单元的输入到隐层映射，
+# 经过一个全连接层映射到标记字典的维度，来学习 CRF 的状态特征
+
 feature_out = paddle.layer.mixed(
 size=label_dict_len,
 bias_attr=std_default,
-- 
GitLab