diff --git a/demo/sequence_tagging/data/get_data.sh b/demo/sequence_tagging/data/get_data.sh
new file mode 100755
index 0000000000000000000000000000000000000000..e579d6c46ce5ed96f442acc448b4cc61bf8394a3
--- /dev/null
+++ b/demo/sequence_tagging/data/get_data.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+set -e
+
+DIR="$( cd "$(dirname "$0")" ; pwd -P )"
+cd "$DIR"
+
+wget http://www.cnts.ua.ac.be/conll2000/chunking/train.txt.gz
+wget http://www.cnts.ua.ac.be/conll2000/chunking/test.txt.gz
diff --git a/demo/sequence_tagging/data/test.list b/demo/sequence_tagging/data/test.list
new file mode 100644
index 0000000000000000000000000000000000000000..073c0a0c9063ac55f762ac261746aa73057d70e8
--- /dev/null
+++ b/demo/sequence_tagging/data/test.list
@@ -0,0 +1 @@
+data/test.txt.gz
diff --git a/demo/sequence_tagging/data/train.list b/demo/sequence_tagging/data/train.list
new file mode 100644
index 0000000000000000000000000000000000000000..43c24d5f6484a90fe883ad5516fe100d27c9ce47
--- /dev/null
+++ b/demo/sequence_tagging/data/train.list
@@ -0,0 +1 @@
+data/train.txt.gz
diff --git a/demo/sequence_tagging/dataprovider.py b/demo/sequence_tagging/dataprovider.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f412d6834be6d02397821215b1317353cd5df18
--- /dev/null
+++ b/demo/sequence_tagging/dataprovider.py
@@ -0,0 +1,258 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer.PyDataProvider2 import *
+import gzip
+import logging
+
+logging.basicConfig(
+    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s',
+)
+logger = logging.getLogger('paddle')
+logger.setLevel(logging.INFO)
+
+OOV_POLICY_IGNORE = 0
+OOV_POLICY_USE = 1
+OOV_POLICY_ERROR = 2
+
+num_original_columns = 3
+
+# Feature combination patterns.
+# [[-1,0], [0,0]] means the previous token at column 0 and the current token
+# at column 0 are combined as one feature.
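+#
+# For example, for the sentence "the dog barks", the pattern [[-1,0], [0,0]]
+# produces the feature "the/dog" at the second token: the words (column 0)
+# at the previous and current positions, joined with '/' by make_features()
+# below.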
+patterns = [
+    [[-2,0]],
+    [[-1,0]],
+    [[0,0]],
+    [[1,0]],
+    [[2,0]],
+
+    [[-1,0], [0,0]],
+    [[0,0], [1,0]],
+
+    [[-2,1]],
+    [[-1,1]],
+    [[0,1]],
+    [[1,1]],
+    [[2,1]],
+    [[-2,1], [-1,1]],
+    [[-1,1], [0,1]],
+    [[0,1], [1,1]],
+    [[1,1], [2,1]],
+
+    [[-2,1], [-1,1], [0,1]],
+    [[-1,1], [0,1], [1,1]],
+    [[0,1], [1,1], [2,1]],
+]
+
+dict_label = {
+    'B-ADJP': 0,
+    'I-ADJP': 1,
+    'B-ADVP': 2,
+    'I-ADVP': 3,
+    'B-CONJP': 4,
+    'I-CONJP': 5,
+    'B-INTJ': 6,
+    'I-INTJ': 7,
+    'B-LST': 8,
+    'I-LST': 9,
+    'B-NP': 10,
+    'I-NP': 11,
+    'B-PP': 12,
+    'I-PP': 13,
+    'B-PRT': 14,
+    'I-PRT': 15,
+    'B-SBAR': 16,
+    'I-SBAR': 17,
+    'B-UCP': 18,
+    'I-UCP': 19,
+    'B-VP': 20,
+    'I-VP': 21,
+    'O': 22
+}
+
+def make_features(sequence):
+    length = len(sequence)
+    num_features = len(sequence[0])
+
+    def get_features(pos):
+        if pos < 0:
+            return ['#B%s' % -pos] * num_features
+        if pos >= length:
+            return ['#E%s' % (pos - length + 1)] * num_features
+        return sequence[pos]
+
+    for i in xrange(length):
+        for pattern in patterns:
+            fname = '/'.join([get_features(i + pos)[f] for pos, f in pattern])
+            sequence[i].append(fname)
+
+'''
+Source file format:
+Each line is for one timestep. The features are separated by spaces.
+An empty line indicates the end of a sequence.
+
+cutoff: a list of numbers. If the count of a feature is smaller than this,
+    it will be ignored.
+If oov_policy[i] is OOV_POLICY_USE, id 0 is reserved for OOV features of
+the i-th column.
+
+Returns a list of dicts, one for each column.
+'''
+def create_dictionaries(filename, cutoff, oov_policy):
+    def add_to_dict(sequence, dicts):
+        num_features = len(dicts)
+        for features in sequence:
+            l = len(features)
+            assert l == num_features, \
+                "Wrong number of features: " + str(features)
+            for i in xrange(l):
+                if features[i] in dicts[i]:
+                    dicts[i][features[i]] += 1
+                else:
+                    dicts[i][features[i]] = 1
+
+    num_features = len(cutoff)
+    dicts = []
+    for i in xrange(num_features):
+        dicts.append(dict())
+
+    f = gzip.open(filename, 'rb')
+
+    sequence = []
+
+    for line in f:
+        line = line.strip()
+        if not line:
+            make_features(sequence)
+            add_to_dict(sequence, dicts)
+            sequence = []
+            continue
+        features = line.split(' ')
+        sequence.append(features)
+
+    for i in xrange(num_features):
+        dct = dicts[i]
+        n = 1 if oov_policy[i] == OOV_POLICY_USE else 0
+        todo = []
+        for k, v in dct.iteritems():
+            if v < cutoff[i]:
+                todo.append(k)
+            else:
+                dct[k] = n
+                n += 1
+
+        if oov_policy[i] == OOV_POLICY_USE:
+            # placeholder so that len(dct) will be the number of features
+            # including OOV
+            dct['#OOV#'] = 0
+
+        logger.info('column %d dict size=%d, ignored %d' % (i, n, len(todo)))
+        for k in todo:
+            del dct[k]
+
+    f.close()
+    return dicts
+
+
+def initializer(settings, **xargs):
+    cutoff = [3, 1, 0]
+    cutoff += [3] * len(patterns)
+    oov_policy = [OOV_POLICY_IGNORE, OOV_POLICY_ERROR, OOV_POLICY_ERROR]
+    oov_policy += [OOV_POLICY_IGNORE] * len(patterns)
+    dicts = create_dictionaries('data/train.txt.gz', cutoff, oov_policy)
+    dicts[2] = dict_label
+    settings.dicts = dicts
+    settings.oov_policy = oov_policy
+    input_types = []
+    num_features = len(dicts)
+    for i in xrange(num_original_columns):
+        input_types.append(integer_sequence(len(dicts[i])))
+        logger.info("slot %s size=%s" % (i, len(dicts[i])))
+    if patterns:
+        dim = 0
+        for i in xrange(num_original_columns, num_features):
+            dim += len(dicts[i])
+        input_types.append(sparse_binary_vector_sequence(dim))
+        logger.info("feature size=%s" % dim)
+    settings.input_types = input_types
+
+'''
+If oov_policy[i] == OOV_POLICY_USE, features in the i-th column that do not
+exist in dicts[i] will be assigned id 0.
+If oov_policy[i] == OOV_POLICY_ERROR, all features in the i-th column MUST
+exist in dicts[i].
+'''
+@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
+def process(settings, filename):
+    input_file = filename
+    dicts = settings.dicts
+    oov_policy = settings.oov_policy
+
+    def gen_sample(sequence):
+        num_features = len(dicts)
+        sample = [list() for i in xrange(num_original_columns)]
+        if patterns:
+            sample.append([])
+        for features in sequence:
+            assert len(features) == num_features, \
+                "Wrong number of features: " + str(features)
+            for i in xrange(num_original_columns):
+                id = dicts[i].get(features[i], -1)
+                if id != -1:
+                    sample[i].append(id)
+                elif oov_policy[i] == OOV_POLICY_IGNORE:
+                    # mark ignored OOV tokens with a sentinel id
+                    sample[i].append(0xffffffff)
+                elif oov_policy[i] == OOV_POLICY_ERROR:
+                    logger.fatal("Unknown token: %s" % features[i])
+                else:
+                    sample[i].append(0)
+
+            if patterns:
+                dim = 0
+                vec = []
+                for i in xrange(num_original_columns, num_features):
+                    id = dicts[i].get(features[i], -1)
+                    if id != -1:
+                        vec.append(dim + id)
+                    elif oov_policy[i] == OOV_POLICY_IGNORE:
+                        pass
+                    elif oov_policy[i] == OOV_POLICY_ERROR:
+                        logger.fatal("Unknown token: %s" % features[i])
+                    else:
+                        # OOV_POLICY_USE: id 0 is reserved for OOV features
+                        vec.append(dim + 0)
+
+                    dim += len(dicts[i])
+                sample[-1].append(vec)
+        return sample
+
+    num_features = len(dicts)
+    f = gzip.open(input_file, 'rb')
+
+    num_sequences = 0
+    sequence = []
+    for line in f:
+        line = line.strip()
+        if not line:
+            make_features(sequence)
+            yield gen_sample(sequence)
+            sequence = []
+            num_sequences += 1
+            continue
+        features = line.split(' ')
+        sequence.append(features)
+
+    f.close()
+
+    logger.info("num_sequences=%s" % num_sequences)
+
diff --git a/demo/sequence_tagging/linear_crf.py b/demo/sequence_tagging/linear_crf.py
new file mode 100644
index 0000000000000000000000000000000000000000..2bd1a20bc52fc546dcd0a0874bc09433e7212152
--- /dev/null
+++ b/demo/sequence_tagging/linear_crf.py
@@ -0,0 +1,84 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+import math
+
+define_py_data_sources2(train_list="data/train.list",
+                        test_list="data/test.list",
+                        module="dataprovider",
+                        obj="process")
+
+
+batch_size = 1
+settings(
+    learning_method=MomentumOptimizer(),
+    batch_size=batch_size,
+    regularization=L2Regularization(batch_size * 1e-4),
+    average_window=0.5,
+    learning_rate=1e-1,
+    learning_rate_decay_a=1e-5,
+    learning_rate_decay_b=0.25,
+)
+
+num_label_types = 23
+
+def get_simd_size(size):
+    return int(math.ceil(float(size) / 8)) * 8
+
+# Currently, in order to use sparse_update=True,
+# the size has to be aligned.
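+# For example, get_simd_size(23) == 24: the 23 label types are padded up
+# to the next multiple of 8.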
+num_label_types = get_simd_size(num_label_types)
+
+features = data_layer(name="features", size=76328)
+word = data_layer(name="word", size=6778)
+pos = data_layer(name="pos", size=44)
+chunk = data_layer(name="chunk",
+                   size=num_label_types)
+
+crf_input = fc_layer(
+    input=features,
+    size=num_label_types,
+    act=LinearActivation(),
+    bias_attr=False,
+    param_attr=ParamAttr(initial_std=0, sparse_update=True))
+
+crf = crf_layer(
+    input=crf_input,
+    label=chunk,
+    param_attr=ParamAttr(name="crfw", initial_std=0),
+)
+
+crf_decoding = crf_decoding_layer(
+    size=num_label_types,
+    input=crf_input,
+    label=chunk,
+    param_attr=ParamAttr(name="crfw"),
+)
+
+sum_evaluator(
+    name="error",
+    input=crf_decoding,
+)
+
+chunk_evaluator(
+    name="chunk_f1",
+    input=[crf_decoding, chunk],
+    chunk_scheme="IOB",
+    num_chunk_types=11,
+)
+
+inputs(word, pos, chunk, features)
+outputs(crf)
diff --git a/demo/sequence_tagging/readme.md b/demo/sequence_tagging/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..2e17fffb83c532f5e5fec1227f169c97c1f20e22
--- /dev/null
+++ b/demo/sequence_tagging/readme.md
@@ -0,0 +1,45 @@
+# Sequence Tagging
+
+This demo is a sequence model that assigns a tag to each token in a sentence. The task is the CoNLL-2000 text chunking task.
+
+## Download data
+```bash
+cd demo/sequence_tagging
+./data/get_data.sh
+```
+
+## Train model
+```bash
+cd demo/sequence_tagging
+./train.sh
+```
+This trains the RNN-CRF model (rnn_crf.py); training the linear CRF model is described at the end of this document.
+
+## Model description
+
+We provide two models. One is a linear CRF model (linear_crf.py), which is equivalent to the one at leon.bottou.org/projects/sgd. The second is a stacked bidirectional RNN and CRF model (rnn_crf.py). The table below compares the two models:
+
+| Model name | Number of parameters | F1 score |
+| ---------- | -------------------- | -------- |
+| linear_crf | 1.8M                 | 0.937    |
+| rnn_crf    | 960K                 | 0.941    |
+
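+## Train the linear CRF model
+
+train_linear.sh runs the same `paddle train` command with the linear CRF config (linear_crf.py) on CPU:
+
+```bash
+cd demo/sequence_tagging
+./train_linear.sh
+```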
diff --git a/demo/sequence_tagging/rnn_crf.py b/demo/sequence_tagging/rnn_crf.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb157bf3ea7193bca2c8a281e1afaf4b5f1d7309
--- /dev/null
+++ b/demo/sequence_tagging/rnn_crf.py
@@ -0,0 +1,130 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+import math
+
+define_py_data_sources2(train_list="data/train.list",
+                        test_list="data/test.list",
+                        module="dataprovider",
+                        obj="process")
+
+batch_size = 16
+settings(
+    learning_method=MomentumOptimizer(),
+    batch_size=batch_size,
+    regularization=L2Regularization(batch_size * 1e-5),
+    average_window=0.5,
+    learning_rate=2e-3,
+    learning_rate_decay_a=5e-7,
+    learning_rate_decay_b=0.5,
+)
+
+word_dim = 128
+hidden_dim = 128
+with_rnn = True
+
+initial_std = 1 / math.sqrt(hidden_dim)
+param_attr = ParamAttr(initial_std=initial_std)
+cpu_layer_attr = ExtraLayerAttribute(device=-1)
+
+default_device(0)
+
+num_label_types = 23
+
+features = data_layer(name="features", size=76328)
+word = data_layer(name="word", size=6778)
+pos = data_layer(name="pos", size=44)
+chunk = data_layer(name="chunk",
+                   size=num_label_types,
+                   layer_attr=cpu_layer_attr)
+
+emb = embedding_layer(
+    input=word, size=word_dim, param_attr=ParamAttr(initial_std=0))
+
+hidden1 = mixed_layer(
+    size=hidden_dim,
+    act=STanhActivation(),
+    bias_attr=True,
+    input=[full_matrix_projection(emb),
+           table_projection(pos, param_attr=param_attr)]
+)
+
+if with_rnn:
+    rnn1 = recurrent_layer(
+        act=ReluActivation(),
+        bias_attr=True,
+        input=hidden1,
+        param_attr=ParamAttr(initial_std=0),
+    )
+
+hidden2 = mixed_layer(
+    size=hidden_dim,
+    act=STanhActivation(),
+    bias_attr=True,
+    input=[
+        full_matrix_projection(hidden1),
+    ] + ([
+        full_matrix_projection(rnn1, param_attr=ParamAttr(initial_std=0))
+    ] if with_rnn else []),
+)
+
+if with_rnn:
+    rnn2 = recurrent_layer(
+        reverse=True,
+        act=ReluActivation(),
+        bias_attr=True,
+        input=hidden2,
+        param_attr=ParamAttr(initial_std=0),
+    )
+
+crf_input = mixed_layer(
+    size=num_label_types,
+    bias_attr=False,
+    input=[
+        full_matrix_projection(hidden2),
+    ] + ([
+        full_matrix_projection(rnn2, param_attr=ParamAttr(initial_std=0))
+    ] if with_rnn else []),
+)
+
+crf = crf_layer(
+    input=crf_input,
+    label=chunk,
+    param_attr=ParamAttr(name="crfw", initial_std=0),
+    layer_attr=cpu_layer_attr,
+)
+
+crf_decoding = crf_decoding_layer(
+    size=num_label_types,
+    input=crf_input,
+    label=chunk,
+    param_attr=ParamAttr(name="crfw"),
+    layer_attr=cpu_layer_attr,
+)
+
+sum_evaluator(
+    name="error",
+    input=crf_decoding,
+)
+
+chunk_evaluator(
+    name="chunk_f1",
+    input=[crf_decoding, chunk],
+    chunk_scheme="IOB",
+    num_chunk_types=11,
+)
+
+inputs(word, pos, chunk, features)
+outputs(crf)
diff --git a/demo/sequence_tagging/train.sh b/demo/sequence_tagging/train.sh
new file mode 100755
index 0000000000000000000000000000000000000000..9a706b98d8686101ba21b513644bdd791062ec26
--- /dev/null
+++ b/demo/sequence_tagging/train.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+paddle train \
+       --config rnn_crf.py \
+       --parallel_nn=1 \
+       --use_gpu=1 \
+       --dot_period=10 \
+       --log_period=1000 \
+       --test_period=0 \
+       --num_passes=10
diff --git a/demo/sequence_tagging/train_linear.sh b/demo/sequence_tagging/train_linear.sh
new file mode 100755
index 0000000000000000000000000000000000000000..597b5afea9c63a8e209b69b6a40e74556e27ac31
--- /dev/null
+++ b/demo/sequence_tagging/train_linear.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+paddle train \
+       --config linear_crf.py \
+       --use_gpu=0 \
+       --dot_period=100 \
+       --log_period=10000 \
+       --test_period=0 \
+       --num_passes=10
diff --git a/python/paddle/trainer_config_helpers/optimizers.py b/python/paddle/trainer_config_helpers/optimizers.py
index 4660a6b5003daf20e8b347a1ff55ef3e34869e26..d4b947517b7d04323593f4c5d16f241e943c261d 100644
--- a/python/paddle/trainer_config_helpers/optimizers.py
+++ b/python/paddle/trainer_config_helpers/optimizers.py
@@ -362,6 +362,13 @@ def __extends__(dict1, dict2):
                     default_factory=lambda _: BaseRegularization())
 def settings(batch_size,
              learning_rate=1e-3,
+             learning_rate_decay_a=0.,
+             learning_rate_decay_b=0.,
+             learning_rate_schedule='poly',
+             learning_rate_args='',
+             average_window=0,
+             do_average_in_cpu=False,
+             max_average_window=None,
              learning_method=None,
              regularization=None,
              is_async=False,
@@ -408,10 +415,14 @@ def settings(batch_size,
     else:
         algorithm = 'owlqn'
 
+    args = ['batch_size', 'learning_rate', 'learning_rate_decay_a',
+            'learning_rate_decay_b', 'learning_rate_schedule',
+            'learning_rate_args', 'average_window', 'do_average_in_cpu',
+            'max_average_window']
     kwargs = dict()
-    kwargs['batch_size'] = batch_size
-    kwargs['learning_rate'] = learning_rate
     kwargs['algorithm'] = algorithm
+    for arg in args:
+        kwargs[arg] = locals()[arg]
 
     kwargs = __extends__(kwargs, learning_method.to_setting_kwargs())
     learning_method.extra_settings()