diff --git a/deepspeech/__init__.py b/deepspeech/__init__.py
index 493f10a6f5f0abc5679b0fcd76bfe73b170998ab..f0cd1bae2bc47e9767c70372519120b6eaf952e8 100644
--- a/deepspeech/__init__.py
+++ b/deepspeech/__init__.py
@@ -362,11 +362,19 @@ def ctc_loss(logits,
              label_lengths,
              blank=0,
              reduction='mean',
-             norm_by_times=True):
+             norm_by_times=False,
+             norm_by_batchsize=True,
+             norm_by_total_logits_len=False):
     #logger.info("my ctc loss with norm by times")
     ## https://github.com/PaddlePaddle/Paddle/blob/f5ca2db2cc/paddle/fluid/operators/warpctc_op.h#L403
-    loss_out = paddle.fluid.layers.warpctc(logits, labels, blank, norm_by_times,
-                                           input_lengths, label_lengths)
+    loss_out = paddle.fluid.layers.warpctc(
+        logits,
+        labels,
+        blank,
+        norm_by_times,
+        input_lengths,
+        label_lengths,
+        norm_by_batchsize, )
 
     loss_out = paddle.fluid.layers.squeeze(loss_out, [-1])
     assert reduction in ['mean', 'sum', 'none']
diff --git a/examples/librispeech/s2/README.md b/examples/librispeech/s2/README.md
index e4022f014a40888afdbe4d2f84c88cc4b9c198dc..1f7c69194691b37f9b31d09ce2d10a4b8b7468cc 100644
--- a/examples/librispeech/s2/README.md
+++ b/examples/librispeech/s2/README.md
@@ -1,41 +1,13 @@
 # LibriSpeech
 
-## Data
-| Data Subset | Duration in Seconds |
-| data/manifest.train |  0.83s ~ 29.735s |
-| data/manifest.dev | 1.065 ~ 35.155s |  
-| data/manifest.test-clean | 1.285s ~ 34.955s |
+| Model | Params | Config | Augmentation| Loss |
+| --- | --- | --- | --- | 
+| transformer | 32.52 M | conf/transformer.yaml | spec_aug | 6.3197922706604 |
 
-## Conformer
-| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |  
-| --- | --- | --- | --- | --- | --- | --- | --- |
-| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean | attention | - | - |  
-| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_greedy_search |  |  |  
-| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_prefix_beam_search |  | |  
-| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean | attention_rescoring |  |  |  
 
-### Test w/o length filter
-| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |  
-| --- | --- | --- | --- | --- | --- | --- | --- |
-| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean-all | attention |  |  |  
-
-
-## Chunk Conformer
-
-| Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size & Left Chunks | Loss | WER |  
-| --- | --- | --- | --- | --- | --- | --- | --- | --- |  
-| conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | attention | 16, -1 |  |  |  
-| conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | ctc_greedy_search | 16, -1 |  |  |  
-| conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | ctc_prefix_beam_search | 16, -1 |  | - |  
-| conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | attention_rescoring | 16, -1 |  | - |  
-
-
-## Transformer
-| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |  
-| --- | --- | --- | --- | --- | --- | --- | --- |
-| transformer | 32.52 M | conf/transformer.yaml | spec_aug + shift | test-clean | attention |  |  |  
-
-### Test w/o length filter
-| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |  
-| --- | --- | --- | --- | --- | --- | --- | --- |
-| transformer | 32.52 M | conf/transformer.yaml | spec_aug + shift | test-clean-all | attention | | |  
+| Test Set | Decode Method | #Snt | #Wrd | Corr | Sub | Del | Ins | Err | S.Err |   
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| test-clean | attention | 2620 | 52576 | 96.4 | 2.5 | 1.1 | 0.4 | 4.0 | 34.7 |   
+| test-clean | ctc_greedy_search | 2620 | 52576 | 95.9 | 3.7 | 0.4 | 0.5 | 4.6 | 48.0 |   
+| test-clean | ctc_prefix_beamsearch | 2620 | 52576 | 95.9 | 3.7 | 0.4 | 0.5 | 4.6 | 47.6 |   
+| test-clean | attention_rescore | 2620 | 52576 | 96.8 | 2.9 | 0.3 | 0.4 | 3.7 | 38.0 |   
diff --git a/examples/librispeech/s2/local/test.sh b/examples/librispeech/s2/local/test.sh
index 5eeb2d6126e801801b4b8be13fa66d0781859d9a..379a3787e61d172e905902267429877ea8465d6d 100755
--- a/examples/librispeech/s2/local/test.sh
+++ b/examples/librispeech/s2/local/test.sh
@@ -6,7 +6,7 @@ expdir=exp
 datadir=data
 nj=32
 
-lmtag=
+lmtag='nolm'
 
 recog_set="test-clean test-other dev-clean dev-other"
 recog_set="test-clean"
@@ -29,11 +29,18 @@ config_path=$1
 dict=$2
 ckpt_prefix=$3
 
+
+ckpt_dir=$(dirname `dirname ${ckpt_prefix}`)
+echo "ckpt dir: ${ckpt_dir}"
+
+ckpt_tag=$(basename ${ckpt_prefix})
+echo "ckpt tag: ${ckpt_tag}"
+
 chunk_mode=false
 if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then
     chunk_mode=true
 fi
-echo "chunk mode ${chunk_mode}"
+echo "chunk mode: ${chunk_mode}"
 
 
 # download language model
@@ -46,11 +53,13 @@ pids=() # initialize pids
 
 for dmethd in attention ctc_greedy_search ctc_prefix_beam_search attention_rescoring; do
 (
+    echo "decode method: ${dmethd}"
     for rtask in ${recog_set}; do
     (
-        decode_dir=decode_${rtask}_${dmethd}_$(basename ${config_path%.*})_${lmtag}
+        echo "dataset: ${rtask}"
+        decode_dir=${ckpt_dir}/decode/decode_${rtask/-/_}_${dmethd}_$(basename ${config_path%.*})_${lmtag}_${ckpt_tag}
         feat_recog_dir=${datadir}
-        mkdir -p ${expdir}/${decode_dir}
+        mkdir -p ${decode_dir}
         mkdir -p ${feat_recog_dir}
 
         # split data
@@ -61,7 +70,7 @@ for dmethd in attention ctc_greedy_search ctc_prefix_beam_search attention_resco
 
         # set batchsize 0 to disable batch decoding
         batch_size=1
-        ${decode_cmd} JOB=1:${nj} ${expdir}/${decode_dir}/log/decode.JOB.log \
+        ${decode_cmd} JOB=1:${nj} ${decode_dir}/log/decode.JOB.log \
             python3 -u ${BIN_DIR}/test.py \
             --model-name u2_kaldi \
             --run-mode test \
@@ -69,7 +78,7 @@ for dmethd in attention ctc_greedy_search ctc_prefix_beam_search attention_resco
             --dict-path ${dict} \
             --config ${config_path} \
             --checkpoint_path ${ckpt_prefix} \
-            --result-file ${expdir}/${decode_dir}/data.JOB.json \
+            --result-file ${decode_dir}/data.JOB.json \
             --opts decoding.decoding_method ${dmethd} \
             --opts decoding.batch_size ${batch_size} \
             --opts data.test_manifest ${feat_recog_dir}/split${nj}/JOB/manifest.${rtask}
diff --git a/examples/librispeech/s2/run.sh b/examples/librispeech/s2/run.sh
index 46b6ac1b406bdb344ae266365524d4ffaa95af41..1ffe3e5c5c3e7ba3e4d4095a6202f8a385bbe9bf 100755
--- a/examples/librispeech/s2/run.sh
+++ b/examples/librispeech/s2/run.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+
 set -e
 
 . ./path.sh || exit 1;
@@ -7,8 +8,9 @@ set -e
 stage=0
 stop_stage=100
 conf_path=conf/transformer.yaml
-dict_path=data/train_960_unigram5000_units.txt
+dict_path=data/bpe_unigram_5000_units.txt
 avg_num=10
+
 source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
 
 avg_ckpt=avg_${avg_num}