diff --git a/README.md b/README.md
index 1818ddd61cc5423c4a590815930d007303f18e81..f209e58b66cc4c056ff4ab30283213534eac52c0 100644
--- a/README.md
+++ b/README.md
@@ -53,7 +53,7 @@ You may need to use a domestic mirror source (in China, you can use the Tsinghua
 
 If you need install modules compiled with develop branch, please download packages from [latest packages list](./doc/LATEST_PACKAGES.md) and install with `pip install` command.
 
-Client package support Centos 7 and Ubuntu 18, or you can use HTTP service without install client.
+Paddle Serving packages support CentOS 6/7 and Ubuntu 16/18, or you can use the HTTP service without installing the client.
 
 
 
 Pre-built services with Paddle Serving
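The HTTP route mentioned above needs no client package at all. As a rough sketch (reusing the endpoint and payload of the senta example updated later in this patch, and assuming that web service is already running on port 9393), a plain POST with `requests` is enough:

```python
# Minimal HTTP prediction request, with no paddle-serving-client installed.
# Endpoint and payload follow the senta example in this patch; adjust both
# for whichever web service you actually start.
import json
import requests

data = {"feed": [{"words": "天气不错"}], "fetch": ["class_probs"]}
r = requests.post(
    "http://127.0.0.1:9393/senta/prediction",
    data=json.dumps(data),
    headers={"Content-Type": "application/json"})
print(r.json())
```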
diff --git a/README_CN.md b/README_CN.md
index 29cf095248f4c125b3dba7146e67efe8b7abae6c..05d3ad2100b15830d10c8bc4454a6d319d7b990b 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -55,7 +55,7 @@ pip install paddle-serving-server-gpu # GPU
 
 如果需要使用develop分支编译的安装包,请从[最新安装包列表](./doc/LATEST_PACKAGES.md)中获取下载地址进行下载,使用`pip install`命令进行安装。
 
-客户端安装包支持Centos 7和Ubuntu 18,或者您可以使用HTTP服务,这种情况下不需要安装客户端。
+Paddle Serving安装包支持Centos 6/7和Ubuntu 16/18,或者您可以使用HTTP服务,这种情况下不需要安装客户端。
 
  Paddle Serving预装的服务 
 
diff --git a/python/examples/bert/benchmark.py b/python/examples/bert/benchmark.py
index af75b718b78b2bc130c2411d05d190fc0d298006..f1533d9710d3149a37818d3f1bc146fad6ce6537 100644
--- a/python/examples/bert/benchmark.py
+++ b/python/examples/bert/benchmark.py
@@ -21,11 +21,7 @@ import sys
 import time
 from paddle_serving_client import Client
 from paddle_serving_client.utils import MultiThreadRunner
-from paddle_serving_client.utils import benchmark_args
-from batching import pad_batch_data
-import tokenization
-import requests
-import json
+from paddle_serving_client.utils import benchmark_args, show_latency
 from paddle_serving_app.reader import ChineseBertReader
 
 args = benchmark_args()
@@ -36,42 +32,75 @@ def single_func(idx, resource):
     dataset = []
     for line in fin:
         dataset.append(line.strip())
+
+    profile_flags = False
+    latency_flags = False
+    if os.getenv("FLAGS_profile_client"):
+        profile_flags = True
+    if os.getenv("FLAGS_serving_latency"):
+        latency_flags = True
+        latency_list = []
+    turns = resource["turns"]
+
     if args.request == "rpc":
-        reader = ChineseBertReader(vocab_file="vocab.txt", max_seq_len=20)
+        reader = ChineseBertReader({"max_seq_len": 128})
         fetch = ["pooled_output"]
         client = Client()
         client.load_client_config(args.model)
         client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
-
         start = time.time()
-        for i in range(1000):
-            if args.batch_size == 1:
-                feed_dict = reader.process(dataset[i])
-                result = client.predict(feed=feed_dict, fetch=fetch)
+        for i in range(turns):
+            if args.batch_size >= 1:
+                l_start = time.time()
+                feed_batch = []
+                b_start = time.time()
+                for bi in range(args.batch_size):
+                    feed_batch.append(reader.process(dataset[bi]))
+                b_end = time.time()
+
+                if profile_flags:
+                    sys.stderr.write(
+                        "PROFILE\tpid:{}\tbert_pre_0:{} bert_pre_1:{}\n".format(
+                            os.getpid(),
+                            int(round(b_start * 1000000)),
+                            int(round(b_end * 1000000))))
+                result = client.predict(feed=feed_batch, fetch=fetch)
+
+                l_end = time.time()
+                if latency_flags:
+                    latency_list.append(l_end * 1000 - l_start * 1000)
             else:
                 print("unsupport batch size {}".format(args.batch_size))
 
     elif args.request == "http":
-        start = time.time()
-        header = {"Content-Type": "application/json"}
-        for i in range(1000):
-            dict_data = {"words": dataset[i], "fetch": ["pooled_output"]}
-            r = requests.post(
-                'http://{}/bert/prediction'.format(resource["endpoint"][
-                    idx % len(resource["endpoint"])]),
-                data=json.dumps(dict_data),
-                headers=header)
+        raise ("not implemented")
     end = time.time()
-    return [[end - start]]
+    if latency_flags:
+        return [[end - start], latency_list]
+    else:
+        return [[end - start]]
 
 
 if __name__ == '__main__':
     multi_thread_runner = MultiThreadRunner()
-    endpoint_list = ["127.0.0.1:9292"]
-    result = multi_thread_runner.run(single_func, args.thread,
-                                     {"endpoint": endpoint_list})
+    endpoint_list = [
+        "127.0.0.1:9292", "127.0.0.1:9293", "127.0.0.1:9294", "127.0.0.1:9295"
+    ]
+    turns = 10
+    start = time.time()
+    result = multi_thread_runner.run(
+        single_func, args.thread, {"endpoint": endpoint_list,
+                                   "turns": turns})
+    end = time.time()
+    total_cost = end - start
+
     avg_cost = 0
     for i in range(args.thread):
         avg_cost += result[0][i]
     avg_cost = avg_cost / args.thread
-    print("average total cost {} s.".format(avg_cost))
+
+    print("total cost :{} s".format(total_cost))
+    print("each thread cost :{} s. ".format(avg_cost))
+    print("qps :{} samples/s".format(args.batch_size * args.thread * turns /
+                                     total_cost))
+    if os.getenv("FLAGS_serving_latency"):
+        show_latency(result[1])
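For reference, a small sketch of how the aggregated result above is read, with made-up numbers. It assumes the layout the script relies on: `result[0]` holds one elapsed time per thread, and `result[1]` holds the collected per-request latencies when `FLAGS_serving_latency` is set.

```python
# Hypothetical aggregation, mirroring the arithmetic in benchmark.py above.
thread_num, batch_size, turns = 4, 16, 10
result = [[12.1, 12.4, 12.0, 12.3], [95.2, 101.7, 98.4, 99.8]]  # made-up values
total_cost = 12.6  # wall-clock seconds around multi_thread_runner.run()

avg_cost = sum(result[0]) / thread_num                # mean per-thread cost
qps = batch_size * thread_num * turns / total_cost    # samples per second
print("each thread cost :{} s".format(avg_cost))
print("qps :{} samples/s".format(qps))
```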
diff --git a/python/examples/bert/benchmark.sh b/python/examples/bert/benchmark.sh
index 7f9e2325f3b8f7db288d2b7d82d0d412e05417cb..7ee5f32e9e5d89a836f8962a256bcdf7bf0b62e2 100644
--- a/python/examples/bert/benchmark.sh
+++ b/python/examples/bert/benchmark.sh
@@ -1,9 +1,30 @@
 rm profile_log
-for thread_num in 1 2 4 8 16
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+export FLAGS_profile_server=1
+export FLAGS_profile_client=1
+export FLAGS_serving_latency=1
+python3 -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim False --ir_optim True 2> elog > stdlog &
+
+sleep 5
+
+#warm up
+python3 benchmark.py --thread 8 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+
+for thread_num in 4 8 16
 do
-    $PYTHONROOT/bin/python benchmark.py --thread $thread_num --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
-    echo "========================================"
-    echo "batch size : $batch_size" >> profile_log
-    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
-    tail -n 1 profile >> profile_log
+for batch_size in 1 4 16 64 256
+do
+    python3 benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+    echo "model name :" $1
+    echo "thread num :" $thread_num
+    echo "batch size :" $batch_size
+    echo "=================Done===================="
+    echo "model name :$1" >> profile_log_$1
+    echo "batch size :$batch_size" >> profile_log_$1
+    python3 ../util/show_profile.py profile $thread_num >> profile_log_$1
+    tail -n 8 profile >> profile_log_$1
+    echo "" >> profile_log_$1
+done
 done
+
+ps -ef|grep 'serving'|grep -v grep|cut -c 9-15 | xargs kill -9
diff --git a/python/examples/bert/benchmark_batch.py b/python/examples/bert/benchmark_batch.py
deleted file mode 100644
index 7cedb6aa451e0e4a128f0fedbfde1a896977f601..0000000000000000000000000000000000000000
--- a/python/examples/bert/benchmark_batch.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# pylint: disable=doc-string-missing
-
-from __future__ import unicode_literals, absolute_import
-import os
-import sys
-import time
-from paddle_serving_client import Client
-from paddle_serving_client.utils import MultiThreadRunner
-from paddle_serving_client.utils import benchmark_args
-from batching import pad_batch_data
-import tokenization
-import requests
-import json
-from bert_reader import BertReader
-args = benchmark_args()
-
-
-def single_func(idx, resource):
-    fin = open("data-c.txt")
-    dataset = []
-    for line in fin:
-        dataset.append(line.strip())
-    profile_flags = False
-    if os.environ["FLAGS_profile_client"]:
-        profile_flags = True
-    if args.request == "rpc":
-        reader = BertReader(vocab_file="vocab.txt", max_seq_len=20)
-        fetch = ["pooled_output"]
-        client = Client()
-        client.load_client_config(args.model)
-        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
-        start = time.time()
-        for i in range(1000):
-            if args.batch_size >= 1:
-                feed_batch = []
-                b_start = time.time()
-                for bi in range(args.batch_size):
-                    feed_batch.append(reader.process(dataset[bi]))
-                b_end = time.time()
-                if profile_flags:
-                    print("PROFILE\tpid:{}\tbert_pre_0:{} bert_pre_1:{}".format(
-                        os.getpid(),
-                        int(round(b_start * 1000000)),
-                        int(round(b_end * 1000000))))
-                result = client.predict(feed=feed_batch, fetch=fetch)
-            else:
-                print("unsupport batch size {}".format(args.batch_size))
-
-    elif args.request == "http":
-        raise ("no batch predict for http")
-    end = time.time()
-    return [[end - start]]
-
-
-if __name__ == '__main__':
-    multi_thread_runner = MultiThreadRunner()
-    endpoint_list = ["127.0.0.1:9292"]
-    result = multi_thread_runner.run(single_func, args.thread,
-                                     {"endpoint": endpoint_list})
-    avg_cost = 0
-    for i in range(args.thread):
-        avg_cost += result[0][i]
-    avg_cost = avg_cost / args.thread
-    print("average total cost {} s.".format(avg_cost))
diff --git a/python/examples/bert/benchmark_batch.sh b/python/examples/bert/benchmark_batch.sh
deleted file mode 100644
index 272923776d6640880175745920a8fad9e84972fd..0000000000000000000000000000000000000000
--- a/python/examples/bert/benchmark_batch.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-rm profile_log
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
-
-sleep 5
-
-for thread_num in 1 2 4 8 16
-do
-for batch_size in 1 2 4 8 16 32 64 128 256 512
-do
-    $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
-    echo "========================================"
-    echo "thread num: ", $thread_num
-    echo "batch size: ", $batch_size
-    echo "batch size : $batch_size" >> profile_log
-    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
-    tail -n 1 profile >> profile_log
-done
-done
diff --git a/python/examples/imagenet/benchmark.py b/python/examples/imagenet/benchmark.py
index caa952f121fbd8725c2a6bfe36f0dd84b6a82707..ac7ba8c333d25fb23bfc7695105315bfaa4e76ee 100644
--- a/python/examples/imagenet/benchmark.py
+++ b/python/examples/imagenet/benchmark.py
@@ -93,7 +93,7 @@ def single_func(idx, resource):
 
 if __name__ == '__main__':
     multi_thread_runner = MultiThreadRunner()
-    endpoint_list = ["127.0.0.1:9696"]
+    endpoint_list = ["127.0.0.1:9393"]
     #endpoint_list = endpoint_list + endpoint_list + endpoint_list
     result = multi_thread_runner.run(single_func, args.thread,
                                      {"endpoint": endpoint_list})
diff --git a/python/examples/imagenet/benchmark.sh b/python/examples/imagenet/benchmark.sh
index 618a62c063c0bc4955baf8516bc5bc93e4832394..84885908fa89d050b3ca71386fe2a21533ce0809 100644
--- a/python/examples/imagenet/benchmark.sh
+++ b/python/examples/imagenet/benchmark.sh
@@ -1,12 +1,28 @@
 rm profile_log
-for thread_num in 1 2 4 8
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+export FLAGS_profile_server=1
+export FLAGS_profile_client=1
+python -m paddle_serving_server_gpu.serve --model $1 --port 9393 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
+
+sleep 5
+
+#warm up
+$PYTHONROOT/bin/python benchmark.py --thread 8 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+
+for thread_num in 4 8 16
 do
-for batch_size in 1 2 4 8 16 32 64 128
+for batch_size in 1 4 16 64 256
 do
-    $PYTHONROOT/bin/python benchmark.py --thread $thread_num --batch_size $batch_size --model ResNet50_vd_client_config/serving_client_conf.prototxt --request rpc > profile 2>&1
-    echo "========================================"
-    echo "batch size : $batch_size" >> profile_log
+    $PYTHONROOT/bin/python benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+    echo "model name :" $1
+    echo "thread num :" $thread_num
+    echo "batch size :" $batch_size
+    echo "=================Done===================="
+    echo "model name :$1" >> profile_log
+    echo "batch size :$batch_size" >> profile_log
     $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
-    tail -n 1 profile >> profile_log
+    tail -n 8 profile >> profile_log
 done
 done
+
+ps -ef|grep 'serving'|grep -v grep|cut -c 9-15 | xargs kill -9
diff --git a/python/examples/lac/lac_client.py b/python/examples/lac/lac_client.py
index ab9af730abb2f5b33f4d0292115b2f7bf682f278..22f3c511dcd2540365623ef9428b60cfcb5e5a34 100644
--- a/python/examples/lac/lac_client.py
+++ b/python/examples/lac/lac_client.py
@@ -35,5 +35,4 @@ for line in sys.stdin:
     begin = fetch_map['crf_decode.lod'][0]
     end = fetch_map['crf_decode.lod'][1]
     segs = reader.parse_result(line, fetch_map["crf_decode"][begin:end])
-
-    print({"word_seg": "|".join(segs)})
+    print("word_seg: " + "|".join(str(words) for words in segs))
diff --git a/python/examples/senta/README.md b/python/examples/senta/README.md
index 9aeb6d1191719e067e2cb99d408a6d091c25ede3..8929a9312c17264800f299f77afb583221006068 100644
--- a/python/examples/senta/README.md
+++ b/python/examples/senta/README.md
@@ -5,6 +5,8 @@
 ```
 python -m paddle_serving_app.package --get_model senta_bilstm
 python -m paddle_serving_app.package --get_model lac
+tar -xzvf senta_bilstm.tar.gz
+tar -xzvf lac.tar.gz
 ```
 
 ## Start HTTP Service
@@ -17,5 +19,5 @@ In this demo, the LAC task is placed in the preprocessing part of the HTTP predi
 
 ## Client prediction
 ```
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "天气不错"}], "fetch":["class_probs"]}' http://127.0.0.1:9292/senta/prediction
+curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "天气不错"}], "fetch":["class_probs"]}' http://127.0.0.1:9393/senta/prediction
 ```
diff --git a/python/examples/senta/README_CN.md b/python/examples/senta/README_CN.md
index f958af221d843748836bea325f87ba603411d39c..e5624dc975e6bc00de219f68cbf74dea7cac8360 100644
--- a/python/examples/senta/README_CN.md
+++ b/python/examples/senta/README_CN.md
@@ -5,6 +5,8 @@
 ```
 python -m paddle_serving_app.package --get_model senta_bilstm
 python -m paddle_serving_app.package --get_model lac
+tar -xzvf lac.tar.gz
+tar -xzvf senta_bilstm.tar.gz
 ```
 
 ## 启动HTTP服务
@@ -17,5 +19,5 @@ python senta_web_service.py
 
 ## 客户端预测
 ```
-curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "天气不错"}], "fetch":["class_probs"]}' http://127.0.0.1:9292/senta/prediction
+curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "天气不错"}], "fetch":["class_probs"]}' http://127.0.0.1:9393/senta/prediction
 ```
diff --git a/python/examples/util/show_profile.py b/python/examples/util/show_profile.py
index 9153d939338f0ee171af539b9f955d51802ad547..1581dda19bb0abefe6eb21592bda7fc97d8fb7cd 100644
--- a/python/examples/util/show_profile.py
+++ b/python/examples/util/show_profile.py
@@ -31,7 +31,7 @@ with open(profile_file) as f:
         if line[0] == "PROFILE":
             prase(line[2])
 
-print("thread num {}".format(thread_num))
+print("thread num :{}".format(thread_num))
 for name in time_dict:
-    print("{} cost {} s in each thread ".format(name, time_dict[name] / (
+    print("{} cost :{} s in each thread ".format(name, time_dict[name] / (
         1000000.0 * float(thread_num))))
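show_profile.py consumes the tab-separated PROFILE lines that the benchmarks write to stderr (see the bert example earlier in this patch). Below is a rough sketch of one record and the per-thread figure it becomes; the timestamps are made up, and the actual pairing of the `_0`/`_1` fields lives in the unchanged `prase()` function:

```python
# One hypothetical PROFILE record: paired start/end timestamps in microseconds.
line = "PROFILE\tpid:12345\tbert_pre_0:1000000 bert_pre_1:1250000"
start_field, end_field = line.split("\t")[2].split(" ")
cost_us = int(end_field.split(":")[1]) - int(start_field.split(":")[1])
thread_num = 4
print("bert_pre cost :{} s in each thread ".format(cost_us / (1000000.0 * thread_num)))
```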
diff --git a/python/paddle_serving_app/models/model_list.py b/python/paddle_serving_app/models/model_list.py
index cf0ca3bf5765d65065e541462eb919ccc5c4b978..d5f42ab78acdbe837a719908d27cda513da02c3f 100644
--- a/python/paddle_serving_app/models/model_list.py
+++ b/python/paddle_serving_app/models/model_list.py
@@ -38,7 +38,7 @@ class ServingModels(object):
         object_detection_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ObjectDetection/"
         ocr_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/OCR/"
         senta_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SentimentAnalysis/"
-        semantic_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticRepresentation/"
+        semantic_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/"
         wordseg_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/LexicalAnalysis/"
 
         self.url_dict = {}
diff --git a/python/paddle_serving_app/reader/lac_reader.py b/python/paddle_serving_app/reader/lac_reader.py
index 7e804ff371e2d90d79f7f663e83a854b1b0c9647..8f7d79a6a1e7ce8c4c86b689e2856eea6fa42158 100644
--- a/python/paddle_serving_app/reader/lac_reader.py
+++ b/python/paddle_serving_app/reader/lac_reader.py
@@ -111,6 +111,10 @@ class LACReader(object):
         return word_ids
 
     def parse_result(self, words, crf_decode):
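+        # Python 2 compat: decode byte strings to unicode; on Python 3 this is a no-op (the NameError is swallowed below).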
+        try:
+            words = unicode(words, "utf-8")
+        except:
+            pass
         tags = [self.id2label_dict[str(x[0])] for x in crf_decode]
 
         sent_out = []
diff --git a/python/paddle_serving_client/utils/__init__.py b/python/paddle_serving_client/utils/__init__.py
index 381da6bf9bade2bb0627f4c07851012360905de5..53f40726fbf21a0607b47bb29a20aa6ff50b6221 100644
--- a/python/paddle_serving_client/utils/__init__.py
+++ b/python/paddle_serving_client/utils/__init__.py
@@ -17,6 +17,7 @@ import sys
 import subprocess
 import argparse
 from multiprocessing import Pool
+import numpy as np
 
 
 def benchmark_args():
@@ -35,6 +36,17 @@ def benchmark_args():
     return parser.parse_args()
 
 
+def show_latency(latency_list):
+    latency_array = np.array(latency_list)
+    info = "latency:\n"
+    info += "mean :{} ms\n".format(np.mean(latency_array))
+    info += "median :{} ms\n".format(np.median(latency_array))
+    info += "80 percent :{} ms\n".format(np.percentile(latency_array, 80))
+    info += "90 percent :{} ms\n".format(np.percentile(latency_array, 90))
+    info += "99 percent :{} ms\n".format(np.percentile(latency_array, 99))
+    sys.stderr.write(info)
+
+
 class MultiThreadRunner(object):
     def __init__(self):
         pass
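A quick usage sketch of the new `show_latency` helper, matching how the bert benchmark calls it; the latency samples here are made up, in milliseconds:

```python
from paddle_serving_client.utils import show_latency

latency_list = [101.3, 98.7, 110.2, 95.4, 120.9]  # hypothetical per-request latencies (ms)
show_latency(latency_list)  # writes mean/median/80/90/99th percentiles to stderr
```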
diff --git a/python/paddle_serving_server/monitor.py b/python/paddle_serving_server/monitor.py
index 3f1ff6436917b8ae7ff4ea06fcae1f55bd65e887..84146039c40794436030a8c5c6ba9d18ccbfda06 100644
--- a/python/paddle_serving_server/monitor.py
+++ b/python/paddle_serving_server/monitor.py
@@ -20,7 +20,7 @@ Usage:
 import os
 import time
 import argparse
-import commands
+import subprocess
 import datetime
 import shutil
 import tarfile
@@ -209,7 +209,7 @@ class HadoopMonitor(Monitor):
         remote_filepath = os.path.join(path, filename)
         cmd = '{} -ls {} 2>/dev/null'.format(self._cmd_prefix, remote_filepath)
         _LOGGER.debug('check cmd: {}'.format(cmd))
-        [status, output] = commands.getstatusoutput(cmd)
+        [status, output] = subprocess.getstatusoutput(cmd)
         _LOGGER.debug('resp: {}'.format(output))
         if status == 0:
             [_, _, _, _, _, mdate, mtime, _] = output.split('\n')[-1].split()
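The `commands` module no longer exists in Python 3; `subprocess.getstatusoutput` keeps the same `(status, output)` shape, so the call site above is otherwise unchanged. A quick sketch:

```python
import subprocess

# Replacement for the removed commands.getstatusoutput: run through a shell
# and return (exit_status, combined_stdout_and_stderr).
status, output = subprocess.getstatusoutput("ls /tmp 2>/dev/null")
print(status, output)
```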
diff --git a/python/paddle_serving_server_gpu/monitor.py b/python/paddle_serving_server_gpu/monitor.py
index 3f1ff6436917b8ae7ff4ea06fcae1f55bd65e887..84146039c40794436030a8c5c6ba9d18ccbfda06 100644
--- a/python/paddle_serving_server_gpu/monitor.py
+++ b/python/paddle_serving_server_gpu/monitor.py
@@ -20,7 +20,7 @@ Usage:
 import os
 import time
 import argparse
-import commands
+import subprocess
 import datetime
 import shutil
 import tarfile
@@ -209,7 +209,7 @@ class HadoopMonitor(Monitor):
         remote_filepath = os.path.join(path, filename)
         cmd = '{} -ls {} 2>/dev/null'.format(self._cmd_prefix, remote_filepath)
         _LOGGER.debug('check cmd: {}'.format(cmd))
-        [status, output] = commands.getstatusoutput(cmd)
+        [status, output] = subprocess.getstatusoutput(cmd)
         _LOGGER.debug('resp: {}'.format(output))
         if status == 0:
             [_, _, _, _, _, mdate, mtime, _] = output.split('\n')[-1].split()
diff --git a/tools/Dockerfile b/tools/Dockerfile
index dc39adf01288f092143803557b322a0c8fbcb2b4..3c701725400350247153f828410d06cec69856f5 100644
--- a/tools/Dockerfile
+++ b/tools/Dockerfile
@@ -9,4 +9,6 @@ RUN yum -y install wget && \
     yum -y install python3 python3-devel && \
     yum clean all && \
     curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
-    python get-pip.py && rm get-pip.py
+    python get-pip.py && rm get-pip.py && \
+    localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \
+    echo "export LANG=en_US.utf8" >> /root/.bashrc
diff --git a/tools/Dockerfile.centos6.devel b/tools/Dockerfile.centos6.devel
index 5223693d846bdbc90bdefe58c26db29d6a81359d..83981dcc4731252dfc75270b5ce6fc623a0266a8 100644
--- a/tools/Dockerfile.centos6.devel
+++ b/tools/Dockerfile.centos6.devel
@@ -44,4 +44,6 @@ RUN yum -y install wget && \
     cd .. && rm -rf Python-3.6.8* && \
     pip3 install google protobuf setuptools wheel flask numpy==1.16.4 && \
     yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
-    yum clean all
+    yum clean all && \
+    localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \
+    echo "export LANG=en_US.utf8" >> /root/.bashrc
diff --git a/tools/Dockerfile.centos6.gpu.devel b/tools/Dockerfile.centos6.gpu.devel
index 1432d49abe9a4aec3b558d855c9cfcf30efef461..9ee3591b9a1e2ea5881106cf7e67ca28b24c1890 100644
--- a/tools/Dockerfile.centos6.gpu.devel
+++ b/tools/Dockerfile.centos6.gpu.devel
@@ -44,4 +44,5 @@ RUN yum -y install wget && \
     cd .. && rm -rf Python-3.6.8* && \
     pip3 install google protobuf setuptools wheel flask numpy==1.16.4 && \
     yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
-    yum clean all
+    yum clean all && \
+    echo "export LANG=en_US.utf8" >> /root/.bashrc
diff --git a/tools/Dockerfile.devel b/tools/Dockerfile.devel
index 385e568273eab54f7dfa51a20bb7dcd89cfa98a8..e4bcd33534cb9e887f49fcba5029619aaa1dea4c 100644
--- a/tools/Dockerfile.devel
+++ b/tools/Dockerfile.devel
@@ -21,4 +21,6 @@ RUN yum -y install wget >/dev/null \
     && yum install -y python3 python3-devel \
     && pip3 install google protobuf setuptools wheel flask \
     && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
-    && yum clean all
+    && yum clean all \
+    && localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
+    && echo "export LANG=en_US.utf8" >> /root/.bashrc
diff --git a/tools/Dockerfile.gpu b/tools/Dockerfile.gpu
index bf05080ca72e90b2179f6a717f6f4e86e7aefe29..2f38a3a3cd1c8987d34a81259ec9ad6ba67156a7 100644
--- a/tools/Dockerfile.gpu
+++ b/tools/Dockerfile.gpu
@@ -15,6 +15,7 @@ RUN yum -y install wget && \
     echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> /root/.bashrc && \
     ln -s /usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudnn.so.7 /usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudnn.so && \
     echo 'export LD_LIBRARY_PATH=/usr/local/cuda-9.0/targets/x86_64-linux/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \
+    echo "export LANG=en_US.utf8" >> /root/.bashrc && \
     mkdir -p /usr/local/cuda/extras
 
 COPY --from=builder /usr/local/cuda/extras/CUPTI /usr/local/cuda/extras/CUPTI
diff --git a/tools/Dockerfile.gpu.devel b/tools/Dockerfile.gpu.devel
index 2ffbe4601e1f7e9b05c87f9562b3e0ffc4b967ff..057201cefa1f8de7a105ea9b7f93e7ca9e342777 100644
--- a/tools/Dockerfile.gpu.devel
+++ b/tools/Dockerfile.gpu.devel
@@ -22,4 +22,5 @@ RUN yum -y install wget >/dev/null \
     && yum install -y python3 python3-devel \
     && pip3 install google protobuf setuptools wheel flask \
     && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
-    && yum clean all
+    && yum clean all \
+    && echo "export LANG=en_US.utf8" >> /root/.bashrc