diff --git a/python/examples/pipeline/PaddleSeg/N0060.jpg b/python/examples/pipeline/PaddleSeg/N0060.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..feac2837eaa5ae5db414d9769a0c5a830dde268d
Binary files /dev/null and b/python/examples/pipeline/PaddleSeg/N0060.jpg differ
diff --git a/python/examples/pipeline/PaddleSeg/benchmark.py b/python/examples/pipeline/PaddleSeg/benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..10078994047abab5e9790a1b32d9692c89400ec6
--- /dev/null
+++ b/python/examples/pipeline/PaddleSeg/benchmark.py
@@ -0,0 +1,113 @@
+import sys
+import os
+import base64
+import yaml
+import requests
+import time
+import json
+try:
+    from paddle_serving_server_gpu.pipeline import PipelineClient
+except ImportError:
+    from paddle_serving_server.pipeline import PipelineClient
+import numpy as np
+from paddle_serving_client.utils import MultiThreadRunner
+from paddle_serving_client.utils import benchmark_args, show_latency
+def parse_benchmark(filein, fileout):
+    with open(filein, "r") as fin:
+        res = yaml.load(fin)
+        del_list = []
+        for key in res["DAG"].keys():
+            if "call" in key:
+                del_list.append(key)
+        for key in del_list:
+            del res["DAG"][key]
+        with open(fileout, "w") as fout:
+            yaml.dump(res, fout, default_flow_style=False)
+
+def gen_yml(device, gpu_id):
+    fin = open("config.yml", "r")
+    config = yaml.load(fin)
+    fin.close()
+    config["dag"]["tracer"] = {"interval_s": 10}
+    if device == "gpu":
+        config["op"]["seg"]["local_service_conf"]["device_type"] = 1
+        config["op"]["seg"]["local_service_conf"]["devices"] = gpu_id
+    else:
+        config["op"]["seg"]["local_service_conf"]["device_type"] = 0
+    with open("config2.yml", "w") as fout:
+        yaml.dump(config, fout, default_flow_style=False)
+
+def cv2_to_base64(image):
+    return base64.b64encode(image).decode('utf8')
+
+def run_http(idx, batch_size):
+    print("start thread ({})".format(idx))
+    url = "http://127.0.0.1:18080/seg/prediction"
+    start = time.time()
+
+    with open(os.path.join(".", "N0060.jpg"), 'rb') as file:
+        image_data1 = file.read()
+    image = cv2_to_base64(image_data1)
+    keys, values = [], []
+    for i in range(batch_size):
+        keys.append("image_{}".format(i))
+        values.append(image)
+    data = {"key": keys, "value": values}
+    start_time = time.time()
+    while True:
+        r = requests.post(url=url, data=json.dumps(data))
+        print(r.json())
+        if time.time() - start_time > 20:
+            break
+    end = time.time()
+    return [[end - start]]
+
+def multithread_http(thread, batch_size):
+    multi_thread_runner = MultiThreadRunner()
+    result = multi_thread_runner.run(run_http, thread, batch_size)
+
+def run_rpc(thread, batch_size):
+    client = PipelineClient()
+    client.connect(['127.0.0.1:9993'])  # rpc_port in config.yml
+    start = time.time()
+    test_img_dir = "imgs/"
+    for img_file in os.listdir(test_img_dir):
+        with open(os.path.join(test_img_dir, img_file), 'rb') as file:
+            image_data = file.read()
+        image = cv2_to_base64(image_data)
+    start_time = time.time()
+    while True:
+        ret = client.predict(feed_dict={"image": image}, fetch=["res"])
+        if time.time() - start_time > 10:
+            break
+    end = time.time()
+    return [[end - start]]
+
+
+def multithread_rpc(thread, batch_size):
+    multi_thread_runner = MultiThreadRunner()
+    result = multi_thread_runner.run(run_rpc, thread, batch_size)
+
+if __name__ == "__main__":
+    if sys.argv[1] == "yaml":
+        mode = sys.argv[2]  # brpc / local predictor
+        thread = int(sys.argv[3])
+        device = sys.argv[4]
+        if device == "gpu":
+            gpu_id = sys.argv[5]
+        else:
+            gpu_id = None
+        gen_yml(device, gpu_id)
+    elif sys.argv[1] == "run":
+        mode = sys.argv[2]  # http / rpc
+        thread = int(sys.argv[3])
+        batch_size = int(sys.argv[4])
+        if mode == "http":
+            multithread_http(thread, batch_size)
+        elif mode == "rpc":
+            multithread_rpc(thread, batch_size)
+    elif sys.argv[1] == "dump":
+        filein = sys.argv[2]
+        fileout = sys.argv[3]
+        parse_benchmark(filein, fileout)
+
diff --git a/python/examples/pipeline/PaddleSeg/benchmark.sh b/python/examples/pipeline/PaddleSeg/benchmark.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7c5a57f5430c49cf3b18f155c331b29939de060b
--- /dev/null
+++ b/python/examples/pipeline/PaddleSeg/benchmark.sh
@@ -0,0 +1,42 @@
+export FLAGS_profile_pipeline=1
+alias python3="python3.7"
+modelname="seg"
+use_gpu=0
+gpu_id="0"
+benchmark_config_filename="benchmark_config.yaml"
+
+# HTTP
+ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
+sleep 3
+if [ $use_gpu -eq 1 ]; then
+    python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
+else
+    python3 benchmark.py yaml local_predictor 1 cpu
+fi
+rm -rf profile_log_$modelname
+for thread_num in 1
+do
+    for batch_size in 1
+    do
+        echo "#----seg thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
+        rm -rf PipelineServingLogs
+        rm -rf cpu_utilization.py
+        python3 web_service.py >web.log 2>&1 &
+        sleep 3
+        nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
+        nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
+        echo -e "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
+        python3 benchmark.py run http $thread_num $batch_size
+        python3 cpu_utilization.py >>profile_log_$modelname
+        python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
+        ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
+        ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
+        python3 benchmark.py dump benchmark.log benchmark.tmp
+        mv benchmark.tmp benchmark.log
+        awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
+        awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
+        cat benchmark.log >> profile_log_$modelname
+        python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
+        #rm -rf gpu_use.log gpu_utilization.log
+    done
+done
diff --git a/python/examples/pipeline/PaddleSeg/config.yml b/python/examples/pipeline/PaddleSeg/config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0923c9626d080b52c7b9d439859c58590fbbe4fd
--- /dev/null
+++ b/python/examples/pipeline/PaddleSeg/config.yml
@@ -0,0 +1,33 @@
+#worker_num, maximum concurrency. When build_dag_each_worker=True, the framework creates worker_num processes, each building its own gRPC server and DAG.
+##When build_dag_each_worker=False, the framework sets max_workers=worker_num for the gRPC thread pool of the main thread.
+worker_num: 1
+
+#HTTP port. rpc_port and http_port must not both be empty; when rpc_port is valid and http_port is empty, no http_port is generated automatically.
+http_port: 18080
+rpc_port: 9993
+
+dag:
+    #Op resource type: True for the thread model, False for the process model
+    is_thread_op: False
+op:
+    seg:
+        #When the op config has no server_endpoints, local service settings are read from local_service_conf
+        local_service_conf:
+
+            #Concurrency; thread-level when is_thread_op=True, otherwise process-level
+            concurrency: 1
+
+            #Path of the segmentation model
+            model_config: unet_model/
+
+            #Device type; when empty it is decided by devices (CPU/GPU). 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+            device_type: 1
+
+            #Device IDs; "" or unset means CPU prediction; "0" or "0,1,2" means GPU prediction on the listed cards
+            devices: "0" # "0,1"
+
+            #Client type: brpc, grpc or local_predictor; local_predictor does not start a Serving service and predicts in-process
+            client_type: local_predictor
+
+            #Fetch list; names follow the alias_name of fetch_var in client_config
+            fetch_list: ["output"]
diff --git a/python/examples/pipeline/PaddleSeg/web_service.py b/python/examples/pipeline/PaddleSeg/web_service.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a0bbe4c67f45027d8232cf5fe36d1a1929a503a
--- /dev/null
+++ b/python/examples/pipeline/PaddleSeg/web_service.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+from paddle_serving_app.reader import Sequential, Resize, SegPostprocess
+try:
+    from paddle_serving_server_gpu.web_service import WebService, Op
+except ImportError:
+    from paddle_serving_server.web_service import WebService, Op
+import logging
+import numpy as np
+import base64, cv2
+
+
+class SegOp(Op):
+    def init_op(self):
+        self.seq = Sequential([Resize((512, 512), interpolation=cv2.INTER_LINEAR)])
+        self.seg_postprocess = SegPostprocess(2)
+
+    def preprocess(self, input_dicts, data_id, log_id):
+        (_, input_dict), = input_dicts.items()
+        batch_size = len(input_dict.keys())
+        imgs = []
+        for key in input_dict.keys():
+            data = base64.b64decode(input_dict[key].encode('utf8'))
+            data = np.frombuffer(data, np.uint8)
+            im = cv2.imdecode(data, cv2.IMREAD_COLOR)
+            img = self.seq(im)
+            imgs.append(img[np.newaxis, :].copy())
+        input_imgs = np.concatenate(imgs, axis=0)
+        print(input_imgs.shape)
+        return {"seg": input_imgs}, False, None, ""
+
+    def postprocess(self, input_dicts, fetch_dict, log_id):
+        print(fetch_dict)
+        return {"res": fetch_dict["output"].tolist()}, None, ""
+
+
+class ImageService(WebService):
+    def get_pipeline_response(self, read_op):
+        seg_op = SegOp(name="seg", input_ops=[read_op])
+        return seg_op
+
+
+seg_service = ImageService(name="seg")
+seg_service.prepare_pipeline_config("config2.yml")
+seg_service.run_service()
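
A minimal single-request client sketch for trying out the new pipeline service, mirroring the request format used in benchmark.py; it assumes the server from web_service.py is already running on its default http_port 18080 and that N0060.jpg is in the working directory:

    import base64
    import json

    import requests  # same HTTP client used by benchmark.py

    # Base64-encode the test image added in this example.
    with open("N0060.jpg", "rb") as f:
        image = base64.b64encode(f.read()).decode("utf8")

    # The pipeline web service exposes the op under its name ("seg"),
    # so requests go to /seg/prediction with parallel "key"/"value" lists.
    data = {"key": ["image_0"], "value": [image]}
    resp = requests.post("http://127.0.0.1:18080/seg/prediction", data=json.dumps(data))
    print(resp.json())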