diff --git a/docs/getting_started/create_a_model_deployment.rst b/docs/getting_started/create_a_model_deployment.rst index 918c84a2f33692dd923ccfdffa649f8f83bcda76..46be54ad0bb3ac53f044f07b31e83520130ec360 100644 --- a/docs/getting_started/create_a_model_deployment.rst +++ b/docs/getting_started/create_a_model_deployment.rst @@ -35,7 +35,7 @@ Configurations * - target_abis - The target ABI to build, can be one or more of 'host', 'armeabi-v7a' or 'arm64-v8a' * - target_socs - - build for specified socs if you just want use the model for that socs. + - [optional] build for specified socs if you just want to use the model for those socs. * - embed_model_data - Whether embedding model weights as the code, default to 0 * - build_type @@ -50,9 +50,9 @@ Configurations * - model_sha256_checksum - The SHA256 checksum of the model file * - weight_file_path - - The path of the model weights file, used by Caffe model + - [optional] The path of the model weights file, used by Caffe model * - weight_sha256_checksum - - The SHA256 checksum of the weight file, used by Caffe model + - [optional] The SHA256 checksum of the weight file, used by Caffe model * - subgraphs - subgraphs key. ** DO NOT EDIT ** * - input_tensors @@ -63,6 +63,8 @@ Configurations - The shapes of the input tensors, in NHWC order * - output_shapes - The shapes of the output tensors, in NHWC order + * - validation_inputs_data + - [optional] Specify Numpy validation inputs. When not provided, [-1, 1] random values will be used * - runtime - The running device, one of [cpu, gpu, dsp, cpu_gpu]. cpu_gpu contains cpu and gpu model definition so you can run the model on both cpu and gpu. * - data_type @@ -75,5 +77,3 @@ Configurations - [optional] Whether to obfuscate the model operator name, default to 0 * - winograd - [optional] Whether to enable Winograd convolution, **will increase memory consumption** - * - input_files - - [optional] Specify Numpy validation inputs. 
When not provided, [-1, 1] random values will be used diff --git a/docs/getting_started/how_to_build.rst b/docs/getting_started/how_to_build.rst index d07379bf2e0a5ca852ec1baeb2ba231379766941..51c4b0ac8aa0e0db163afc154bb28160d0e836ab 100644 --- a/docs/getting_started/how_to_build.rst +++ b/docs/getting_started/how_to_build.rst @@ -366,7 +366,7 @@ The followings list the details. ``.pb`` file will be generated only when build_type is ``proto``. **OpenCL compiled kernel binary file** - * ``opencl/compiled_kernel.bin`` + * ``opencl/${target_abi}/${library_name}_compiled_opencl_kernel.${device_name}.${target_soc}.bin`` .. note:: @@ -376,6 +376,13 @@ The followings list the details. This file rely on the OpenCL driver on the phone, you should update the file when OpenCL driver changed. +**tar package** + * ``./build/${library_name}/libmace_${library_name}.tar.gz`` + + .. note:: + + This file packages all the above files, which are used for deployment. + ============= 5. how to use ============= diff --git a/docs/getting_started/how_to_build_zh.rst b/docs/getting_started/how_to_build_zh.rst index 7945d7a1c6a6df8b95944abee4d2afe717500b55..0695442ea09cc3a88cc563c0fffb495f04edbaf3 100644 --- a/docs/getting_started/how_to_build_zh.rst +++ b/docs/getting_started/how_to_build_zh.rst @@ -364,6 +364,16 @@ Mace目前只提供静态库,有以下两种使用场景。 pb文件紧当模型build_type设置为proto时才会产生。 +**OpenCL预编译文件** + * ``opencl/${target_abi}/${library_name}_compiled_opencl_kernel.${device_name}.${target_soc}.bin`` + + .. note:: + + 只有指定了``target_soc``并且``runtime==gpu``的情况下才会生成。 + + .. 
warning:: + + 该文件依赖于手机上opencl驱动,如果OpenCL版本变化,请更新该文件。 **库文件tar包** * ``./build/${library_name}/libmace_${library_name}.tar.gz`` diff --git a/docs/getting_started/models/demo_app_models.yaml b/docs/getting_started/models/demo_app_models.yaml index f78dc40a2f383a19eecd373e4ac7cb5bbdea3338..216deea517a3d6b3ef8e7673e90fb1f439655206 100644 --- a/docs/getting_started/models/demo_app_models.yaml +++ b/docs/getting_started/models/demo_app_models.yaml @@ -43,10 +43,10 @@ models: # 一个配置文件可以包含多个模型的配置信息,最终生 output_shapes: - 1,256,256,2 - 1,1,1,2 + validation_inputs_data: + - path/to/input_files # support http:// runtime: cpu limit_opencl_kernel_time: 1 nnlib_graph_mode: 0 obfuscate: 1 winograd: 0 - input_files: - - path/to/input_files # support http:// diff --git a/mace/core/file_storage.cc b/mace/core/file_storage.cc index 4d93da5106d74f13e1668e71fd6ef4bae4c5fc73..37c2ece13841e5eef49d61cc0103f3217c091ff3 100644 --- a/mace/core/file_storage.cc +++ b/mace/core/file_storage.cc @@ -150,8 +150,10 @@ int FileStorage::Load() { bool FileStorage::Insert(const std::string &key, const std::vector &value) { utils::WriteLock lock(&data_mutex_); - data_.emplace(key, value); - data_changed_ = true; + auto res = data_.emplace(key, value); + if (res.second) { + data_changed_ = true; + } return true; } diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index 0fad713155671c3df9506e6aea8cf395034dd06c..5235479db1455f1b7830445b6e8d3de1d56da9db 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -604,6 +604,11 @@ void OpenCLRuntime::BuildProgramFromSource( if (this->cache_storage_ != nullptr) { this->cache_storage_->Insert(built_program_key, content); + // update platform info + this->cache_storage_->Insert( + kOpenCLPlatformInfoKey, + std::vector(platform_info_.begin(), + platform_info_.end())); } VLOG(3) << "Program from source: " << built_program_key; @@ -656,10 +661,6 @@ cl::Kernel 
OpenCLRuntime::BuildKernel( void OpenCLRuntime::SaveBuiltCLProgram() { if (cache_storage_ != nullptr) { - // update platform info - cache_storage_->Insert(kOpenCLPlatformInfoKey, - std::vector(platform_info_.begin(), - platform_info_.end())); if (cache_storage_->Flush() != 0) { LOG(FATAL) << "Store OPENCL compiled kernel to file failed. " << "Please make sure the storage directory exist " diff --git a/tools/converter.py b/tools/converter.py index 6d9df68dfba3ff6157ce262b51001799728d2221..0230f144c30e8315558a4d0e239672e2eccc6f24 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -14,6 +14,7 @@ import argparse import filelock +import glob import hashlib import os import re @@ -40,11 +41,12 @@ from common import StringFormatter BUILD_OUTPUT_DIR = 'build' PHONE_DATA_DIR = "/data/local/tmp/mace_run" MODEL_OUTPUT_DIR_NAME = 'model' +MODEL_HEADER_DIR_PATH = 'include/mace/public' BUILD_TMP_DIR_NAME = '_tmp' BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general' OUTPUT_LIBRARY_DIR_NAME = 'library' OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl' -OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel.bin' +OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel' CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin" CODEGEN_BASE_DIR = 'mace/codegen' MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models' @@ -434,11 +436,19 @@ def get_build_model_dirs(library_name, model_name, target_abi, target_soc, return model_output_base_dir, model_output_dir, mace_model_dir -def get_opencl_binary_output_path(library_name): - return '%s/%s/%s/%s' % (BUILD_OUTPUT_DIR, - library_name, - OUTPUT_OPENCL_BINARY_DIR_NAME, - OUTPUT_OPENCL_BINARY_FILE_NAME) +def get_opencl_binary_output_path(library_name, target_abi, + target_soc, serial_num): + device_name = \ + sh_commands.adb_get_device_name_by_serialno(serial_num) + return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % \ + (BUILD_OUTPUT_DIR, + library_name, + OUTPUT_OPENCL_BINARY_DIR_NAME, + target_abi, + library_name, + OUTPUT_OPENCL_BINARY_FILE_NAME, + 
device_name, + target_soc) ################################ @@ -512,9 +522,16 @@ def convert_model(configs): model_output_dir = \ '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME) + model_header_dir = \ + '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_HEADER_DIR_PATH) if os.path.exists(model_output_dir): sh.rm("-rf", model_output_dir) os.makedirs(model_output_dir) + if os.path.exists(model_header_dir): + sh.rm("-rf", model_header_dir) + os.makedirs(model_header_dir) + # copy header files + sh.cp("-f", glob.glob("mace/public/*.h"), model_header_dir) embed_model_data = configs[YAMLKeyword.embed_model_data] @@ -582,14 +599,20 @@ def convert_model(configs): configs[YAMLKeyword.build_type], data_type) - # mv pb and data file to build/model_name/model if not embed_model_data: - sh_commands.mv_model_file_to_output_dir( - model_build_type=configs[YAMLKeyword.build_type], - model_codegen_dir=model_codegen_dir, - model_name=model_name, - output_dir=model_output_dir - ) + # mv pb and data file to build/model_name/model + sh.mv("-f", + '%s/%s.data' % (model_codegen_dir, model_name), + model_output_dir) + if configs[YAMLKeyword.build_type] == BuildType.proto: + sh.mv("-f", + '%s/%s.pb' % (model_codegen_dir, model_name), + model_output_dir) + else: + sh.cp("-f", glob.glob("mace/codegen/engine/*.h"), + model_header_dir) + sh.cp("-f", glob.glob("mace/codegen/models/*/*.h"), + model_header_dir) MaceLogger.summary( StringFormatter.block("Model %s converted" % model_name)) @@ -681,9 +704,12 @@ def build_specific_lib(target_abi, target_soc, serial_num, binary_changed = True if binary_changed: + opencl_output_bin_path = get_opencl_binary_output_path( + library_name, target_abi, target_soc, serial_num + ) sh_commands.merge_opencl_binaries( model_output_dirs, CL_COMPILED_BINARY_FILE_NAME, - get_opencl_binary_output_path(library_name)) + opencl_output_bin_path) sh_commands.gen_tuning_param_code(model_output_dirs) sh_commands.bazel_build( MACE_RUN_TARGET, @@ -837,12 
+863,16 @@ def run_specific_target(flags, configs, target_abi, library_name = configs[YAMLKeyword.library_name] build_type = configs[YAMLKeyword.build_type] embed_model_data = configs[YAMLKeyword.embed_model_data] + opencl_output_bin_path = "" if not configs[YAMLKeyword.target_socs]: build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, None, None) else: build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, target_soc, serial_num) + opencl_output_bin_path = get_opencl_binary_output_path( + library_name, target_abi, target_soc, serial_num + ) mace_check(os.path.exists(build_tmp_binary_dir), ModuleName.RUN, 'You should build before run.') @@ -892,6 +922,7 @@ def run_specific_target(flags, configs, target_abi, runtime_list.extend([model_runtime]) for runtime in runtime_list: device_type = parse_device_type(runtime) + run_output = sh_commands.tuning_run( abi=target_abi, serialno=serial_num, @@ -919,7 +950,7 @@ def run_specific_target(flags, configs, target_abi, gpu_priority_hint=flags.gpu_priority_hint, runtime_failure_ratio=flags.runtime_failure_ratio, address_sanitizer=flags.address_sanitizer, - opencl_binary_file=get_opencl_binary_output_path(library_name), + opencl_binary_file=opencl_output_bin_path, ) if flags.validate: model_file_path, weight_file_path = get_model_files_path( @@ -978,12 +1009,16 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num): library_name = configs[YAMLKeyword.library_name] build_type = configs[YAMLKeyword.build_type] embed_model_data = configs[YAMLKeyword.embed_model_data] + opencl_output_bin_path = "" if not configs[YAMLKeyword.target_socs]: build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, None, None) else: build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, target_soc, serial_num) + opencl_output_bin_path = get_opencl_binary_output_path( + library_name, target_abi, target_soc, serial_num + ) mace_check(os.path.exists(build_tmp_binary_dir), 
ModuleName.BENCHMARK, 'You should build before benchmark.') @@ -1052,7 +1087,7 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num): cpu_affinity_policy=flags.cpu_affinity_policy, gpu_perf_hint=flags.gpu_perf_hint, gpu_priority_hint=flags.gpu_priority_hint, - opencl_binary_file=get_opencl_binary_output_path(library_name)) + opencl_binary_file=opencl_output_bin_path) def benchmark_model(flags): diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 8282f8840138e3c1bc3f501386d9fe02aff4d094..289f76dcd13f2e4e6957c52e1afcf591a649c1cf 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -400,9 +400,8 @@ def merge_opencl_binaries(binaries_dirs, cl_bin_dirs.append(os.path.join(d, "opencl_bin")) # create opencl binary output dir opencl_binary_dir = os.path.dirname(output_file_path) - if os.path.exists(opencl_binary_dir): - sh.rm("-rf", opencl_binary_dir) - sh.mkdir("-p", opencl_binary_dir) + if not os.path.exists(opencl_binary_dir): + sh.mkdir("-p", opencl_binary_dir) kvs = {} for binary_dir in cl_bin_dirs: binary_path = os.path.join(binary_dir, cl_compiled_program_file_name) @@ -578,20 +577,6 @@ def is_binary_tuned(build_tmp_binary_dir): return os.path.exists(build_tmp_binary_dir + '/tuned') -def mv_model_file_to_output_dir( - model_build_type, - model_codegen_dir, - model_name, - output_dir): - if model_build_type == BuildType.proto: - sh.mv("-f", - '%s/%s.pb' % (model_codegen_dir, model_name), - output_dir) - sh.mv("-f", - '%s/%s.data' % (model_codegen_dir, model_name), - output_dir) - - def create_internal_storage_dir(serialno, phone_data_dir): internal_storage_dir = "%s/interior/" % phone_data_dir sh.adb("-s", serialno, "shell", "mkdir", "-p", internal_storage_dir) @@ -897,26 +882,15 @@ def merge_libs(target_soc, hexagon_mode): print("* Merge mace lib") project_output_dir = "%s/%s" % (build_output_dir, project_name) - model_header_dir = "%s/include/mace/public" % project_output_dir hexagon_lib_file = 
"third_party/nnlib/libhexagon_controller.so" library_dir = "%s/%s" % (project_output_dir, library_output_dir) model_bin_dir = "%s/%s/" % (library_dir, abi) - if os.path.exists(model_bin_dir): - sh.rm("-rf", model_bin_dir) - sh.mkdir("-p", model_bin_dir) - if os.path.exists(model_header_dir): - sh.rm("-rf", model_header_dir) - sh.mkdir("-p", model_header_dir) - # copy header files - sh.cp("-f", glob.glob("mace/public/*.h"), model_header_dir) + if not os.path.exists(model_bin_dir): + sh.mkdir("-p", model_bin_dir) if hexagon_mode: sh.cp("-f", hexagon_lib_file, library_dir) - if model_build_type == BuildType.code: - sh.cp("-f", glob.glob("mace/codegen/engine/*.h"), model_header_dir) - sh.cp("-f", glob.glob("mace/codegen/models/*/*.h"), model_header_dir) - # make static library mri_stream = "" if abi == "host":