Merge branch 'master' of v9.git.n.xiaomi.com:deep-computing/mace

810784ff · yejianwu · e3a8c484 · 818ef60d · 810784ff · 810784ff
21 changed files
--- a/mace/core/runtime/opencl/opencl_runtime.cc
+++ b/mace/core/runtime/opencl/opencl_runtime.cc
@@ -19,7 +19,7 @@ namespace {
 bool WriteFile(const std::string &filename,
               bool binary,
               const std::vector<unsigned char> &content) {
-  std::ios_base::openmode mode = std::ios::out;
+  std::ios_base::openmode mode = std::ios_base::out | std::ios_base::trunc;
  if (binary) {
    mode |= std::ios::binary;
  }
@@ -124,17 +124,14 @@ cl::CommandQueue &OpenCLRuntime::command_queue() { return *command_queue_; }
 std::string OpenCLRuntime::GenerateCLBinaryFilenamePrefix(
    const std::string &filename_msg) {
-#ifdef MACE_OBFUSCATE_LITERALS
+  // TODO: this can be long and slow; fix it.
-  return ObfuscateSymbolWithCollision(filename_msg);
-#else
  std::string filename_prefix = filename_msg;
  for (auto it = filename_prefix.begin(); it != filename_prefix.end(); ++it) {
    if (*it == ' ' || *it == '-' || *it == '=') {
      *it = '_';
    }
  }
-  return filename_prefix;
+  return MACE_OBFUSCATE_SYMBOL(filename_prefix);
-#endif
 }
 extern bool GetSourceOrBinaryProgram(const std::string &program_name,
@@ -145,21 +142,24 @@ extern bool GetSourceOrBinaryProgram(const std::string &program_name,
                                     bool *is_opencl_binary);
 void OpenCLRuntime::BuildProgram(const std::string &program_name,
-                                 const std::string &binary_file_name_prefix,
+                                 const std::string &built_program_key,
                                 const std::string &build_options,
                                 cl::Program *program) {
  MACE_CHECK_NOTNULL(program);
-  bool is_opencl_binary = false;
+  std::string binary_file_name_prefix =
+    GenerateCLBinaryFilenamePrefix(built_program_key);
  std::vector<unsigned char> program_vec;
+  bool is_opencl_binary = false;  // read in the check below even if lookup fails
  const bool found = GetSourceOrBinaryProgram(program_name,
                                              binary_file_name_prefix,
                                              context(),
                                              device(),
                                              program,
                                              &is_opencl_binary);
-  MACE_CHECK(found, "Program not found source: ", program_name, ", or binary: ",
+  MACE_CHECK(found, "Program not found for ",
-             binary_file_name_prefix);
+                    is_opencl_binary ? "binary: " : "source: ",
+                    built_program_key);
  // Build program
  std::string build_options_str =
@@ -173,7 +173,10 @@ void OpenCLRuntime::BuildProgram(const std::string &program_name,
          program->getBuildInfo<CL_PROGRAM_BUILD_LOG>(device());
      LOG(INFO) << "Program build log: " << build_log;
    }
-    LOG(FATAL) << "Build program failed: " << ret;
+    LOG(FATAL) << "Build program from "
+               << (is_opencl_binary ? "binary: " : "source: ")
+               << built_program_key
+               << " failed: " << ret;
  }
  if (!is_opencl_binary) {
@@ -222,9 +225,7 @@ cl::Kernel OpenCLRuntime::BuildKernel(
  if (built_program_it != built_program_map_.end()) {
    program = built_program_it->second;
  } else {
-    std::string binary_file_name_prefix =
+    this->BuildProgram(program_name, built_program_key,
-      GenerateCLBinaryFilenamePrefix(built_program_key);
-    this->BuildProgram(program_name, binary_file_name_prefix,
                       build_options_str, &program);
    built_program_map_.emplace(built_program_key, program);
  }

--- a/mace/kernels/opencl/addn.cc
+++ b/mace/kernels/opencl/addn.cc
@@ -31,7 +31,7 @@ static void AddN(const std::vector<const Tensor *> &input_tensors,
  auto runtime = OpenCLRuntime::Global();
  std::set<std::string> built_options;
  auto dt = DataTypeToEnum<T>::value;
-  std::string kernel_name = MACE_KERNRL_NAME("addn");
+  std::string kernel_name = MACE_OBFUSCATE_SYMBOL("addn");
  built_options.emplace("-Daddn=" + kernel_name);
  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
  built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));

--- a/mace/kernels/opencl/batch_norm_opencl.cc
+++ b/mace/kernels/opencl/batch_norm_opencl.cc
@@ -34,7 +34,7 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(
  auto runtime = OpenCLRuntime::Global();
  std::set<std::string> built_options;
  auto dt = DataTypeToEnum<T>::value;
-  std::string kernel_name = MACE_KERNRL_NAME("batch_norm");
+  std::string kernel_name = MACE_OBFUSCATE_SYMBOL("batch_norm");
  built_options.emplace("-Dbatch_norm=" + kernel_name);
  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
  built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));

--- a/mace/kernels/opencl/bias_add_opencl.cc
+++ b/mace/kernels/opencl/bias_add_opencl.cc
@@ -31,7 +31,7 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(
  auto runtime = OpenCLRuntime::Global();
  std::set<std::string> built_options;
  auto dt = DataTypeToEnum<T>::value;
-  std::string kernel_name = MACE_KERNRL_NAME("bias_add");
+  std::string kernel_name = MACE_OBFUSCATE_SYMBOL("bias_add");
  built_options.emplace("-Dbias_add=" + kernel_name);
  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
  built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));

--- a/mace/kernels/opencl/buffer_to_image.cc
+++ b/mace/kernels/opencl/buffer_to_image.cc
@@ -37,7 +37,7 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
      kernel_name = i2b_ ? "arg_image_to_buffer" : "arg_buffer_to_image";
      break;
  }
-  string obfuscated_kernel_name = MACE_KERNRL_NAME(kernel_name);
+  string obfuscated_kernel_name = MACE_OBFUSCATE_SYMBOL(kernel_name);
  std::set<std::string> built_options;
  std::stringstream kernel_name_ss;
  kernel_name_ss << "-D" << kernel_name << "=" << obfuscated_kernel_name;

--- a/mace/kernels/opencl/concat.cc
+++ b/mace/kernels/opencl/concat.cc
@@ -25,7 +25,7 @@ static void Concat2(const Tensor *input0,
  auto runtime = OpenCLRuntime::Global();
  std::set<std::string> built_options;
-  std::string kernel_name = MACE_KERNRL_NAME("concat_channel");
+  std::string kernel_name = MACE_OBFUSCATE_SYMBOL("concat_channel");
  built_options.emplace("-Dconcat_channel=" + kernel_name);
  if (input0->dtype() == output->dtype()) {
    built_options.emplace("-DDATA_TYPE=" + DtToCLDt(dt));

--- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc
+++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc
@@ -36,7 +36,7 @@ void Conv1x1(const Tensor *input,
  MACE_CHECK(input_batch == batch);
  std::set<std::string> built_options;
-  std::string kernel_name = MACE_KERNRL_NAME("conv_2d_1x1");
+  std::string kernel_name = MACE_OBFUSCATE_SYMBOL("conv_2d_1x1");
  built_options.emplace("-Dconv_2d_1x1=" + kernel_name);
  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
  built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));

--- a/mace/kernels/opencl/conv_2d_opencl_3x3.cc
+++ b/mace/kernels/opencl/conv_2d_opencl_3x3.cc
@@ -28,7 +28,7 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter,
  const index_t width_blocks = RoundUpDiv<index_t, 5>(width);
  std::set<std::string> built_options;
-  std::string kernel_name = MACE_KERNRL_NAME("conv_2d_3x3");
+  std::string kernel_name = MACE_OBFUSCATE_SYMBOL("conv_2d_3x3");
  built_options.emplace("-Dconv_2d_3x3=" + kernel_name);
  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
  built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));

--- a/mace/kernels/opencl/conv_2d_opencl_general.cc
+++ b/mace/kernels/opencl/conv_2d_opencl_general.cc
@@ -28,7 +28,7 @@ void Conv2dOpencl(const Tensor *input, const Tensor *filter,
  const index_t width_blocks = RoundUpDiv4(width);
  std::set<std::string> built_options;
-  std::string kernel_name = MACE_KERNRL_NAME("conv_2d");
+  std::string kernel_name = MACE_OBFUSCATE_SYMBOL("conv_2d");
  built_options.emplace("-Dconv_2d=" + kernel_name);
  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
  built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));

--- a/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc
+++ b/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc
@@ -33,7 +33,7 @@ static void InnerDepthwiseConvOpenclK3x3S12(const Tensor *input,
  auto runtime = OpenCLRuntime::Global();
  std::set<std::string> built_options;
-  std::string kernel_name = MACE_KERNRL_NAME("depthwise_conv_3x3");
+  std::string kernel_name = MACE_OBFUSCATE_SYMBOL("depthwise_conv_3x3");
  built_options.emplace("-Ddepthwise_conv_3x3=" + kernel_name);
  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype()));
  built_options.emplace(stride == 1 ? "-DSTRIDE_1" : "");

--- a/mace/kernels/opencl/pooling_opencl.cc
+++ b/mace/kernels/opencl/pooling_opencl.cc
@@ -28,7 +28,7 @@ static void Pooling(const Tensor *input,
  auto runtime = OpenCLRuntime::Global();
  std::set<std::string> built_options;
-  std::string kernel_name = MACE_KERNRL_NAME("pooling");
+  std::string kernel_name = MACE_OBFUSCATE_SYMBOL("pooling");
  built_options.emplace("-Dpooling=" + kernel_name);
  if (type == MAX && input->dtype() == output->dtype()) {
    built_options.emplace("-DDATA_TYPE=" + DtToCLDt(dt));

--- a/mace/kernels/opencl/relu_opencl.cc
+++ b/mace/kernels/opencl/relu_opencl.cc
@@ -32,7 +32,7 @@ void ReluFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
  built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
  cl::Kernel relu_kernel;
  if (max_limit_ < 0) {
-    std::string kernel_name = MACE_KERNRL_NAME("relu");
+    std::string kernel_name = MACE_OBFUSCATE_SYMBOL("relu");
    built_options.emplace("-Drelu=" + kernel_name);
    relu_kernel  = runtime->BuildKernel("relu", kernel_name, built_options);
@@ -40,7 +40,7 @@ void ReluFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
    relu_kernel.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
    relu_kernel.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
  } else {
-    std::string kernel_name = MACE_KERNRL_NAME("relux");
+    std::string kernel_name = MACE_OBFUSCATE_SYMBOL("relux");
    built_options.emplace("-Drelux=" + kernel_name);
    relu_kernel  = runtime->BuildKernel("relu", kernel_name, built_options);

--- a/mace/kernels/opencl/resize_bilinear_opencl.cc
+++ b/mace/kernels/opencl/resize_bilinear_opencl.cc
@@ -40,7 +40,7 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
  auto runtime = OpenCLRuntime::Global();
  std::set<std::string> built_options;
-  std::string kernel_name = MACE_KERNRL_NAME("resize_bilinear_nocache");
+  std::string kernel_name = MACE_OBFUSCATE_SYMBOL("resize_bilinear_nocache");
  built_options.emplace("-Dresize_bilinear_nocache=" + kernel_name);
  auto dt = DataTypeToEnum<T>::value;
  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));

--- a/mace/kernels/opencl/softmax_opencl.cc
+++ b/mace/kernels/opencl/softmax_opencl.cc
@@ -26,7 +26,7 @@ void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits,
  auto runtime = OpenCLRuntime::Global();
  std::set<std::string> built_options;
-  std::string kernel_name = MACE_KERNRL_NAME("softmax");
+  std::string kernel_name = MACE_OBFUSCATE_SYMBOL("softmax");
  built_options.emplace("-Dsoftmax=" + kernel_name);
  auto dt = DataTypeToEnum<T>::value;
  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));

--- a/mace/kernels/opencl/space_to_batch_opencl.cc
+++ b/mace/kernels/opencl/space_to_batch_opencl.cc
@@ -30,7 +30,7 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, T>::operator()(Tensor *space_tensor
    batch_tensor->ResizeImage(output_shape, output_image_shape);
    kernel_name = "space_to_batch";
  }
-  std::string obfuscated_kernel_name = MACE_KERNRL_NAME(kernel_name);
+  std::string obfuscated_kernel_name = MACE_OBFUSCATE_SYMBOL(kernel_name);
  auto runtime = OpenCLRuntime::Global();
  std::set<std::string> built_options;
  std::stringstream kernel_name_ss;

--- a/mace/python/tools/binary_codegen.py
+++ b/mace/python/tools/binary_codegen.py
@@ -14,11 +14,18 @@ FLAGS = None
 def generate_cpp_source():
+  data_map = {}
+  if not os.path.exists(FLAGS.binary_file):
+    env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
+    return env.get_template('str2vec_maps.cc.tmpl').render(
+      maps=data_map,
+      data_type='unsigned int',
+      variable_name=FLAGS.variable_name
+    )
  with open(FLAGS.binary_file, "rb") as binary_file:
    binary_array = np.fromfile(binary_file, dtype=np.uint8)
-  data_map = {}
  idx = 0
  size, = struct.unpack("Q", binary_array[idx:idx+8])
  print size

--- a/mace/utils/tuner.h
+++ b/mace/utils/tuner.h
@@ -46,7 +46,7 @@ class Tuner {
          &param_generator,
      const std::function<RetType(const std::vector<param_type> &)> &func,
      Timer *timer) {
-    std::string obfucated_param_key = MACE_OBFUSCATE_SYMBOLS(param_key);
+    std::string obfucated_param_key = MACE_OBFUSCATE_SYMBOL(param_key);
    if (IsTuning() && param_generator != nullptr) {
      // tune
      std::vector<param_type> opt_param = default_param;
@@ -92,8 +92,7 @@ class Tuner {
          int32_t key_size = kp.first.size();
          ofs.write(reinterpret_cast<char *>(&key_size), sizeof(key_size));
          ofs.write(kp.first.c_str(), key_size);
-          VLOG(1) << "Write tuning param: "
+          VLOG(1) << "Write tuning param: " << kp.first.c_str();
-                  << MACE_OBFUSCATE_SYMBOLS(kp.first.c_str());
          auto &params = kp.second;
          int32_t params_size = params.size() * sizeof(param_type);

--- a/mace/utils/utils.h
+++ b/mace/utils/utils.h
@@ -65,26 +65,41 @@ inline std::string ObfuscateString(const std::string &src) {
 }
 // Obfuscate symbol or path string
-inline std::string ObfuscateSymbolWithCollision(const std::string &src) {
+inline std::string ObfuscateSymbol(const std::string &src) {
-  std::string dest = ObfuscateString(src);
+  std::string dest = src;
+  if (dest.empty()) {
+    return dest;
+  }
+  dest[0] = src[0];  // first char is kept as-is so the symbol never starts with 0-9
  const std::string encode_dict =
-    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_";
-  for (int i = 0; i < src.size(); i++) {
+  for (size_t i = 1; i < src.size(); i++) {
-    dest[i] = encode_dict[dest[i] % encode_dict.size()];
+    char ch = src[i];
+    int idx;
+    if (ch >= '0' && ch <= '9') {
+      idx = ch - '0';
+    } else if (ch >= 'a' && ch <= 'z') {
+      idx = 10 + ch - 'a';
+    } else if (ch >= 'A' && ch <= 'Z') {
+      idx = 10 + 26 + ch - 'A';
+    } else if (ch == '_') {
+      idx = 10 + 26 + 26;
+    } else {
+      dest[i] = ch;
+      continue;
+    }
+    // Per-position bijection over encode_dict, so distinct inputs never collide
+    dest[i] = encode_dict[(idx + i + 31) % encode_dict.size()];
  }
  return std::move(dest);
 }
 #ifdef MACE_OBFUSCATE_LITERALS
 #define MACE_OBFUSCATE_STRING(str) ObfuscateString(str)
-// This table is delibratedly selected to avoid '\0' in genereated literal
+#define MACE_OBFUSCATE_SYMBOL(str) ObfuscateSymbol(str)
-#define MACE_OBFUSCATE_SYMBOLS(str) ObfuscateString(str, "!@#$%^&*()+?")
-// OpenCL will report error if there is name collision
-#define MACE_KERNRL_NAME(name) ObfuscateSymbolWithCollision(name)
 #else
 #define MACE_OBFUSCATE_STRING(str) (str)
-#define MACE_OBFUSCATE_SYMBOLS(str) (str)
+#define MACE_OBFUSCATE_SYMBOL(str) (str)
-#define MACE_KERNRL_NAME(name) (name)
 #endif
 }  //  namespace mace

--- a/tools/gcn.config
+++ b/tools/gcn.config
 TF_INPUT_NODE=input
 TF_OUTPUT_NODE=softmax/Reshape_1
\ No newline at end of file
+TF_OUTPUT_BR_NODE=GCN/br_result_2/fcn_br
\ No newline at end of file
--- a/tools/validate.py
+++ b/tools/validate.py
@@ -21,7 +21,7 @@ from tensorflow import gfile
 def generate_data(shape):
  np.random.seed()
-  data = np.random.random(shape) * -1
+  data = np.random.random(shape) * 2 - 1
  print FLAGS.input_file
  data.astype(np.float32).tofile(FLAGS.input_file)
  print "Generate input file done."

--- a/tools/validate_gcn_dsp.sh
+++ b/tools/validate_gcn_dsp.sh
@@ -27,6 +27,9 @@ KERNEL_DIR="${PHONE_DATA_DIR}/cl/"
 CODEGEN_DIR=${MACE_SOURCE_DIR}/mace/codegen
 MODEL_CODEGEN_DIR=${CODEGEN_DIR}/models/${MODEL_TAG}
 VERSION_SOURCE_PATH=${CODEGEN_DIR}/version
+CL_CODEGEN_DIR=${CODEGEN_DIR}/opencl
+CL_BIN_DIR=${CODEGEN_DIR}/opencl_bin
+TUNING_CODEGEN_DIR=${CODEGEN_DIR}/tuning
 build_and_run()
 {
@@ -71,7 +74,7 @@ mkdir -p ${MODEL_CODEGEN_DIR}
 bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
                                         --output=${MODEL_CODEGEN_DIR}/mace_gcn${IMAGE_SIZE}.cc \
                                         --input_node=${TF_INPUT_NODE} \
-                                         --output_node=${TF_OUTPUT_NODE} \
+                                         --output_node=${TF_OUTPUT_BR_NODE} \
                                         --data_type=DT_UINT8 \
                                         --runtime=dsp \
                                         --output_type=source \
@@ -84,18 +87,30 @@ rm -rf ${VERSION_SOURCE_PATH}
 mkdir -p ${VERSION_SOURCE_PATH}
 bash mace/tools/git/gen_version_source.sh ${VERSION_SOURCE_PATH}/version.cc
-echo "Step 4: Run model on the phone with files"
+echo "Step 4: Generate OpenCL binary program and config code"
+rm -rf ${CL_BIN_DIR}
+mkdir -p ${CL_BIN_DIR}
+python mace/python/tools/opencl_codegen.py \
+  --cl_binary_dir=${CL_BIN_DIR} --output_path=${CL_CODEGEN_DIR}/opencl_compiled_program.cc
+echo "Step 5: Generate tuning source file"
+rm -rf ${TUNING_CODEGEN_DIR}
+mkdir -p ${TUNING_CODEGEN_DIR}
+python mace/python/tools/binary_codegen.py \
+  --binary_file=${CL_BIN_DIR}/mace_run.config --output_path=${TUNING_CODEGEN_DIR}/tuning_params.cc
+echo "Step 6: Run model on the phone with files"
 build_and_run
-echo "Step 5: Pull the mace run result."
+echo "Step 7: Pull the mace run result."
 rm -rf ${MODEL_DIR}/${OUTPUT_FILE_NAME}
 adb </dev/null pull ${PHONE_DATA_DIR}/${OUTPUT_FILE_NAME} ${MODEL_DIR}
-echo "Step 6: Validate the result"
+echo "Step 8: Validate the result"
 python tools/validate.py --model_file ${TF_MODEL_FILE_PATH} \
    --input_file ${MODEL_DIR}/${INPUT_FILE_NAME} \
    --mace_out_file ${MODEL_DIR}/${OUTPUT_FILE_NAME} \
    --input_node ${TF_INPUT_NODE} \
-    --output_node ${TF_OUTPUT_NODE} \
+    --output_node ${TF_OUTPUT_BR_NODE} \
    --input_shape "${IMAGE_SIZE},${IMAGE_SIZE},3" \
    --output_shape "1,${IMAGE_SIZE},${IMAGE_SIZE},2"