Merge pull request #9591 from dkurt:feature_dnn_caffe_importer_fp16

3358b891 · Vadim Pisarevsky · 73298ea8 · 8646d5fb · 3358b891 · 3358b891
7 changed files
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -701,6 +701,19 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
    CV_EXPORTS_W Mat blobFromImages(const std::vector<Mat>& images, double scalefactor=1.0,
                                    Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=true);

+    /** @brief Convert all weights of Caffe network to half precision floating point.
+     * @param src Path to the original Caffe model that contains single
+     *            precision floating point weights (usually has `.caffemodel` extension).
+     * @param dst Path to destination model with updated weights.
+     *
+     * @note The shrunk model no longer contains the original float32 weights, so it
+     *       can't be used in the original Caffe framework anymore. However, the data
+     *       layout is taken from NVidia's Caffe fork (https://github.com/NVIDIA/caffe),
+     *       so the resulting model may be used there.
+     */
+    CV_EXPORTS_W void shrinkCaffeModel(const String& src, const String& dst);
+
+
 //! @}
 CV__DNN_EXPERIMENTAL_NS_END
 }

--- a/modules/dnn/misc/caffe/caffe.pb.cc
+++ b/modules/dnn/misc/caffe/caffe.pb.cc
--- a/modules/dnn/misc/caffe/caffe.pb.h
+++ b/modules/dnn/misc/caffe/caffe.pb.h
@@ -641,6 +641,28 @@ inline bool V0LayerParameter_PoolMethod_Parse(
  return ::google::protobuf::internal::ParseNamedEnum<V0LayerParameter_PoolMethod>(
    V0LayerParameter_PoolMethod_descriptor(), name, value);
 }
+enum Type {
+  DOUBLE = 0,
+  FLOAT = 1,
+  FLOAT16 = 2,
+  INT = 3,
+  UINT = 4
+};
+bool Type_IsValid(int value);
+const Type Type_MIN = DOUBLE;
+const Type Type_MAX = UINT;
+const int Type_ARRAYSIZE = Type_MAX + 1;
+
+const ::google::protobuf::EnumDescriptor* Type_descriptor();
+inline const ::std::string& Type_Name(Type value) {
+  return ::google::protobuf::internal::NameOfEnum(
+    Type_descriptor(), value);
+}
+inline bool Type_Parse(
+    const ::std::string& name, Type* value) {
+  return ::google::protobuf::internal::ParseNamedEnum<Type>(
+    Type_descriptor(), name, value);
+}
 enum Phase {
  TRAIN = 0,
  TEST = 1
@@ -892,6 +914,25 @@ class BlobProto : public ::google::protobuf::Message /* @@protoc_insertion_point
  ::google::protobuf::RepeatedField< double >*
      mutable_double_diff();

+  // optional .caffe.Type raw_data_type = 10;
+  bool has_raw_data_type() const;
+  void clear_raw_data_type();
+  static const int kRawDataTypeFieldNumber = 10;
+  ::caffe::Type raw_data_type() const;
+  void set_raw_data_type(::caffe::Type value);
+
+  // optional bytes raw_data = 12 [packed = false];
+  bool has_raw_data() const;
+  void clear_raw_data();
+  static const int kRawDataFieldNumber = 12;
+  const ::std::string& raw_data() const;
+  void set_raw_data(const ::std::string& value);
+  void set_raw_data(const char* value);
+  void set_raw_data(const void* value, size_t size);
+  ::std::string* mutable_raw_data();
+  ::std::string* release_raw_data();
+  void set_allocated_raw_data(::std::string* raw_data);
+
  // optional int32 num = 1 [default = 0];
  bool has_num() const;
  void clear_num();
@@ -924,6 +965,10 @@ class BlobProto : public ::google::protobuf::Message /* @@protoc_insertion_point
 private:
  inline void set_has_shape();
  inline void clear_has_shape();
+  inline void set_has_raw_data_type();
+  inline void clear_has_raw_data_type();
+  inline void set_has_raw_data();
+  inline void clear_has_raw_data();
  inline void set_has_num();
  inline void clear_has_num();
  inline void set_has_channels();
@@ -944,7 +989,9 @@ class BlobProto : public ::google::protobuf::Message /* @@protoc_insertion_point
  mutable int _double_data_cached_byte_size_;
  ::google::protobuf::RepeatedField< double > double_diff_;
  mutable int _double_diff_cached_byte_size_;
+  ::google::protobuf::internal::ArenaStringPtr raw_data_;
  ::caffe::BlobShape* shape_;
+  int raw_data_type_;
  ::google::protobuf::int32 num_;
  ::google::protobuf::int32 channels_;
  ::google::protobuf::int32 height_;
@@ -12884,15 +12931,94 @@ BlobProto::mutable_double_diff() {
  return &double_diff_;
 }

+// optional .caffe.Type raw_data_type = 10;
+inline bool BlobProto::has_raw_data_type() const {
+  return (_has_bits_[0] & 0x00000020u) != 0;
+}
+inline void BlobProto::set_has_raw_data_type() {
+  _has_bits_[0] |= 0x00000020u;
+}
+inline void BlobProto::clear_has_raw_data_type() {
+  _has_bits_[0] &= ~0x00000020u;
+}
+inline void BlobProto::clear_raw_data_type() {
+  raw_data_type_ = 0;
+  clear_has_raw_data_type();
+}
+inline ::caffe::Type BlobProto::raw_data_type() const {
+  // @@protoc_insertion_point(field_get:caffe.BlobProto.raw_data_type)
+  return static_cast< ::caffe::Type >(raw_data_type_);
+}
+inline void BlobProto::set_raw_data_type(::caffe::Type value) {
+  assert(::caffe::Type_IsValid(value));
+  set_has_raw_data_type();
+  raw_data_type_ = value;
+  // @@protoc_insertion_point(field_set:caffe.BlobProto.raw_data_type)
+}
+
+// optional bytes raw_data = 12 [packed = false];
+inline bool BlobProto::has_raw_data() const {
+  return (_has_bits_[0] & 0x00000040u) != 0;
+}
+inline void BlobProto::set_has_raw_data() {
+  _has_bits_[0] |= 0x00000040u;
+}
+inline void BlobProto::clear_has_raw_data() {
+  _has_bits_[0] &= ~0x00000040u;
+}
+inline void BlobProto::clear_raw_data() {
+  raw_data_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
+  clear_has_raw_data();
+}
+inline const ::std::string& BlobProto::raw_data() const {
+  // @@protoc_insertion_point(field_get:caffe.BlobProto.raw_data)
+  return raw_data_.GetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
+}
+inline void BlobProto::set_raw_data(const ::std::string& value) {
+  set_has_raw_data();
+  raw_data_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value);
+  // @@protoc_insertion_point(field_set:caffe.BlobProto.raw_data)
+}
+inline void BlobProto::set_raw_data(const char* value) {
+  set_has_raw_data();
+  raw_data_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value));
+  // @@protoc_insertion_point(field_set_char:caffe.BlobProto.raw_data)
+}
+inline void BlobProto::set_raw_data(const void* value, size_t size) {
+  set_has_raw_data();
+  raw_data_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(),
+      ::std::string(reinterpret_cast<const char*>(value), size));
+  // @@protoc_insertion_point(field_set_pointer:caffe.BlobProto.raw_data)
+}
+inline ::std::string* BlobProto::mutable_raw_data() {
+  set_has_raw_data();
+  // @@protoc_insertion_point(field_mutable:caffe.BlobProto.raw_data)
+  return raw_data_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
+}
+inline ::std::string* BlobProto::release_raw_data() {
+  // @@protoc_insertion_point(field_release:caffe.BlobProto.raw_data)
+  clear_has_raw_data();
+  return raw_data_.ReleaseNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited());
+}
+inline void BlobProto::set_allocated_raw_data(::std::string* raw_data) {
+  if (raw_data != NULL) {
+    set_has_raw_data();
+  } else {
+    clear_has_raw_data();
+  }
+  raw_data_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), raw_data);
+  // @@protoc_insertion_point(field_set_allocated:caffe.BlobProto.raw_data)
+}
+
 // optional int32 num = 1 [default = 0];
 inline bool BlobProto::has_num() const {
-  return (_has_bits_[0] & 0x00000020u) != 0;
+  return (_has_bits_[0] & 0x00000080u) != 0;
 }
 inline void BlobProto::set_has_num() {
-  _has_bits_[0] |= 0x00000020u;
+  _has_bits_[0] |= 0x00000080u;
 }
 inline void BlobProto::clear_has_num() {
-  _has_bits_[0] &= ~0x00000020u;
+  _has_bits_[0] &= ~0x00000080u;
 }
 inline void BlobProto::clear_num() {
  num_ = 0;
@@ -12910,13 +13036,13 @@ inline void BlobProto::set_num(::google::protobuf::int32 value) {

 // optional int32 channels = 2 [default = 0];
 inline bool BlobProto::has_channels() const {
-  return (_has_bits_[0] & 0x00000040u) != 0;
+  return (_has_bits_[0] & 0x00000100u) != 0;
 }
 inline void BlobProto::set_has_channels() {
-  _has_bits_[0] |= 0x00000040u;
+  _has_bits_[0] |= 0x00000100u;
 }
 inline void BlobProto::clear_has_channels() {
-  _has_bits_[0] &= ~0x00000040u;
+  _has_bits_[0] &= ~0x00000100u;
 }
 inline void BlobProto::clear_channels() {
  channels_ = 0;
@@ -12934,13 +13060,13 @@ inline void BlobProto::set_channels(::google::protobuf::int32 value) {

 // optional int32 height = 3 [default = 0];
 inline bool BlobProto::has_height() const {
-  return (_has_bits_[0] & 0x00000080u) != 0;
+  return (_has_bits_[0] & 0x00000200u) != 0;
 }
 inline void BlobProto::set_has_height() {
-  _has_bits_[0] |= 0x00000080u;
+  _has_bits_[0] |= 0x00000200u;
 }
 inline void BlobProto::clear_has_height() {
-  _has_bits_[0] &= ~0x00000080u;
+  _has_bits_[0] &= ~0x00000200u;
 }
 inline void BlobProto::clear_height() {
  height_ = 0;
@@ -12958,13 +13084,13 @@ inline void BlobProto::set_height(::google::protobuf::int32 value) {

 // optional int32 width = 4 [default = 0];
 inline bool BlobProto::has_width() const {
-  return (_has_bits_[0] & 0x00000100u) != 0;
+  return (_has_bits_[0] & 0x00000400u) != 0;
 }
 inline void BlobProto::set_has_width() {
-  _has_bits_[0] |= 0x00000100u;
+  _has_bits_[0] |= 0x00000400u;
 }
 inline void BlobProto::clear_has_width() {
-  _has_bits_[0] &= ~0x00000100u;
+  _has_bits_[0] &= ~0x00000400u;
 }
 inline void BlobProto::clear_width() {
  width_ = 0;
@@ -28597,6 +28723,11 @@ template <>
 inline const EnumDescriptor* GetEnumDescriptor< ::caffe::V0LayerParameter_PoolMethod>() {
  return ::caffe::V0LayerParameter_PoolMethod_descriptor();
 }
+template <> struct is_proto_enum< ::caffe::Type> : ::google::protobuf::internal::true_type {};
+template <>
+inline const EnumDescriptor* GetEnumDescriptor< ::caffe::Type>() {
+  return ::caffe::Type_descriptor();
+}
 template <> struct is_proto_enum< ::caffe::Phase> : ::google::protobuf::internal::true_type {};
 template <>
 inline const EnumDescriptor* GetEnumDescriptor< ::caffe::Phase>() {
--- a/modules/dnn/src/caffe/caffe.proto
+++ b/modules/dnn/src/caffe/caffe.proto
@@ -50,6 +50,16 @@ syntax = "proto2";

 package caffe;

+// NVidia's Caffe feature is used to store fp16 weights, https://github.com/NVIDIA/caffe:
+// Math and storage types
+enum Type {
+  DOUBLE = 0;
+  FLOAT = 1;
+  FLOAT16 = 2;
+  INT = 3;  // math not supported
+  UINT = 4;  // math not supported
+}
+
 // Specifies the shape (dimensions) of a Blob.
 message BlobShape {
  repeated int64 dim = 1 [packed = true];
@@ -62,6 +72,11 @@ message BlobProto {
  repeated double double_data = 8 [packed = true];
  repeated double double_diff = 9 [packed = true];

+  // NVidia's Caffe fields begin.
+  optional Type raw_data_type = 10;
+  optional bytes raw_data = 12 [packed = false];
+  // NVidia's Caffe fields end.
+
  // 4D dimensions -- deprecated.  Use "shape" instead.
  optional int32 num = 1 [default = 0];
  optional int32 channels = 2 [default = 0];

--- a/modules/dnn/src/caffe/caffe_importer.cpp
+++ b/modules/dnn/src/caffe/caffe_importer.cpp
@@ -225,13 +225,30 @@ public:
        blobShapeFromProto(pbBlob, shape);

        dstBlob.create((int)shape.size(), &shape[0], CV_32F);
-        CV_Assert(pbBlob.data_size() == (int)dstBlob.total());
-
-        CV_DbgAssert(pbBlob.GetDescriptor()->FindFieldByLowercaseName("data")->cpp_type() == FieldDescriptor::CPPTYPE_FLOAT);
        float *dstData = dstBlob.ptr<float>();
+        if (pbBlob.data_size())
+        {
+            // Single precision floats.
+            CV_Assert(pbBlob.data_size() == (int)dstBlob.total());
+
+            CV_DbgAssert(pbBlob.GetDescriptor()->FindFieldByLowercaseName("data")->cpp_type() == FieldDescriptor::CPPTYPE_FLOAT);

-        for (int i = 0; i < pbBlob.data_size(); i++)
-            dstData[i] = pbBlob.data(i);
+            for (int i = 0; i < pbBlob.data_size(); i++)
+                dstData[i] = pbBlob.data(i);
+        }
+        else
+        {
+            // Half precision floats.
+            CV_Assert(pbBlob.raw_data_type() == caffe::FLOAT16);
+            // Bind by reference: a by-value std::string would copy the whole weights buffer.
+            const std::string& raw_data = pbBlob.raw_data();
+
+            // Require the exact byte count (2 bytes per element); size() / 2 would accept a trailing odd byte.
+            CV_Assert(raw_data.size() == 2 * dstBlob.total());
+
+            Mat halfs((int)shape.size(), &shape[0], CV_16SC1, (void*)raw_data.c_str());
+            convertFp16(halfs, dstBlob);
+        }
    }

    void extractBinaryLayerParms(const caffe::LayerParameter& layer, LayerParams& layerParams)

--- a/modules/dnn/src/caffe/caffe_shrinker.cpp
+++ b/modules/dnn/src/caffe/caffe_shrinker.cpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#include "../precomp.hpp"
+
+#ifdef HAVE_PROTOBUF
+#include <fstream>
+#include "caffe_io.hpp"
+#endif
+
+namespace cv { namespace dnn {
+CV__DNN_EXPERIMENTAL_NS_BEGIN
+
+#ifdef HAVE_PROTOBUF
+
+/**
+ * Reads the Caffe model at `src`, replaces the float32 `data` array of every
+ * layer blob with its float16 conversion stored in `raw_data` (tagged as
+ * caffe::FLOAT16), and writes the serialized network to `dst`.
+ *
+ * CV_Assert fails when a blob carries no float32 data, or when the
+ * destination file cannot be opened or fully written.
+ */
+void shrinkCaffeModel(const String& src, const String& dst)
+{
+    CV_TRACE_FUNCTION();
+
+    caffe::NetParameter net;
+    ReadNetParamsFromBinaryFileOrDie(src.c_str(), &net);
+
+    for (int i = 0; i < net.layer_size(); ++i)
+    {
+        caffe::LayerParameter* lp = net.mutable_layer(i);
+        for (int j = 0; j < lp->blobs_size(); ++j)
+        {
+            caffe::BlobProto* blob = lp->mutable_blobs(j);
+            CV_Assert(blob->data_size() != 0);  // float32 array.
+
+            // Wrap the protobuf float array without copying; it is only read by convertFp16.
+            Mat floats(1, blob->data_size(), CV_32FC1, (void*)blob->data().data());
+            Mat halfs(1, blob->data_size(), CV_16SC1);
+            convertFp16(floats, halfs);  // Convert to float16.
+
+            // `floats` aliases the protobuf storage, so clear it only after the conversion.
+            blob->clear_data();  // Clear float32 data.
+
+            // Set float16 data.
+            blob->set_raw_data(halfs.data, halfs.total() * halfs.elemSize());
+            blob->set_raw_data_type(caffe::FLOAT16);
+        }
+    }
+    // ByteSizeLong() also caches the sizes used by SerializeWithCachedSizesToArray().
+    const size_t msgSize = net.ByteSizeLong();
+    // Keep at least one byte allocated so &output[0] stays valid for an empty message.
+    std::vector<uint8_t> output(msgSize ? msgSize : 1);
+    net.SerializeWithCachedSizesToArray(&output[0]);
+
+    std::ofstream ofs(dst.c_str(), std::ios::binary);
+    CV_Assert(ofs.is_open());  // Fail loudly instead of silently producing no file.
+    ofs.write((const char*)&output[0], (std::streamsize)msgSize);
+    ofs.close();
+    CV_Assert(!ofs.fail());  // Detect incomplete writes (e.g. disk full).
+}
+
+#else
+
+/** Fallback for builds without protobuf: no conversion is attempted, an error is raised instead. */
+void shrinkCaffeModel(const String& src, const String& dst)
+{
+    CV_Error(Error::StsNotImplemented, "libprotobuf required to import data from Caffe models");
+}
+
+#endif  // HAVE_PROTOBUF
+
+CV__DNN_EXPERIMENTAL_NS_END
+}} // namespace
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@@ -188,4 +188,58 @@ TEST(Reproducibility_SqueezeNet_v1_1, Accuracy)
    normAssert(ref, out);
 }

+// Checks that AlexNet still reproduces the reference output after its weights
+// are shrunk to fp16 (compared using the explicit l1/lInf values below).
+TEST(Reproducibility_AlexNet_fp16, Accuracy)
+{
+    const float l1 = 1e-5;
+    const float lInf = 2e-4;
+
+    const string proto = findDataFile("dnn/bvlc_alexnet.prototxt", false);
+    const string model = findDataFile("dnn/bvlc_alexnet.caffemodel", false);
+
+    // Use a unique temporary file so the shrunk model neither pollutes the
+    // working directory nor collides with concurrent test runs.
+    const cv::String fp16Model = cv::tempfile(".caffemodel_fp16");
+    shrinkCaffeModel(model, fp16Model);
+    Net net = readNetFromCaffe(proto, fp16Model);
+
+    Mat sample = imread(findDataFile("dnn/grace_hopper_227.png", false));
+
+    net.setInput(blobFromImage(sample, 1, Size(227, 227)));
+    Mat out = net.forward();
+    remove(fp16Model.c_str());  // The temporary model is not used past this point.
+    Mat ref = blobFromNPY(findDataFile("dnn/caffe_alexnet_prob.npy", false));
+    normAssert(ref, out, "", l1, lInf);
+}
+
+// Same check for GoogLeNet: shrink the weights to fp16, run two images through
+// the network and compare the "prob" output with the stored reference.
+TEST(Reproducibility_GoogLeNet_fp16, Accuracy)
+{
+    const float l1 = 1e-5;
+    const float lInf = 3e-3;
+
+    const string proto = findDataFile("dnn/bvlc_googlenet.prototxt", false);
+    const string model = findDataFile("dnn/bvlc_googlenet.caffemodel", false);
+
+    // Load the inputs first: ASSERT_TRUE returns early on failure, and doing it
+    // before the temporary model exists means nothing is left behind.
+    std::vector<Mat> inpMats;
+    inpMats.push_back( imread(_tf("googlenet_0.png")) );
+    inpMats.push_back( imread(_tf("googlenet_1.png")) );
+    ASSERT_TRUE(!inpMats[0].empty() && !inpMats[1].empty());
+
+    const cv::String fp16Model = cv::tempfile(".caffemodel_fp16");
+    shrinkCaffeModel(model, fp16Model);
+    Net net = readNetFromCaffe(proto, fp16Model);
+
+    net.setInput(blobFromImages(inpMats), "data");
+    Mat out = net.forward("prob");
+    remove(fp16Model.c_str());  // The temporary model is not used past this point.
+
+    Mat ref = blobFromNPY(_tf("googlenet_prob.npy"));
+    normAssert(out, ref, "", l1, lInf);
+}
+
 }