blob.rb 6.8 KB
Newer Older
R
Robert Speicher 已提交
1 2 3 4
module Gitlab
  module Git
    class Blob
      include Linguist::BlobHelper
5
      include Gitlab::EncodingHelper
R
Robert Speicher 已提交
6 7 8 9 10

      # This is the maximum number of bytes of blob data that we want to
      # display to the user. We load up to this much for encoding detection
      # (Linguist) and LFS pointer parsing. Any other code that needs the full
      # blob data should call load_all_data!.
      MAX_DATA_DISPLAY_SIZE = 10.megabytes
R
Robert Speicher 已提交
12 13 14 15 16

      attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id, :loaded_size, :binary

      class << self
        # Look up the entry at +path+ in commit +sha+ and wrap it in a Blob.
        #
        # Delegates to find_by_gitaly when the :project_raw_show migration
        # block is given a truthy flag, and to find_by_rugged otherwise.
        # Returns whatever Gitlab::GitalyClient.migrate returns (presumably the
        # block's value — confirm against GitalyClient).
        def find(repository, sha, path)
          Gitlab::GitalyClient.migrate(:project_raw_show) do |is_enabled|
            if is_enabled
              find_by_gitaly(repository, sha, path)
            else
              find_by_rugged(repository, sha, path)
            end
          end
        end

        # Fetch the tree entry at +path+ in commit +sha+ through Gitaly and
        # wrap it in a Blob.
        #
        # At most MAX_DATA_DISPLAY_SIZE is requested from Gitaly. Returns nil
        # when Gitaly reports no entry, or when the entry type is neither
        # :BLOB nor :COMMIT.
        def find_by_gitaly(repository, sha, path)
          # Work on a copy without leading slashes; an empty result is sent as '/'.
          path = path.sub(/\A\/*/, '')
          path = '/' if path.empty?
          name = File.basename(path)
          entry = Gitlab::GitalyClient::Commit.new(repository).tree_entry(sha, path, MAX_DATA_DISPLAY_SIZE)
          return unless entry

          case entry.type
          when :COMMIT
            # Same empty placeholder that submodule_blob builds on the Rugged path.
            new(
              id: entry.oid,
              name: name,
              size: 0,
              data: '',
              path: path,
              commit_id: sha
            )
          when :BLOB
            new(
              id: entry.oid,
              name: name,
              size: entry.size,
              # Copy so the blob does not share the response's string object.
              data: entry.data.dup,
              mode: entry.mode.to_s(8),
              path: path,
              commit_id: sha,
              binary: binary?(entry.data)
            )
          end
        end

        # Resolve +path+ inside commit +sha+ with Rugged and wrap the result in
        # a Blob.
        #
        # Returns nil when the path does not resolve to a blob or submodule
        # entry, or when the blob object lookup yields nothing. Submodule
        # (:commit) entries are turned into empty placeholder blobs via
        # submodule_blob.
        def find_by_rugged(repository, sha, path)
          commit = repository.lookup(sha)
          root_tree = commit.tree

          # Strip leading slashes on a private copy. find_entry_by_path strips
          # them in place on whatever string it is handed, so passing the
          # caller's object would modify their argument (and raise on a frozen
          # string). The stored path is still the slash-less one, as before.
          path = path.sub(/\A\/*/, '')

          blob_entry = find_entry_by_path(repository, root_tree.oid, path)
          return nil unless blob_entry

          return submodule_blob(blob_entry, path, sha) if blob_entry[:type] == :commit

          blob = repository.lookup(blob_entry[:oid])
          return nil unless blob

          new(
            id: blob.oid,
            name: blob_entry[:name],
            size: blob.size,
            # Only the first MAX_DATA_DISPLAY_SIZE of content is requested here.
            data: blob.content(MAX_DATA_DISPLAY_SIZE),
            mode: blob_entry[:filemode].to_s(8),
            path: path,
            commit_id: sha,
            binary: blob.binary?
          )
        end

        # Load the object with id +sha+ directly, without path or commit
        # context.
        #
        # With the :git_blob_raw migration enabled this returns whatever
        # Gitlab::GitalyClient::Blob#get_blob returns; otherwise it builds a
        # Blob from the Rugged object. Both paths cap the data at
        # MAX_DATA_DISPLAY_SIZE.
        def raw(repository, sha)
          Gitlab::GitalyClient.migrate(:git_blob_raw) do |is_enabled|
            if is_enabled
              Gitlab::GitalyClient::Blob.new(repository).get_blob(oid: sha, limit: MAX_DATA_DISPLAY_SIZE)
            else
              blob = repository.lookup(sha)

              new(
                id: blob.oid,
                size: blob.size,
                data: blob.content(MAX_DATA_DISPLAY_SIZE),
                binary: blob.binary?
              )
            end
          end
        end
R
Robert Speicher 已提交
101

J
Jacob Vosmaer 已提交
102 103 104 105 106 107
        # Decide whether +data+ looks binary.
        #
        # Returns true or false from the detector's verdict, or the detector's
        # own falsy result when it reports nothing.
        def binary?(data)
          # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
          # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
          # which is what we use below to keep a consistent behavior.
          detection = CharlockHolmes::EncodingDetector.new(8000).detect(data)
          detection && detection[:type] == :binary
        end

        # Recursive search of a tree entry (blob or submodule commit) by path
        #
        # Ex.
        #   blog/            # oid: 1a
        #     app/           # oid: 2a
        #       models/      # oid: 3a
        #       file.rb      # oid: 4a
        #
        #
        # Blob.find_entry_by_path(repo, '1a', 'app/file.rb') # => the entry for file.rb (oid '4a')
        #
        def find_entry_by_path(repository, root_id, path)
          tree = repository.lookup(root_id)
          # Drop leading slashes. Note that this edits the string that was
          # passed in, so the caller sees the stripped path afterwards.
          path[/^\/*/] = ''
          head, *rest = path.split('/')

          match = tree.find { |candidate| candidate[:name] == head }
          return nil unless match

          if rest.empty?
            # Last segment: only blobs and commit (submodule) entries count.
            return [:blob, :commit].include?(match[:type]) ? match : nil
          end

          # More segments follow, so this one has to be a directory.
          return nil unless match[:type] == :tree

          find_entry_by_path(repository, match[:oid], rest.join('/'))
        end

        # Build the placeholder Blob used for a submodule entry: it carries the
        # entry's oid and name but has size 0 and empty data.
        def submodule_blob(blob_entry, path, sha)
          new(
            id: blob_entry[:oid],
            name: blob_entry[:name],
            size: 0,
            data: '',
            path: path,
            commit_id: sha
          )
        end
      end

      # Build a blob from an options hash. Recognised keys are :id, :name,
      # :path, :size, :data, :mode, :commit_id and :binary; each one is
      # assigned through its writer, and missing keys end up as nil.
      def initialize(options)
        %i[id name path size data mode commit_id binary].each do |attribute|
          send("#{attribute}=", options[attribute])
        end

        @loaded_all_data = false
        # Record how many bytes were handed in, before any encoding happens
        @loaded_size = @data.bytesize if @data
      end

      # Whether the blob is binary. An explicitly stored flag wins; when none
      # was stored (nil) the decision is left to the inherited binary?.
      def binary?
        return super if @binary.nil?

        @binary == true
      end

      # The blob's data, passed through encode! on every read.
      # NOTE(review): encode! comes from outside this class (presumably
      # Gitlab::EncodingHelper) — confirm whether it modifies @data in place.
      def data
        encode!(@data)
      end

      # Load all blob data (not just the first MAX_DATA_DISPLAY_SIZE bytes) into
      # memory as a Ruby string.
      #
      # Does nothing when the data is an empty string, and returns the data
      # already held when a full load has happened before. After a fresh load
      # the method returns nil.
      def load_all_data!(repository)
        return if @data == '' # don't mess with submodule blobs
        return @data if @loaded_all_data

        Gitlab::GitalyClient.migrate(:git_blob_load_all_data) do |is_enabled|
          @data =
            if is_enabled
              # limit: -1 asks for the blob without a size cap (presumably;
              # confirm against the Gitaly client).
              Gitlab::GitalyClient::Blob.new(repository).get_blob(oid: id, limit: -1).data
            else
              repository.lookup(id).content
            end
        end

        @loaded_all_data = true
        @loaded_size = @data.bytesize
        # Drop the stored flag so binary? no longer answers from it.
        @binary = nil
      end

      # The entry's name, passed through encode! on every read.
      def name
        encode!(@name)
      end

197 198 199 200
      # The entry's path, passed through encode! on every read.
      def path
        encode!(@path)
      end

201 202 203 204
      # True when the recorded size is larger than the number of bytes loaded
      # so far. Returns the falsy size itself when no size is recorded.
      def truncated?
        total = size
        return total unless total

        total > loaded_size
      end

      # A valid LFS object pointer is a text file consisting of
      # version
      # oid
      # size
      # see https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer
      def lfs_pointer?
        # Without the version line nothing else is inspected.
        versioned = has_lfs_version_key?
        return versioned unless versioned

        # Both an oid and a size have to be extractable from the data.
        lfs_oid.present? && lfs_size.present?
      end

      # The 64-character lowercase hex digest that follows "sha256:" in the
      # pointer data, or nil when the data has no LFS version line or no such
      # digest.
      def lfs_oid
        return nil unless has_lfs_version_key?

        data[/(?<=sha256:)([0-9a-f]{64})/, 1]
      end

      # The integer that follows "size " in the pointer data, or nil when the
      # data has no LFS version line or no such number.
      def lfs_size
        return nil unless has_lfs_version_key?

        # Named so the local does not shadow the blob's own size reader.
        size_match = data.match(/(?<=size )([0-9]+)/)
        size_match ? size_match[1].to_i : nil
      end

232 233 234 235
      # Where the real content lives when this blob is only a pointer:
      # :lfs for an LFS pointer, nil otherwise.
      def external_storage
        :lfs if lfs_pointer?
      end

238 239
      alias_method :external_size, :lfs_size

R
Robert Speicher 已提交
240 241 242 243 244 245 246 247
      private

      # Whether the data begins with the Git LFS spec "version" line. Only
      # non-empty text blobs are considered.
      def has_lfs_version_key?
        return false if empty?

        textual = text?
        return textual unless textual

        data.start_with?("version https://git-lfs.github.com/spec")
      end
    end
  end
end