# Expose a JSON-safe diff through the API entity and gather the binary
# detection helpers in lib/gitlab/encoding_helper.rb.
#
# - lib/api/entities.rb: the `diff` field is now read from `json_safe_diff`.
# - lib/gitlab/encoding_helper.rb: drops a `byebug` call from the rescue
#   branch, replaces `binary?` with `all_binary?` (which no longer requires
#   `detect[:confidence] == 100`) and adds `libgit2_binary?`, which builds
#   the detector with a limit of 8000.
# - lib/gitlab/git/blob.rb: `binary?` moves further down the file and now
#   delegates to `EncodingHelper.libgit2_binary?`.
# - lib/gitlab/git/diff.rb: adds `json_safe_diff`, which returns
#   `Diff::binary_message(@old_path, @new_path)` when `all_binary?(@diff)`
#   is true and `@diff` otherwise; the patch loader now always stores
#   `encode!(strip_diff_headers(patch.to_s))`.
#
# NOTE(review): `encode!` also switches to `all_binary?`, so its early
# BINARY return no longer depends on detection confidence; confirm that this
# is intended.
# NOTE(review): the leading indentation of the hunk lines below is inferred
# from Ruby nesting; check it against the target tree or apply with
# `git apply --ignore-whitespace`.
diff --git a/lib/api/entities.rb b/lib/api/entities.rb
index 0c63dc345c2d45ce4e60cfc16a7fb3e043d8b98a..1d224d7bc217c7fdcca350c05438f01cadeb73cc 100644
--- a/lib/api/entities.rb
+++ b/lib/api/entities.rb
@@ -295,7 +295,7 @@ module API
       expose :new_file?, as: :new_file
       expose :renamed_file?, as: :renamed_file
       expose :deleted_file?, as: :deleted_file
-      expose :diff
+      expose :json_safe_diff, as: :diff
     end
 
     class ProtectedRefAccess < Grape::Entity
diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb
index c5e173ba55a9609d209046f7ac15e48348e23ef6..21a14141c87a337453ba558106dce2eef072b0f1 100644
--- a/lib/gitlab/encoding_helper.rb
+++ b/lib/gitlab/encoding_helper.rb
@@ -24,7 +24,7 @@ module Gitlab
 
       # return message if message type is binary
       detect = CharlockHolmes::EncodingDetector.detect(message)
-      return message.force_encoding("BINARY") if binary?(message, detect)
+      return message.force_encoding("BINARY") if all_binary?(message, detect)
 
       if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD
         # force detected encoding if we have sufficient confidence.
@@ -34,14 +34,21 @@ module Gitlab
       # encode and clean the bad chars
       message.replace clean(message)
     rescue => e
-      byebug
       encoding = detect ? detect[:encoding] : "unknown"
       "--broken encoding: #{encoding}"
     end
 
-    def binary?(message, detect=nil)
-      detect ||= CharlockHolmes::EncodingDetector.detect(message)
-      detect && detect[:type] == :binary && detect[:confidence] == 100
+    def all_binary?(data, detect=nil)
+      detect ||= CharlockHolmes::EncodingDetector.detect(data)
+      detect && detect[:type] == :binary
+    end
+
+    def libgit2_binary?(data)
+      # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
+      # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
+      # which is what we use below to keep a consistent behavior.
+      detect = CharlockHolmes::EncodingDetector.new(8000).detect(data)
+      all_binary?(data, detect)
     end
 
     def encode_utf8(message)
diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb
index 7780f4e4d4f0bd075b199739f4df43d37176ccd2..2e6edb8be0da2e8d921fb9c297558253cda3fab9 100644
--- a/lib/gitlab/git/blob.rb
+++ b/lib/gitlab/git/blob.rb
@@ -42,14 +42,6 @@ module Gitlab
           end
         end
 
-        def binary?(data)
-          # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
-          # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
-          # which is what we use below to keep a consistent behavior.
-          detect = CharlockHolmes::EncodingDetector.new(8000).detect(data)
-          detect && detect[:type] == :binary
-        end
-
         # Returns an array of Blob instances, specified in blob_references as
         # [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0 then the
         # full blob contents are returned. If blob_size_limit >= 0 then each blob will
@@ -169,6 +161,10 @@ module Gitlab
             end
           end
         end
+
+        def binary?(data)
+          EncodingHelper.libgit2_binary?(data)
+        end
       end
 
       def initialize(options)
diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb
index e544f255a5d0368f33f3f2b4ec4723dd16092774..17defe55af0e493184164bf16e707673c2a985b4 100644
--- a/lib/gitlab/git/diff.rb
+++ b/lib/gitlab/git/diff.rb
@@ -197,6 +197,13 @@ module Gitlab
         @collapsed = true
       end
 
+      def json_safe_diff
+        return @diff unless all_binary?(@diff)
+
+        # the diff is binary, let's make a message for it
+        Diff::binary_message(@old_path, @new_path)
+      end
+
       private
 
       def init_from_rugged(rugged)
@@ -221,14 +228,7 @@ module Gitlab
         # binary we're not going to display anything so we skip the size check.
         return if !patch.delta.binary? && prune_large_patch(patch)
 
-        diff = strip_diff_headers(patch.to_s)
-        @diff = if binary?(diff)
-                  # the diff is binary, let's make a message for it
-                  Diff::binary_message(patch.delta.old_file[:path],
-                                       patch.delta.new_file[:path])
-                else
-                  encode!(diff)
-                end
+        @diff = encode!(strip_diff_headers(patch.to_s))
       end
 
       def init_from_hash(hash)