diff --git a/changelogs/unreleased/mk-rake-task-verify-remote-files.yml b/changelogs/unreleased/mk-rake-task-verify-remote-files.yml new file mode 100644 index 0000000000000000000000000000000000000000..772aa11d89b61918851a9aed6af5ed6bee2b1e9e --- /dev/null +++ b/changelogs/unreleased/mk-rake-task-verify-remote-files.yml @@ -0,0 +1,5 @@ +--- +title: Add support for verifying remote uploads, artifacts, and LFS objects in check rake tasks +merge_request: 19501 +author: +type: added diff --git a/doc/administration/raketasks/check.md b/doc/administration/raketasks/check.md index 7d34d35e7d13f729c000a845fa073d9f2bcac056..2649bf61d74c94d30ad6b43f55f639ebe1edffd0 100644 --- a/doc/administration/raketasks/check.md +++ b/doc/administration/raketasks/check.md @@ -78,9 +78,10 @@ Example output: ## Uploaded Files Integrity -Various types of file can be uploaded to a GitLab installation by users. -Checksums are generated and stored in the database upon upload, and integrity -checks using those checksums can be run. These checks also detect missing files. +Various types of files can be uploaded to a GitLab installation by users. +These integrity checks can detect missing files. Additionally, for locally +stored files, checksums are generated and stored in the database upon upload, +and these checks will verify them against current files. Currently, integrity checks are supported for the following types of file: diff --git a/lib/gitlab/verify/batch_verifier.rb b/lib/gitlab/verify/batch_verifier.rb index 1ef369a4b676e93176cac52d44d82516b1f28d06..167ba1b31494954fc073ab287fac92c9a7b44620 100644 --- a/lib/gitlab/verify/batch_verifier.rb +++ b/lib/gitlab/verify/batch_verifier.rb @@ -7,13 +7,15 @@ module Gitlab @batch_size = batch_size @start = start @finish = finish + + fix_google_api_logger end # Yields a Range of IDs and a Hash of failed verifications (object => error) def run_batches(&blk) - relation.in_batches(of: batch_size, start: start, finish: finish) do |relation| # rubocop: disable Cop/InBatches - range = relation.first.id..relation.last.id - failures = run_batch(relation) + all_relation.in_batches(of: batch_size, start: start, finish: finish) do |batch| # rubocop: disable Cop/InBatches + range = batch.first.id..batch.last.id + failures = run_batch_for(batch) yield(range, failures) end @@ -29,24 +31,56 @@ module Gitlab private - def run_batch(relation) - relation.map { |upload| verify(upload) }.compact.to_h + def run_batch_for(batch) + batch.map { |upload| verify(upload) }.compact.to_h end def verify(object) + local?(object) ? verify_local(object) : verify_remote(object) + rescue => err + failure(object, err.inspect) + end + + def verify_local(object) expected = expected_checksum(object) actual = actual_checksum(object) - raise 'Checksum missing' unless expected.present? - raise 'Checksum mismatch' unless expected == actual + return failure(object, 'Checksum missing') unless expected.present? + return failure(object, 'Checksum mismatch') unless expected == actual + + success + end + # We don't calculate checksum for remote objects, so just check existence + def verify_remote(object) + return failure(object, 'Remote object does not exist') unless remote_object_exists?(object) + + success + end + + def success nil - rescue => err - [object, err] + end + + def failure(object, message) + [object, message] + end + + # It's already set to Logger::INFO, but acts as if it is set to + # Logger::DEBUG, and this fixes it... + def fix_google_api_logger + if Object.const_defined?('Google::Apis') + Google::Apis.logger.level = Logger::INFO + end end # This should return an ActiveRecord::Relation suitable for calling #in_batches on - def relation + def all_relation + raise NotImplementedError.new + end + + # Should return true if the object is stored locally + def local?(_object) raise NotImplementedError.new end @@ -59,6 +93,11 @@ module Gitlab def actual_checksum(_object) raise NotImplementedError.new end + + # Be sure to perform a hard check of the remote object (don't just check DB value) + def remote_object_exists?(object) + raise NotImplementedError.new + end end end end diff --git a/lib/gitlab/verify/job_artifacts.rb b/lib/gitlab/verify/job_artifacts.rb index 03500a610743088ba13590dde8b7624c81996f28..dbadfbde9e38118e53031f1f1cbdc716fe6c5d07 100644 --- a/lib/gitlab/verify/job_artifacts.rb +++ b/lib/gitlab/verify/job_artifacts.rb @@ -11,10 +11,14 @@ module Gitlab private - def relation + def all_relation ::Ci::JobArtifact.all end + def local?(artifact) + artifact.local_store? + end + def expected_checksum(artifact) artifact.file_sha256 end @@ -22,6 +26,10 @@ module Gitlab def actual_checksum(artifact) Digest::SHA256.file(artifact.file.path).hexdigest end + + def remote_object_exists?(artifact) + artifact.file.file.exists? + end end end end diff --git a/lib/gitlab/verify/lfs_objects.rb b/lib/gitlab/verify/lfs_objects.rb index 970e2a7b7189d2d14f6329c79314ef9c09d50a56..d3f58a73ac7ae4ebf6902ab66b067929370e1851 100644 --- a/lib/gitlab/verify/lfs_objects.rb +++ b/lib/gitlab/verify/lfs_objects.rb @@ -11,8 +11,12 @@ module Gitlab private - def relation - LfsObject.with_files_stored_locally + def all_relation + LfsObject.all + end + + def local?(lfs_object) + lfs_object.local_store? end def expected_checksum(lfs_object) @@ -22,6 +26,10 @@ module Gitlab def actual_checksum(lfs_object) LfsObject.calculate_oid(lfs_object.file.path) end + + def remote_object_exists?(lfs_object) + lfs_object.file.file.exists? + end end end end diff --git a/lib/gitlab/verify/rake_task.rb b/lib/gitlab/verify/rake_task.rb index dd138e6b92b143a31124219568fa91db62e47081..e190eaddc79cb6ac740efff60fe67fdac381d488 100644 --- a/lib/gitlab/verify/rake_task.rb +++ b/lib/gitlab/verify/rake_task.rb @@ -45,7 +45,7 @@ module Gitlab return unless verbose? failures.each do |object, error| - say " - #{verifier.describe(object)}: #{error.inspect}".color(:red) + say " - #{verifier.describe(object)}: #{error}".color(:red) end end end diff --git a/lib/gitlab/verify/uploads.rb b/lib/gitlab/verify/uploads.rb index 0ffa71a6d724d683eff044641c8e20f191a327c8..01f09ab8df70a5ce32e6226184f9b4be49b12906 100644 --- a/lib/gitlab/verify/uploads.rb +++ b/lib/gitlab/verify/uploads.rb @@ -11,8 +11,12 @@ module Gitlab private - def relation - Upload.with_files_stored_locally + def all_relation + Upload.all + end + + def local?(upload) + upload.local? end def expected_checksum(upload) @@ -22,6 +26,10 @@ module Gitlab def actual_checksum(upload) Upload.hexdigest(upload.absolute_path) end + + def remote_object_exists?(upload) + upload.build_uploader.file.exists? + end end end end diff --git a/spec/lib/gitlab/verify/job_artifacts_spec.rb b/spec/lib/gitlab/verify/job_artifacts_spec.rb index ec490bdfde294dafbad6f9bff11a648a80b41065..6e916a5656490abff31e382dd1dd65728dd136ae 100644 --- a/spec/lib/gitlab/verify/job_artifacts_spec.rb +++ b/spec/lib/gitlab/verify/job_artifacts_spec.rb @@ -21,15 +21,38 @@ describe Gitlab::Verify::JobArtifacts do FileUtils.rm_f(artifact.file.path) expect(failures.keys).to contain_exactly(artifact) - expect(failure).to be_a(Errno::ENOENT) - expect(failure.to_s).to include(artifact.file.path) + expect(failure).to include('No such file or directory') + expect(failure).to include(artifact.file.path) end it 'fails artifacts with a mismatched checksum' do File.truncate(artifact.file.path, 0) expect(failures.keys).to contain_exactly(artifact) - expect(failure.to_s).to include('Checksum mismatch') + expect(failure).to include('Checksum mismatch') + end + + context 'with remote files' do + let(:file) { double(:file) } + + before do + stub_artifacts_object_storage + artifact.update!(file_store: ObjectStorage::Store::REMOTE) + expect(CarrierWave::Storage::Fog::File).to receive(:new).and_return(file) + end + + it 'passes artifacts in object storage that exist' do + expect(file).to receive(:exists?).and_return(true) + + expect(failures).to eq({}) + end + + it 'fails artifacts in object storage that do not exist' do + expect(file).to receive(:exists?).and_return(false) + + expect(failures.keys).to contain_exactly(artifact) + expect(failure).to include('Remote object does not exist') + end end end end diff --git a/spec/lib/gitlab/verify/lfs_objects_spec.rb b/spec/lib/gitlab/verify/lfs_objects_spec.rb index 0f890e2c7ceee5afc792b05e4dc916bfcd7e6b3d..2feaedd6f14907bdfe146231f58bbd9a3de87d5c 100644 --- a/spec/lib/gitlab/verify/lfs_objects_spec.rb +++ b/spec/lib/gitlab/verify/lfs_objects_spec.rb @@ -21,30 +21,37 @@ describe Gitlab::Verify::LfsObjects do FileUtils.rm_f(lfs_object.file.path) expect(failures.keys).to contain_exactly(lfs_object) - expect(failure).to be_a(Errno::ENOENT) - expect(failure.to_s).to include(lfs_object.file.path) + expect(failure).to include('No such file or directory') + expect(failure).to include(lfs_object.file.path) end it 'fails LFS objects with a mismatched oid' do File.truncate(lfs_object.file.path, 0) expect(failures.keys).to contain_exactly(lfs_object) - expect(failure.to_s).to include('Checksum mismatch') + expect(failure).to include('Checksum mismatch') end context 'with remote files' do + let(:file) { double(:file) } + before do stub_lfs_object_storage + lfs_object.update!(file_store: ObjectStorage::Store::REMOTE) + expect(CarrierWave::Storage::Fog::File).to receive(:new).and_return(file) end - it 'skips LFS objects in object storage' do - local_failure = create(:lfs_object) - create(:lfs_object, :object_storage) + it 'passes LFS objects in object storage that exist' do + expect(file).to receive(:exists?).and_return(true) + + expect(failures).to eq({}) + end - failures = {} - described_class.new(batch_size: 10).run_batches { |_, failed| failures.merge!(failed) } + it 'fails LFS objects in object storage that do not exist' do + expect(file).to receive(:exists?).and_return(false) - expect(failures.keys).to contain_exactly(local_failure) + expect(failures.keys).to contain_exactly(lfs_object) + expect(failure).to include('Remote object does not exist') end end end diff --git a/spec/lib/gitlab/verify/uploads_spec.rb b/spec/lib/gitlab/verify/uploads_spec.rb index 85768308edc73fa3f33a0f88a504c63b61aaba15..296866d331972b2176a141d19d692d9691361229 100644 --- a/spec/lib/gitlab/verify/uploads_spec.rb +++ b/spec/lib/gitlab/verify/uploads_spec.rb @@ -23,37 +23,44 @@ describe Gitlab::Verify::Uploads do FileUtils.rm_f(upload.absolute_path) expect(failures.keys).to contain_exactly(upload) - expect(failure).to be_a(Errno::ENOENT) - expect(failure.to_s).to include(upload.absolute_path) + expect(failure).to include('No such file or directory') + expect(failure).to include(upload.absolute_path) end it 'fails uploads with a mismatched checksum' do upload.update!(checksum: 'something incorrect') expect(failures.keys).to contain_exactly(upload) - expect(failure.to_s).to include('Checksum mismatch') + expect(failure).to include('Checksum mismatch') end it 'fails uploads with a missing precalculated checksum' do upload.update!(checksum: '') expect(failures.keys).to contain_exactly(upload) - expect(failure.to_s).to include('Checksum missing') + expect(failure).to include('Checksum missing') end context 'with remote files' do + let(:file) { double(:file) } + before do stub_uploads_object_storage(AvatarUploader) + upload.update!(store: ObjectStorage::Store::REMOTE) + expect(CarrierWave::Storage::Fog::File).to receive(:new).and_return(file) end - it 'skips uploads in object storage' do - local_failure = create(:upload) - create(:upload, :object_storage) + it 'passes uploads in object storage that exist' do + expect(file).to receive(:exists?).and_return(true) + + expect(failures).to eq({}) + end - failures = {} - described_class.new(batch_size: 10).run_batches { |_, failed| failures.merge!(failed) } + it 'fails uploads in object storage that do not exist' do + expect(file).to receive(:exists?).and_return(false) - expect(failures.keys).to contain_exactly(local_failure) + expect(failures.keys).to contain_exactly(upload) + expect(failure).to include('Remote object does not exist') end end end