diff --git a/app/models/upload.rb b/app/models/upload.rb new file mode 100644 index 0000000000000000000000000000000000000000..13987931b0505820cfb4b55268f4ac39d36821b0 --- /dev/null +++ b/app/models/upload.rb @@ -0,0 +1,63 @@ +class Upload < ActiveRecord::Base + # Upper limit for foreground checksum processing + CHECKSUM_THRESHOLD = 100.megabytes + + belongs_to :model, polymorphic: true + + validates :size, presence: true + validates :path, presence: true + validates :model, presence: true + validates :uploader, presence: true + + before_save :calculate_checksum, if: :foreground_checksum? + after_commit :schedule_checksum, unless: :foreground_checksum? + + def self.remove_path(path) + where(path: path).destroy_all + end + + def self.record(uploader) + remove_path(uploader.relative_path) + + create( + size: uploader.file.size, + path: uploader.relative_path, + model: uploader.model, + uploader: uploader.class.to_s + ) + end + + def absolute_path + return path unless relative_path? + + uploader_class.absolute_path(self) + end + + def calculate_checksum + return unless exist? + + self.checksum = Digest::SHA256.file(absolute_path).hexdigest + end + + def exist? + File.exist?(absolute_path) + end + + private + + def foreground_checksum? + size <= CHECKSUM_THRESHOLD + end + + def schedule_checksum + UploadChecksumWorker.perform_async(id) + end + + def relative_path? + !path.start_with?('/') + end + + def uploader_class + Object.const_get(uploader) + end +end diff --git a/app/workers/upload_checksum_worker.rb b/app/workers/upload_checksum_worker.rb new file mode 100644 index 0000000000000000000000000000000000000000..78931f1258fbeddc37a1f23dd470b8e8efa68437 --- /dev/null +++ b/app/workers/upload_checksum_worker.rb @@ -0,0 +1,12 @@ +class UploadChecksumWorker + include Sidekiq::Worker + include DedicatedSidekiqQueue + + def perform(upload_id) + upload = Upload.find(upload_id) + upload.calculate_checksum + upload.save! + rescue ActiveRecord::RecordNotFound + Rails.logger.error("UploadChecksumWorker: couldn't find upload #{upload_id}, skipping") + end +end diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index 97620cc9c7f2c91e558f06a06bd719c46e836e86..824f99e687ea7f2feb86567cd99af1ee7c2b1505 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -29,6 +29,7 @@ - [email_receiver, 2] - [emails_on_push, 2] - [mailers, 2] + - [upload_checksum, 1] - [use_key, 1] - [repository_fork, 1] - [repository_import, 1] diff --git a/db/migrate/20170130221926_create_uploads.rb b/db/migrate/20170130221926_create_uploads.rb new file mode 100644 index 0000000000000000000000000000000000000000..6f06c5dd84082bfe37a11118c0c61d72036956e1 --- /dev/null +++ b/db/migrate/20170130221926_create_uploads.rb @@ -0,0 +1,20 @@ +class CreateUploads < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + def change + create_table :uploads do |t| + t.integer :size, limit: 8, null: false + t.string :path, null: false + t.string :checksum, limit: 64 + t.references :model, polymorphic: true + t.string :uploader, null: false + t.datetime :created_at, null: false + end + + add_index :uploads, :path + add_index :uploads, :checksum + add_index :uploads, [:model_id, :model_type] + end +end diff --git a/db/schema.rb b/db/schema.rb index 6f7dd3e47687ce98c24df8671d5a9ce44c9e9a16..9f31ee8e6c5af8d1d8eb5792430fc0a5a55ed3c6 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -1214,6 +1214,20 @@ ActiveRecord::Schema.define(version: 20170306170512) do add_index "u2f_registrations", ["key_handle"], name: "index_u2f_registrations_on_key_handle", using: :btree add_index "u2f_registrations", ["user_id"], name: "index_u2f_registrations_on_user_id", using: :btree + create_table "uploads", force: :cascade do |t| + t.integer "size", limit: 8, null: false + t.string "path", null: false + t.string "checksum", limit: 64 + t.integer "model_id" + t.string "model_type" + t.string "uploader", null: false + t.datetime "created_at", null: false + end + + add_index "uploads", ["checksum"], name: "index_uploads_on_checksum", using: :btree + add_index "uploads", ["model_id", "model_type"], name: "index_uploads_on_model_id_and_model_type", using: :btree + add_index "uploads", ["path"], name: "index_uploads_on_path", using: :btree + create_table "user_agent_details", force: :cascade do |t| t.string "user_agent", null: false t.string "ip_address", null: false diff --git a/spec/models/upload_spec.rb b/spec/models/upload_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..4c832c87d6afcbffaaa390db292905c39beb1147 --- /dev/null +++ b/spec/models/upload_spec.rb @@ -0,0 +1,151 @@ +require 'rails_helper' + +describe Upload, type: :model do + describe 'assocations' do + it { is_expected.to belong_to(:model) } + end + + describe 'validations' do + it { is_expected.to validate_presence_of(:size) } + it { is_expected.to validate_presence_of(:path) } + it { is_expected.to validate_presence_of(:model) } + it { is_expected.to validate_presence_of(:uploader) } + end + + describe 'callbacks' do + context 'for a file above the checksum threshold' do + it 'schedules checksum calculation' do + stub_const('UploadChecksumWorker', spy) + + upload = described_class.create( + path: __FILE__, + size: described_class::CHECKSUM_THRESHOLD + 1.kilobyte, + model: build_stubbed(:user), + uploader: double('ExampleUploader') + ) + + expect(UploadChecksumWorker) + .to have_received(:perform_async).with(upload.id) + end + end + + context 'for a file at or below the checksum threshold' do + it 'calculates checksum immediately before save' do + upload = described_class.new( + path: __FILE__, + size: described_class::CHECKSUM_THRESHOLD, + model: build_stubbed(:user), + uploader: double('ExampleUploader') + ) + + expect { upload.save } + .to change { upload.checksum }.from(nil) + .to(a_string_matching(/\A\h{64}\z/)) + end + end + end + + describe '.remove_path' do + it 'removes all records at the given path' do + described_class.create!( + size: File.size(__FILE__), + path: __FILE__, + model: build_stubbed(:user), + uploader: 'AvatarUploader' + ) + + expect { described_class.remove_path(__FILE__) }. + to change { described_class.count }.from(1).to(0) + end + end + + describe '.record' do + let(:fake_uploader) do + double( + file: double(size: 12_345), + relative_path: 'foo/bar.jpg', + model: build_stubbed(:user), + class: 'AvatarUploader' + ) + end + + it 'removes existing paths before creation' do + expect(described_class).to receive(:remove_path) + .with(fake_uploader.relative_path) + + described_class.record(fake_uploader) + end + + it 'creates a new record and assigns size, path, model, and uploader' do + upload = described_class.record(fake_uploader) + + aggregate_failures do + expect(upload).to be_persisted + expect(upload.size).to eq fake_uploader.file.size + expect(upload.path).to eq fake_uploader.relative_path + expect(upload.model_id).to eq fake_uploader.model.id + expect(upload.model_type).to eq fake_uploader.model.class.to_s + expect(upload.uploader).to eq fake_uploader.class + end + end + end + + describe '#absolute_path' do + it 'returns the path directly when already absolute' do + path = '/path/to/namespace/project/secret/file.jpg' + upload = described_class.new(path: path) + + expect(upload).not_to receive(:uploader_class) + + expect(upload.absolute_path).to eq path + end + + it "delegates to the uploader's absolute_path method" do + uploader = spy('FakeUploader') + upload = described_class.new(path: 'secret/file.jpg') + expect(upload).to receive(:uploader_class).and_return(uploader) + + upload.absolute_path + + expect(uploader).to have_received(:absolute_path).with(upload) + end + end + + describe '#calculate_checksum' do + it 'calculates the SHA256 sum' do + upload = described_class.new( + path: __FILE__, + size: described_class::CHECKSUM_THRESHOLD - 1.megabyte + ) + expected = Digest::SHA256.file(__FILE__).hexdigest + + expect { upload.calculate_checksum } + .to change { upload.checksum }.from(nil).to(expected) + end + + it 'returns nil for a non-existant file' do + upload = described_class.new( + path: __FILE__, + size: described_class::CHECKSUM_THRESHOLD - 1.megabyte + ) + + expect(upload).to receive(:exist?).and_return(false) + + expect(upload.calculate_checksum).to be_nil + end + end + + describe '#exist?' do + it 'returns true when the file exists' do + upload = described_class.new(path: __FILE__) + + expect(upload).to exist + end + + it 'returns false when the file does not exist' do + upload = described_class.new(path: "#{__FILE__}-nope") + + expect(upload).not_to exist + end + end +end diff --git a/spec/support/carrierwave.rb b/spec/support/carrierwave.rb index 72af2c70324ca10db0d7acf5a568d5299ea7155d..fa9a316b6a22b828b707468c8eb7d6e4b33e63c4 100644 --- a/spec/support/carrierwave.rb +++ b/spec/support/carrierwave.rb @@ -1,7 +1,7 @@ -CarrierWave.root = 'tmp/tests/uploads' +CarrierWave.root = File.expand_path('tmp/tests/uploads', Rails.root) RSpec.configure do |config| config.after(:each) do - FileUtils.rm_rf('tmp/tests/uploads') + FileUtils.rm_rf(CarrierWave.root) end end diff --git a/spec/workers/upload_checksum_worker_spec.rb b/spec/workers/upload_checksum_worker_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..911360da66ca32147f6140244ed5287b6b80362b --- /dev/null +++ b/spec/workers/upload_checksum_worker_spec.rb @@ -0,0 +1,19 @@ +require 'rails_helper' + +describe UploadChecksumWorker do + describe '#perform' do + it 'rescues ActiveRecord::RecordNotFound' do + expect { described_class.new.perform(999_999) }.not_to raise_error + end + + it 'calls calculate_checksum_without_delay and save!' do + upload = spy + expect(Upload).to receive(:find).with(999_999).and_return(upload) + + described_class.new.perform(999_999) + + expect(upload).to have_received(:calculate_checksum) + expect(upload).to have_received(:save!) + end + end +end