diff --git a/changelogs/unreleased/process-commit-worker-migration-encoding.yml b/changelogs/unreleased/process-commit-worker-migration-encoding.yml new file mode 100644 index 0000000000000000000000000000000000000000..26aabd9b647d321512b278284e77d73b1cca98e2 --- /dev/null +++ b/changelogs/unreleased/process-commit-worker-migration-encoding.yml @@ -0,0 +1,4 @@ +--- +title: Encode input when migrating ProcessCommitWorker jobs to prevent migration errors +merge_request: +author: diff --git a/db/migrate/20161124141322_migrate_process_commit_worker_jobs.rb b/db/migrate/20161124141322_migrate_process_commit_worker_jobs.rb index 453a44e271ac9561197b075fbc475e9cd9187660..77e0c40d850c2b4884f06c6ef13baa256a15d119 100644 --- a/db/migrate/20161124141322_migrate_process_commit_worker_jobs.rb +++ b/db/migrate/20161124141322_migrate_process_commit_worker_jobs.rb @@ -47,14 +47,14 @@ class MigrateProcessCommitWorkerJobs < ActiveRecord::Migration hash = { id: commit.oid, - message: commit.message, + message: encode(commit.message), parent_ids: commit.parent_ids, authored_date: commit.author[:time], - author_name: commit.author[:name], - author_email: commit.author[:email], + author_name: encode(commit.author[:name]), + author_email: encode(commit.author[:email]), committed_date: commit.committer[:time], - committer_email: commit.committer[:email], - committer_name: commit.committer[:name] + committer_email: encode(commit.committer[:email]), + committer_name: encode(commit.committer[:name]) } payload['args'][2] = hash @@ -89,4 +89,14 @@ class MigrateProcessCommitWorkerJobs < ActiveRecord::Migration end end end + + def encode(data) + encoding = Encoding::UTF_8 + + if data.encoding == encoding + data + else + data.encode(encoding, invalid: :replace, undef: :replace) + end + end end diff --git a/spec/migrations/migrate_process_commit_worker_jobs_spec.rb b/spec/migrations/migrate_process_commit_worker_jobs_spec.rb index 52428547a9f451a671ea4e0f6220c681fc6c8926..6a93deb5412e54591fbae25d71418a2c2d05fea6 100644 --- a/spec/migrations/migrate_process_commit_worker_jobs_spec.rb +++ b/spec/migrations/migrate_process_commit_worker_jobs_spec.rb @@ -1,3 +1,5 @@ +# encoding: utf-8 + require 'spec_helper' require Rails.root.join('db', 'migrate', '20161124141322_migrate_process_commit_worker_jobs.rb') @@ -59,6 +61,10 @@ describe MigrateProcessCommitWorkerJobs do Sidekiq.redis { |r| r.llen('queue:process_commit') } end + def pop_job + JSON.load(Sidekiq.redis { |r| r.lpop('queue:process_commit') }) + end + before do Sidekiq.redis do |redis| job = JSON.dump(args: [project.id, user.id, commit.oid]) @@ -92,11 +98,28 @@ describe MigrateProcessCommitWorkerJobs do expect(job_count).to eq(1) end + it 'encodes data to UTF-8' do + allow_any_instance_of(Rugged::Repository).to receive(:lookup). + with(commit.oid). + and_return(commit) + + allow(commit).to receive(:message). + and_return('김치'.force_encoding('BINARY')) + + migration.up + + job = pop_job + + # We don't care so much about what is being stored, instead we just want + # to make sure the encoding is right so that JSON encoding the data + # doesn't produce any errors. + expect(job['args'][2]['message'].encoding).to eq(Encoding::UTF_8) + end + context 'a migrated job' do let(:job) do migration.up - - JSON.load(Sidekiq.redis { |r| r.lpop('queue:process_commit') }) + pop_job end let(:commit_hash) do