diff --git a/Gemfile b/Gemfile index b3eeb3ec0ec6ca465faace518b5c5d74a503f2a4..e632a083acf1211f1d3461086ef18aecc4eaf93e 100644 --- a/Gemfile +++ b/Gemfile @@ -57,6 +57,7 @@ gem 'u2f', '~> 0.2.1' # GitLab Pages gem 'validates_hostname', '~> 1.0.6' +gem 'rubyzip', '~> 1.2.2', require: false # Browser detection gem 'browser', '~> 2.5' diff --git a/Gemfile.lock b/Gemfile.lock index 419a683192411e30f7addbd2f01471be141735d8..d8982614d8b199a61154759c7429bdba4f59f882 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1137,6 +1137,7 @@ DEPENDENCIES ruby-prof (~> 0.17.0) ruby-progressbar ruby_parser (~> 3.8) + rubyzip (~> 1.2.2) rugged (~> 0.27) sanitize (~> 4.6) sass (~> 3.5) diff --git a/app/services/projects/update_pages_service.rb b/app/services/projects/update_pages_service.rb index eb2478be3cfc1bfd41cd46a3ff38a505d7a0878b..5caeb4cfa5fec3d487f4439c0a45b22c5fdb325d 100644 --- a/app/services/projects/update_pages_service.rb +++ b/app/services/projects/update_pages_service.rb @@ -7,7 +7,11 @@ module Projects BLOCK_SIZE = 32.kilobytes MAX_SIZE = 1.terabyte - SITE_PATH = 'public/'.freeze + PUBLIC_DIR = 'public'.freeze + + # this has to be invalid group name, + # as it shares the namespace with groups + TMP_EXTRACT_PATH = '@pages.tmp'.freeze attr_reader :build @@ -27,12 +31,11 @@ module Projects raise InvalidStateError, 'pages are outdated' unless latest? # Create temporary directory in which we will extract the artifacts - FileUtils.mkdir_p(tmp_path) - Dir.mktmpdir(nil, tmp_path) do |archive_path| + make_secure_tmp_dir(tmp_path) do |archive_path| extract_archive!(archive_path) # Check if we did extract public directory - archive_public_path = File.join(archive_path, 'public') + archive_public_path = File.join(archive_path, PUBLIC_DIR) raise InvalidStateError, 'pages miss the public folder' unless Dir.exist?(archive_public_path) raise InvalidStateError, 'pages are outdated' unless latest? @@ -85,22 +88,18 @@ module Projects raise InvalidStateError, 'missing artifacts metadata' unless build.artifacts_metadata? # Calculate page size after extract - public_entry = build.artifacts_metadata_entry(SITE_PATH, recursive: true) + public_entry = build.artifacts_metadata_entry(PUBLIC_DIR + '/', recursive: true) if public_entry.total_size > max_size raise InvalidStateError, "artifacts for pages are too large: #{public_entry.total_size}" end - # Requires UnZip at least 6.00 Info-ZIP. - # -qq be (very) quiet - # -n never overwrite existing files - # We add * to end of SITE_PATH, because we want to extract SITE_PATH and all subdirectories - site_path = File.join(SITE_PATH, '*') build.artifacts_file.use_file do |artifacts_path| - unless system(*%W(unzip -n #{artifacts_path} #{site_path} -d #{temp_path})) - raise FailedToExtractError, 'pages failed to extract' - end + SafeZip::Extract.new(artifacts_path) + .extract(directories: [PUBLIC_DIR], to: temp_path) end + rescue SafeZip::Extract::Error => e + raise FailedToExtractError, e.message end def deploy_page!(archive_public_path) @@ -139,7 +138,7 @@ module Projects end def tmp_path - @tmp_path ||= File.join(::Settings.pages.path, 'tmp') + @tmp_path ||= File.join(::Settings.pages.path, TMP_EXTRACT_PATH) end def pages_path @@ -147,11 +146,11 @@ module Projects end def public_path - @public_path ||= File.join(pages_path, 'public') + @public_path ||= File.join(pages_path, PUBLIC_DIR) end def previous_public_path - @previous_public_path ||= File.join(pages_path, "public.#{SecureRandom.hex}") + @previous_public_path ||= File.join(pages_path, "#{PUBLIC_DIR}.#{SecureRandom.hex}") end def ref @@ -188,5 +187,15 @@ module Projects def pages_deployments_failed_total_counter @pages_deployments_failed_total_counter ||= Gitlab::Metrics.counter(:pages_deployments_failed_total, "Counter of GitLab Pages deployments which failed") end + + def make_secure_tmp_dir(tmp_path) + FileUtils.mkdir_p(tmp_path) + path = Dir.mktmpdir(nil, tmp_path) + begin + yield(path) + ensure + FileUtils.remove_entry_secure(path) + end + end end end diff --git a/changelogs/unreleased/extract-pages-with-rubyzip.yml b/changelogs/unreleased/extract-pages-with-rubyzip.yml new file mode 100644 index 0000000000000000000000000000000000000000..8352e79d3e557d7b8747865d072e6d54b6910412 --- /dev/null +++ b/changelogs/unreleased/extract-pages-with-rubyzip.yml @@ -0,0 +1,5 @@ +--- +title: Extract GitLab Pages using RubyZip +merge_request: +author: +type: security diff --git a/lib/safe_zip/entry.rb b/lib/safe_zip/entry.rb new file mode 100644 index 0000000000000000000000000000000000000000..664e2f52f914abc3e320edb6c5e8c6e2cc8d9417 --- /dev/null +++ b/lib/safe_zip/entry.rb @@ -0,0 +1,97 @@ +# frozen_string_literal: true + +module SafeZip + class Entry + attr_reader :zip_archive, :zip_entry + attr_reader :path, :params + + def initialize(zip_archive, zip_entry, params) + @zip_archive = zip_archive + @zip_entry = zip_entry + @params = params + @path = ::File.expand_path(zip_entry.name, params.extract_path) + end + + def path_dir + ::File.dirname(path) + end + + def real_path_dir + ::File.realpath(path_dir) + end + + def exist? + ::File.exist?(path) + end + + def extract + # do not extract if file is not part of target directory + return false unless matching_target_directory + + # do not overwrite existing file + raise SafeZip::Extract::AlreadyExistsError, "File already exists #{zip_entry.name}" if exist? + + create_path_dir + + if zip_entry.file? + extract_file + elsif zip_entry.directory? + extract_dir + elsif zip_entry.symlink? + extract_symlink + else + raise SafeZip::Extract::UnsupportedEntryError, "File #{zip_entry.name} cannot be extracted" + end + rescue SafeZip::Extract::Error + raise + rescue => e + raise SafeZip::Extract::ExtractError, e.message + end + + private + + def extract_file + zip_archive.extract(zip_entry, path) + end + + def extract_dir + FileUtils.mkdir(path) + end + + def extract_symlink + source_path = read_symlink + real_source_path = expand_symlink(source_path) + + # ensure that source path of symlink is within target directories + unless real_source_path.start_with?(matching_target_directory) + raise SafeZip::Extract::PermissionDeniedError, "Symlink cannot be created targeting: #{source_path}" + end + + ::File.symlink(source_path, path) + end + + def create_path_dir + # Create all directories, but ignore permissions + FileUtils.mkdir_p(path_dir) + + # disallow to make path dirs to point to another directories + unless path_dir == real_path_dir + raise SafeZip::Extract::PermissionDeniedError, "Directory of #{zip_entry.name} points to another directory" + end + end + + def matching_target_directory + params.matching_target_directory(path) + end + + def read_symlink + zip_archive.read(zip_entry) + end + + def expand_symlink(source_path) + ::File.realpath(source_path, path_dir) + rescue + raise SafeZip::Extract::SymlinkSourceDoesNotExistError, "Symlink source #{source_path} does not exist" + end + end +end diff --git a/lib/safe_zip/extract.rb b/lib/safe_zip/extract.rb new file mode 100644 index 0000000000000000000000000000000000000000..3bd4935ef34a11fca695b66051e3d24810936fdb --- /dev/null +++ b/lib/safe_zip/extract.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +module SafeZip + class Extract + Error = Class.new(StandardError) + PermissionDeniedError = Class.new(Error) + SymlinkSourceDoesNotExistError = Class.new(Error) + UnsupportedEntryError = Class.new(Error) + AlreadyExistsError = Class.new(Error) + NoMatchingError = Class.new(Error) + ExtractError = Class.new(Error) + + attr_reader :archive_path + + def initialize(archive_file) + @archive_path = archive_file + end + + def extract(opts = {}) + params = SafeZip::ExtractParams.new(**opts) + + if Feature.enabled?(:safezip_use_rubyzip, default_enabled: true) + extract_with_ruby_zip(params) + else + legacy_unsafe_extract_with_system_zip(params) + end + end + + private + + def extract_with_ruby_zip(params) + Zip::File.open(archive_path) do |zip_archive| + # Extract all files in the following order: + # 1. Directories first, + # 2. Files next, + # 3. Symlinks last (or anything else) + extracted = extract_all_entries(zip_archive, params, + zip_archive.lazy.select(&:directory?)) + + extracted += extract_all_entries(zip_archive, params, + zip_archive.lazy.select(&:file?)) + + extracted += extract_all_entries(zip_archive, params, + zip_archive.lazy.reject(&:directory?).reject(&:file?)) + + raise NoMatchingError, 'No entries extracted' unless extracted > 0 + end + end + + def extract_all_entries(zip_archive, params, entries) + entries.count do |zip_entry| + SafeZip::Entry.new(zip_archive, zip_entry, params) + .extract + end + end + + def legacy_unsafe_extract_with_system_zip(params) + # Requires UnZip at least 6.00 Info-ZIP. + # -n never overwrite existing files + args = %W(unzip -n -qq #{archive_path}) + + # We add * to end of directory, because we want to extract directory and all subdirectories + args += params.directories_wildcard + + # Target directory where we extract + args += %W(-d #{params.extract_path}) + + unless system(*args) + raise Error, 'archive failed to extract' + end + end + end +end diff --git a/lib/safe_zip/extract_params.rb b/lib/safe_zip/extract_params.rb new file mode 100644 index 0000000000000000000000000000000000000000..bd3b788bac979b47535d0e669530f0a6dc5874ca --- /dev/null +++ b/lib/safe_zip/extract_params.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module SafeZip + class ExtractParams + include Gitlab::Utils::StrongMemoize + + attr_reader :directories, :extract_path + + def initialize(directories:, to:) + @directories = directories + @extract_path = ::File.realpath(to) + end + + def matching_target_directory(path) + target_directories.find do |directory| + path.start_with?(directory) + end + end + + def target_directories + strong_memoize(:target_directories) do + directories.map do |directory| + ::File.join(::File.expand_path(directory, extract_path), '') + end + end + end + + def directories_wildcard + strong_memoize(:directories_wildcard) do + directories.map do |directory| + ::File.join(directory, '*') + end + end + end + end +end diff --git a/spec/fixtures/pages_non_writeable.zip b/spec/fixtures/pages_non_writeable.zip new file mode 100644 index 0000000000000000000000000000000000000000..69f175d8504884c71c70d445a66dcfc967f1cfab Binary files /dev/null and b/spec/fixtures/pages_non_writeable.zip differ diff --git a/spec/fixtures/safe_zip/invalid-symlink-does-not-exist.zip b/spec/fixtures/safe_zip/invalid-symlink-does-not-exist.zip new file mode 100644 index 0000000000000000000000000000000000000000..b9ae1548713ab160866691dc880ddb4f6f29d0e2 Binary files /dev/null and b/spec/fixtures/safe_zip/invalid-symlink-does-not-exist.zip differ diff --git a/spec/fixtures/safe_zip/invalid-symlinks-outside.zip b/spec/fixtures/safe_zip/invalid-symlinks-outside.zip new file mode 100644 index 0000000000000000000000000000000000000000..c184a1dafe285eefe69b04fc001138c0d1559ab1 Binary files /dev/null and b/spec/fixtures/safe_zip/invalid-symlinks-outside.zip differ diff --git a/spec/fixtures/safe_zip/valid-non-writeable.zip b/spec/fixtures/safe_zip/valid-non-writeable.zip new file mode 100644 index 0000000000000000000000000000000000000000..69f175d8504884c71c70d445a66dcfc967f1cfab Binary files /dev/null and b/spec/fixtures/safe_zip/valid-non-writeable.zip differ diff --git a/spec/fixtures/safe_zip/valid-simple.zip b/spec/fixtures/safe_zip/valid-simple.zip new file mode 100644 index 0000000000000000000000000000000000000000..a56b8b41dcc86e7c671a35513bd59666c824f758 Binary files /dev/null and b/spec/fixtures/safe_zip/valid-simple.zip differ diff --git a/spec/fixtures/safe_zip/valid-symlinks-first.zip b/spec/fixtures/safe_zip/valid-symlinks-first.zip new file mode 100644 index 0000000000000000000000000000000000000000..f5952ef71c92c49666fc20602d5b1c71b6b599ad Binary files /dev/null and b/spec/fixtures/safe_zip/valid-symlinks-first.zip differ diff --git a/spec/lib/safe_zip/entry_spec.rb b/spec/lib/safe_zip/entry_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..115e28c5994c5dfb5ef37b6bc4105a5ee7b085ad --- /dev/null +++ b/spec/lib/safe_zip/entry_spec.rb @@ -0,0 +1,196 @@ +require 'spec_helper' + +describe SafeZip::Entry do + let(:target_path) { Dir.mktmpdir('safe-zip') } + let(:directories) { %w(public folder/with/subfolder) } + let(:params) { SafeZip::ExtractParams.new(directories: directories, to: target_path) } + + let(:entry) { described_class.new(zip_archive, zip_entry, params) } + let(:entry_name) { 'public/folder/index.html' } + let(:entry_path_dir) { File.join(target_path, File.dirname(entry_name)) } + let(:entry_path) { File.join(target_path, entry_name) } + let(:zip_archive) { double } + + let(:zip_entry) do + double( + name: entry_name, + file?: false, + directory?: false, + symlink?: false) + end + + after do + FileUtils.remove_entry_secure(target_path) + end + + context '#path_dir' do + subject { entry.path_dir } + + it { is_expected.to eq(target_path + '/public/folder') } + end + + context '#exist?' do + subject { entry.exist? } + + context 'when entry does not exist' do + it { is_expected.not_to be_truthy } + end + + context 'when entry does exist' do + before do + create_entry + end + + it { is_expected.to be_truthy } + end + end + + describe '#extract' do + subject { entry.extract } + + context 'when entry does not match the filtered directories' do + using RSpec::Parameterized::TableSyntax + + where(:entry_name) do + [ + 'assets/folder/index.html', + 'public/../folder/index.html', + 'public/../../../../../index.html', + '../../../../../public/index.html', + '/etc/passwd' + ] + end + + with_them do + it 'does not extract file' do + is_expected.to be_falsey + end + end + end + + context 'when entry does exist' do + before do + create_entry + end + + it 'raises an exception' do + expect { subject }.to raise_error(SafeZip::Extract::AlreadyExistsError) + end + end + + context 'when entry type is unknown' do + it 'raises an exception' do + expect { subject }.to raise_error(SafeZip::Extract::UnsupportedEntryError) + end + end + + context 'when entry is valid' do + shared_examples 'secured symlinks' do + context 'when we try to extract entry into symlinked folder' do + before do + FileUtils.mkdir_p(File.join(target_path, "source")) + File.symlink("source", File.join(target_path, "public")) + end + + it 'raises an exception' do + expect { subject }.to raise_error(SafeZip::Extract::PermissionDeniedError) + end + end + end + + context 'and is file' do + before do + allow(zip_entry).to receive(:file?) { true } + end + + it 'does extract file' do + expect(zip_archive).to receive(:extract) + .with(zip_entry, entry_path) + .and_return(true) + + is_expected.to be_truthy + end + + it_behaves_like 'secured symlinks' + end + + context 'and is directory' do + let(:entry_name) { 'public/folder/assets' } + + before do + allow(zip_entry).to receive(:directory?) { true } + end + + it 'does create directory' do + is_expected.to be_truthy + + expect(File.exist?(entry_path)).to eq(true) + end + + it_behaves_like 'secured symlinks' + end + + context 'and is symlink' do + let(:entry_name) { 'public/folder/assets' } + + before do + allow(zip_entry).to receive(:symlink?) { true } + allow(zip_archive).to receive(:read).with(zip_entry) { entry_symlink } + end + + shared_examples 'a valid symlink' do + it 'does create symlink' do + is_expected.to be_truthy + + expect(File.exist?(entry_path)).to eq(true) + end + end + + context 'when source is within target' do + let(:entry_symlink) { '../images' } + + context 'but does not exist' do + it 'raises an exception' do + expect { subject }.to raise_error(SafeZip::Extract::SymlinkSourceDoesNotExistError) + end + end + + context 'and does exist' do + before do + FileUtils.mkdir_p(File.join(target_path, 'public', 'images')) + end + + it_behaves_like 'a valid symlink' + end + end + + context 'when source points outside of target' do + let(:entry_symlink) { '../../images' } + + before do + FileUtils.mkdir(File.join(target_path, 'images')) + end + + it 'raises an exception' do + expect { subject }.to raise_error(SafeZip::Extract::PermissionDeniedError) + end + end + + context 'when source points to /etc/passwd' do + let(:entry_symlink) { '/etc/passwd' } + + it 'raises an exception' do + expect { subject }.to raise_error(SafeZip::Extract::PermissionDeniedError) + end + end + end + end + end + + private + + def create_entry + FileUtils.mkdir_p(entry_path_dir) + FileUtils.touch(entry_path) + end +end diff --git a/spec/lib/safe_zip/extract_params_spec.rb b/spec/lib/safe_zip/extract_params_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..85e22cfa49594748a9df2340081d3144f3acc79b --- /dev/null +++ b/spec/lib/safe_zip/extract_params_spec.rb @@ -0,0 +1,54 @@ +require 'spec_helper' + +describe SafeZip::ExtractParams do + let(:target_path) { Dir.mktmpdir("safe-zip") } + let(:params) { described_class.new(directories: directories, to: target_path) } + let(:directories) { %w(public folder/with/subfolder) } + + after do + FileUtils.remove_entry_secure(target_path) + end + + describe '#extract_path' do + subject { params.extract_path } + + it { is_expected.to eq(target_path) } + end + + describe '#matching_target_directory' do + using RSpec::Parameterized::TableSyntax + + subject { params.matching_target_directory(target_path + path) } + + where(:path, :result) do + '/public/index.html' | '/public/' + '/non/existing/path' | nil + '/public' | nil + '/folder/with/index.html' | nil + end + + with_them do + it { is_expected.to eq(result ? target_path + result : nil) } + end + end + + describe '#target_directories' do + subject { params.target_directories } + + it 'starts with target_path' do + is_expected.to all(start_with(target_path + '/')) + end + + it 'ends with / for all paths' do + is_expected.to all(end_with('/')) + end + end + + describe '#directories_wildcard' do + subject { params.directories_wildcard } + + it 'adds * for all paths' do + is_expected.to all(end_with('/*')) + end + end +end diff --git a/spec/lib/safe_zip/extract_spec.rb b/spec/lib/safe_zip/extract_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..dc7e25c0cf6f2d57fe242d48a54c7800e6e3e9d2 --- /dev/null +++ b/spec/lib/safe_zip/extract_spec.rb @@ -0,0 +1,80 @@ +require 'spec_helper' + +describe SafeZip::Extract do + let(:target_path) { Dir.mktmpdir('safe-zip') } + let(:directories) { %w(public) } + let(:object) { described_class.new(archive) } + let(:archive) { Rails.root.join('spec', 'fixtures', 'safe_zip', archive_name) } + + after do + FileUtils.remove_entry_secure(target_path) + end + + context '#extract' do + subject { object.extract(directories: directories, to: target_path) } + + shared_examples 'extracts archive' do |param| + before do + stub_feature_flags(safezip_use_rubyzip: param) + end + + it 'does extract archive' do + subject + + expect(File.exist?(File.join(target_path, 'public', 'index.html'))).to eq(true) + expect(File.exist?(File.join(target_path, 'source'))).to eq(false) + end + end + + shared_examples 'fails to extract archive' do |param| + before do + stub_feature_flags(safezip_use_rubyzip: param) + end + + it 'does not extract archive' do + expect { subject }.to raise_error(SafeZip::Extract::Error) + end + end + + %w(valid-simple.zip valid-symlinks-first.zip valid-non-writeable.zip).each do |name| + context "when using #{name} archive" do + let(:archive_name) { name } + + context 'for RubyZip' do + it_behaves_like 'extracts archive', true + end + + context 'for UnZip' do + it_behaves_like 'extracts archive', false + end + end + end + + %w(invalid-symlink-does-not-exist.zip invalid-symlinks-outside.zip).each do |name| + context "when using #{name} archive" do + let(:archive_name) { name } + + context 'for RubyZip' do + it_behaves_like 'fails to extract archive', true + end + + context 'for UnZip (UNSAFE)' do + it_behaves_like 'extracts archive', false + end + end + end + + context 'when no matching directories are found' do + let(:archive_name) { 'valid-simple.zip' } + let(:directories) { %w(non/existing) } + + context 'for RubyZip' do + it_behaves_like 'fails to extract archive', true + end + + context 'for UnZip' do + it_behaves_like 'fails to extract archive', false + end + end + end +end diff --git a/spec/services/projects/update_pages_service_spec.rb b/spec/services/projects/update_pages_service_spec.rb index 36b619ba9be03c6d00f98081e57e5a54ddb7e578..8b70845befe69b3b8dcd09a993ce7d14c147b66c 100644 --- a/spec/services/projects/update_pages_service_spec.rb +++ b/spec/services/projects/update_pages_service_spec.rb @@ -5,24 +5,27 @@ describe Projects::UpdatePagesService do set(:pipeline) { create(:ci_pipeline, project: project, sha: project.commit('HEAD').sha) } set(:build) { create(:ci_build, pipeline: pipeline, ref: 'HEAD') } let(:invalid_file) { fixture_file_upload('spec/fixtures/dk.png') } - let(:extension) { 'zip' } - let(:file) { fixture_file_upload("spec/fixtures/pages.#{extension}") } - let(:empty_file) { fixture_file_upload("spec/fixtures/pages_empty.#{extension}") } - let(:metadata) do - filename = "spec/fixtures/pages.#{extension}.meta" - fixture_file_upload(filename) if File.exist?(filename) - end + let(:file) { fixture_file_upload("spec/fixtures/pages.zip") } + let(:empty_file) { fixture_file_upload("spec/fixtures/pages_empty.zip") } + let(:metadata_filename) { "spec/fixtures/pages.zip.meta" } + let(:metadata) { fixture_file_upload(metadata_filename) if File.exist?(metadata_filename) } subject { described_class.new(project, build) } before do + stub_feature_flags(safezip_use_rubyzip: true) + project.remove_pages end - context 'legacy artifacts' do - let(:extension) { 'zip' } + context '::TMP_EXTRACT_PATH' do + subject { described_class::TMP_EXTRACT_PATH } + it { is_expected.not_to match(Gitlab::PathRegex.namespace_format_regex) } + end + + context 'legacy artifacts' do before do build.update(legacy_artifacts_file: file) build.update(legacy_artifacts_metadata: metadata) @@ -132,6 +135,20 @@ describe Projects::UpdatePagesService do end end + context 'when using pages with non-writeable public' do + let(:file) { fixture_file_upload("spec/fixtures/pages_non_writeable.zip") } + + context 'when using RubyZip' do + before do + stub_feature_flags(safezip_use_rubyzip: true) + end + + it 'succeeds to extract' do + expect(execute).to eq(:success) + end + end + end + context 'when timeout happens by DNS error' do before do allow_any_instance_of(described_class)