diff --git a/changelogs/unreleased/allow-resolving-conflicts-in-utf-8.yml b/changelogs/unreleased/allow-resolving-conflicts-in-utf-8.yml new file mode 100644 index 0000000000000000000000000000000000000000..c3c877423ff27d6361f8083610c2a6e4800df507 --- /dev/null +++ b/changelogs/unreleased/allow-resolving-conflicts-in-utf-8.yml @@ -0,0 +1,4 @@ +--- +title: Fix conflict resolution when files contain valid UTF-8 characters +merge_request: +author: diff --git a/lib/gitlab/conflict/parser.rb b/lib/gitlab/conflict/parser.rb index d3524c338ee1344bf2282221033c49cdac77b565..84f9ecd3d23fe0958909deb47b312acfbf7ea3aa 100644 --- a/lib/gitlab/conflict/parser.rb +++ b/lib/gitlab/conflict/parser.rb @@ -15,11 +15,9 @@ module Gitlab raise UnmergeableFile if text.blank? # Typically a binary file raise UnmergeableFile if text.length > 200.kilobytes - begin - text.to_json - rescue Encoding::UndefinedConversionError - raise UnsupportedEncoding - end + text.force_encoding('UTF-8') + + raise UnsupportedEncoding unless text.valid_encoding? line_obj_index = 0 line_old = 1 diff --git a/spec/lib/gitlab/conflict/parser_spec.rb b/spec/lib/gitlab/conflict/parser_spec.rb index 16eb376635644762dcb29f047b5a3149161f33f1..2570f95dd21be4bc396c4df1feb44e7233f697ae 100644 --- a/spec/lib/gitlab/conflict/parser_spec.rb +++ b/spec/lib/gitlab/conflict/parser_spec.rb @@ -120,43 +120,61 @@ CONFLICT end context 'when the file contents include conflict delimiters' do - it 'raises UnexpectedDelimiter when there is a non-start delimiter first' do - expect { parse_text('=======') }. - to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter) - - expect { parse_text('>>>>>>> README.md') }. - to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter) - - expect { parse_text('>>>>>>> some-other-path.md') }. - not_to raise_error + context 'when there is a non-start delimiter first' do + it 'raises UnexpectedDelimiter when there is a middle delimiter first' do + expect { parse_text('=======') }. + to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter) + end + + it 'raises UnexpectedDelimiter when there is an end delimiter first' do + expect { parse_text('>>>>>>> README.md') }. + to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter) + end + + it 'does not raise when there is an end delimiter for a different path first' do + expect { parse_text('>>>>>>> some-other-path.md') }. + not_to raise_error + end end - it 'raises UnexpectedDelimiter when a start delimiter is followed by a non-middle delimiter' do - start_text = "<<<<<<< README.md\n" - end_text = "\n=======\n>>>>>>> README.md" + context 'when a start delimiter is followed by a non-middle delimiter' do + let(:start_text) { "<<<<<<< README.md\n" } + let(:end_text) { "\n=======\n>>>>>>> README.md" } - expect { parse_text(start_text + '>>>>>>> README.md' + end_text) }. - to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter) + it 'raises UnexpectedDelimiter when it is followed by an end delimiter' do + expect { parse_text(start_text + '>>>>>>> README.md' + end_text) }. + to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter) + end - expect { parse_text(start_text + start_text + end_text) }. - to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter) + it 'raises UnexpectedDelimiter when it is followed by another start delimiter' do + expect { parse_text(start_text + start_text + end_text) }. + to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter) + end - expect { parse_text(start_text + '>>>>>>> some-other-path.md' + end_text) }. - not_to raise_error + it 'does not raise when it is followed by a start delimiter for a different path' do + expect { parse_text(start_text + '>>>>>>> some-other-path.md' + end_text) }. + not_to raise_error + end end - it 'raises UnexpectedDelimiter when a middle delimiter is followed by a non-end delimiter' do - start_text = "<<<<<<< README.md\n=======\n" - end_text = "\n>>>>>>> README.md" + context 'when a middle delimiter is followed by a non-end delimiter' do + let(:start_text) { "<<<<<<< README.md\n=======\n" } + let(:end_text) { "\n>>>>>>> README.md" } - expect { parse_text(start_text + '=======' + end_text) }. - to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter) + it 'raises UnexpectedDelimiter when it is followed by another middle delimiter' do + expect { parse_text(start_text + '=======' + end_text) }. + to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter) + end - expect { parse_text(start_text + start_text + end_text) }. - to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter) + it 'raises UnexpectedDelimiter when it is followed by a start delimiter' do + expect { parse_text(start_text + start_text + end_text) }. + to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter) + end - expect { parse_text(start_text + '>>>>>>> some-other-path.md' + end_text) }. - not_to raise_error + it 'does not raise when it is followed by a start delimiter for another path' do + expect { parse_text(start_text + '<<<<<<< some-other-path.md' + end_text) }. + not_to raise_error + end end it 'raises MissingEndDelimiter when there is no end delimiter at the end' do @@ -184,9 +202,20 @@ CONFLICT to raise_error(Gitlab::Conflict::Parser::UnmergeableFile) end - it 'raises UnsupportedEncoding when the file contains non-UTF-8 characters' do - expect { parse_text("a\xC4\xFC".force_encoding(Encoding::ASCII_8BIT)) }. - to raise_error(Gitlab::Conflict::Parser::UnsupportedEncoding) + # All text from Rugged has an encoding of ASCII_8BIT, so force that in + # these strings. + context 'when the file contains UTF-8 characters' do + it 'does not raise' do + expect { parse_text("Espa\xC3\xB1a".force_encoding(Encoding::ASCII_8BIT)) }. + not_to raise_error + end + end + + context 'when the file contains non-UTF-8 characters' do + it 'raises UnsupportedEncoding' do + expect { parse_text("a\xC4\xFC".force_encoding(Encoding::ASCII_8BIT)) }. + to raise_error(Gitlab::Conflict::Parser::UnsupportedEncoding) + end end end end