diff --git a/app/models/concerns/cache_markdown_field.rb b/app/models/concerns/cache_markdown_field.rb index 73a27326f6c23bee242f9b5baf0de8028ff61471..002f3e17891b86a2fd07b5a7799604b6d84e02a2 100644 --- a/app/models/concerns/cache_markdown_field.rb +++ b/app/models/concerns/cache_markdown_field.rb @@ -15,7 +15,7 @@ module CacheMarkdownField # Increment this number every time the renderer changes its output CACHE_REDCARPET_VERSION = 3 CACHE_COMMONMARK_VERSION_START = 10 - CACHE_COMMONMARK_VERSION = 13 + CACHE_COMMONMARK_VERSION = 14 # changes to these attributes cause the cache to be invalidates INVALIDATED_BY = %w[author project].freeze diff --git a/changelogs/unreleased/security-2769-idn-homograph-attack.yml b/changelogs/unreleased/security-2769-idn-homograph-attack.yml new file mode 100644 index 0000000000000000000000000000000000000000..a014b522c96c802da28656e6edda3b40761a31bd --- /dev/null +++ b/changelogs/unreleased/security-2769-idn-homograph-attack.yml @@ -0,0 +1,5 @@ +--- +title: Make potentially malicious links more visible in the UI and scrub RTLO chars from links +merge_request: 2770 +author: +type: security diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb index deda4b1872e149e1f0504b7055fdededd06213fb..f3061bad4ff9b2533b21a8a823e64d525fdc3c23 100644 --- a/lib/banzai/filter/autolink_filter.rb +++ b/lib/banzai/filter/autolink_filter.rb @@ -8,6 +8,10 @@ module Banzai # # Based on HTML::Pipeline::AutolinkFilter # + # Note that our CommonMark parser, `commonmarker` (using the autolink extension) + # handles standard autolinking, like http/https. We detect additional + # schemes (smb, rdar, etc). + # # Context options: # :autolink - Boolean, skips all processing done by this filter when false # :link_attr - Hash of attributes for the generated links @@ -107,10 +111,13 @@ module Banzai end end - # match has come from node.to_html above, so we know it's encoded - # correctly. + # Since this came from a Text node, make sure the new href is encoded. + # `commonmarker` percent encodes the domains of links it handles, so + # do the same (instead of using `normalized_encode`). + href_safe = Addressable::URI.encode(match).html_safe + html_safe_match = match.html_safe - options = link_options.merge(href: html_safe_match) + options = link_options.merge(href: href_safe) content_tag(:a, html_safe_match, options) + dropped end diff --git a/lib/banzai/filter/external_link_filter.rb b/lib/banzai/filter/external_link_filter.rb index 4f60b6f84c60de206581d5641f47747ea8ee56f0..61ee3eac216c0bf48f0bd70fec61b895477081fb 100644 --- a/lib/banzai/filter/external_link_filter.rb +++ b/lib/banzai/filter/external_link_filter.rb @@ -4,17 +4,29 @@ module Banzai module Filter # HTML Filter to modify the attributes of external links class ExternalLinkFilter < HTML::Pipeline::Filter - SCHEMES = ['http', 'https', nil].freeze + SCHEMES = ['http', 'https', nil].freeze + RTLO = "\u202E".freeze + ENCODED_RTLO = '%E2%80%AE'.freeze def call links.each do |node| - uri = uri(node['href'].to_s) - - node.set_attribute('href', uri.to_s) if uri + # URI.parse does stricter checking on the url than Addressable, + # such as on `mailto:` links. Since we've been using it, do an + # initial parse for validity and then use Addressable + # for IDN support, etc + uri = uri_strict(node['href'].to_s) + if uri + node.set_attribute('href', uri.to_s) + addressable_uri = addressable_uri(node['href']) + else + addressable_uri = nil + end - if SCHEMES.include?(uri&.scheme) && !internal_url?(uri) - node.set_attribute('rel', 'nofollow noreferrer noopener') - node.set_attribute('target', '_blank') + unless internal_url?(addressable_uri) + punycode_autolink_node!(addressable_uri, node) + sanitize_link_text!(node) + add_malicious_tooltip!(addressable_uri, node) + add_nofollow!(addressable_uri, node) end end @@ -23,12 +35,18 @@ module Banzai private - def uri(href) + def uri_strict(href) URI.parse(href) rescue URI::Error nil end + def addressable_uri(href) + Addressable::URI.parse(href) + rescue Addressable::URI::InvalidURIError + nil + end + def links query = 'descendant-or-self::a[@href and not(@href = "")]' doc.xpath(query) @@ -45,6 +63,57 @@ module Banzai def internal_url @internal_url ||= URI.parse(Gitlab.config.gitlab.url) end + + # Only replace an autolink with an IDN with it's punycode + # version if we need emailable links. Otherwise let it + # be shown normally and the tooltips will show the + # punycode version. + def punycode_autolink_node!(uri, node) + return unless uri + return unless context[:emailable_links] + + unencoded_uri_str = Addressable::URI.unencode(node['href']) + + if unencoded_uri_str == node.content && idn?(uri) + node.content = uri.normalize + end + end + + # escape any right-to-left (RTLO) characters in link text + def sanitize_link_text!(node) + node.inner_html = node.inner_html.gsub(RTLO, ENCODED_RTLO) + end + + # If the domain is an international domain name (IDN), + # let's expose with a tooltip in case it's intended + # to be malicious. This is particularly useful for links + # where the link text is not the same as the actual link. + # We will continue to show the unicode version of the domain + # in autolinked link text, which could contain emojis, etc. + # + # Also show the tooltip if the url contains the RTLO character, + # as this is an indicator of a malicious link + def add_malicious_tooltip!(uri, node) + if idn?(uri) || has_encoded_rtlo?(uri) + node.add_class('has-tooltip') + node.set_attribute('title', uri.normalize) + end + end + + def add_nofollow!(uri, node) + if SCHEMES.include?(uri&.scheme) + node.set_attribute('rel', 'nofollow noreferrer noopener') + node.set_attribute('target', '_blank') + end + end + + def idn?(uri) + uri&.normalized_host&.start_with?('xn--') + end + + def has_encoded_rtlo?(uri) + uri&.to_s&.include?(ENCODED_RTLO) + end end end end diff --git a/lib/banzai/pipeline/email_pipeline.rb b/lib/banzai/pipeline/email_pipeline.rb index 2c08581ce0d30badbfe0a281ccd630a7f8840c09..fc51063c06c6a4433a637af628ae30606c44c10a 100644 --- a/lib/banzai/pipeline/email_pipeline.rb +++ b/lib/banzai/pipeline/email_pipeline.rb @@ -11,7 +11,8 @@ module Banzai def self.transform_context(context) super(context).merge( - only_path: false + only_path: false, + emailable_links: true ) end end diff --git a/spec/lib/banzai/filter/autolink_filter_spec.rb b/spec/lib/banzai/filter/autolink_filter_spec.rb index 7a457403b51c1be87cbf84ca4ae95edcb0e44290..6217381c49148528290726776f2618123ef48397 100644 --- a/spec/lib/banzai/filter/autolink_filter_spec.rb +++ b/spec/lib/banzai/filter/autolink_filter_spec.rb @@ -188,6 +188,22 @@ describe Banzai::Filter::AutolinkFilter do expect(doc.at_css('a')['class']).to eq 'custom' end + it 'escapes RTLO and other characters' do + # rendered text looks like "http://example.com/evilexe.mp3" + evil_link = "#{link}evil\u202E3pm.exe" + doc = filter("#{evil_link}") + + expect(doc.at_css('a')['href']).to eq "http://about.gitlab.com/evil%E2%80%AE3pm.exe" + end + + it 'encodes international domains' do + link = "http://one😄two.com" + expected = "http://one%F0%9F%98%84two.com" + doc = filter(link) + + expect(doc.at_css('a')['href']).to eq expected + end + described_class::IGNORE_PARENTS.each do |elem| it "ignores valid links contained inside '#{elem}' element" do exp = act = "<#{elem}>See #{link}" diff --git a/spec/lib/banzai/filter/external_link_filter_spec.rb b/spec/lib/banzai/filter/external_link_filter_spec.rb index e6dae8d53825d07c35ef8012163048ae7b93d1d0..2acbe05f0828ea3159e2ae0185b23a97f9e306e6 100644 --- a/spec/lib/banzai/filter/external_link_filter_spec.rb +++ b/spec/lib/banzai/filter/external_link_filter_spec.rb @@ -62,6 +62,13 @@ describe Banzai::Filter::ExternalLinkFilter do expect(doc.to_html).to eq(expected) end + + it 'adds rel and target to improperly formatted autolinks' do + doc = filter %q(

mailto://jblogs@example.com

) + expected = %q(

mailto://jblogs@example.com

) + + expect(doc.to_html).to eq(expected) + end end context 'for links with a username' do @@ -112,4 +119,62 @@ describe Banzai::Filter::ExternalLinkFilter do it_behaves_like 'an external link with rel attribute' end + + context 'links with RTLO character' do + # In rendered text this looks like "http://example.com/evilexe.mp3" + let(:doc) { filter %Q(http://example.com/evil\u202E3pm.exe) } + + it_behaves_like 'an external link with rel attribute' + + it 'escapes RTLO in link text' do + expected = %q(http://example.com/evil%E2%80%AE3pm.exe) + + expect(doc.to_html).to include(expected) + end + + it 'does not mangle the link text' do + doc = filter %Q(Oneand\u202Eexe.mp3) + + expect(doc.to_html).to include('Oneand%E2%80%AEexe.mp3') + end + end + + context 'for generated autolinks' do + context 'with an IDN character' do + let(:doc) { filter(%q(http://exa😄mple.com)) } + let(:doc_email) { filter(%q(http://exa😄mple.com), emailable_links: true) } + + it_behaves_like 'an external link with rel attribute' + + it 'does not change the link text' do + expect(doc.to_html).to include('http://exa😄mple.com') + end + + it 'uses punycode for emails' do + expect(doc_email.to_html).to include('http://xn--example-6p25f.com/') + end + end + end + + context 'for links that look malicious' do + context 'with an IDN character' do + let(:doc) { filter %q(http://exa😄mple.com) } + + it 'adds a toolip with punycode' do + expect(doc.to_html).to include('http://exa😄mple.com') + expect(doc.to_html).to include('class="has-tooltip"') + expect(doc.to_html).to include('title="http://xn--example-6p25f.com/"') + end + end + + context 'with RTLO character' do + let(:doc) { filter %q(Evil Test) } + + it 'adds a toolip with punycode' do + expect(doc.to_html).to include('Evil Test') + expect(doc.to_html).to include('class="has-tooltip"') + expect(doc.to_html).to include('title="http://example.com/evil%E2%80%AE3pm.exe"') + end + end + end end diff --git a/spec/lib/banzai/pipeline/email_pipeline_spec.rb b/spec/lib/banzai/pipeline/email_pipeline_spec.rb index 6a11ca2f9d5588a2b06ef49fbf31f3dff8b4ae53..b99161109eb5fc5e3f6c3b4cbc87e509de2beb32 100644 --- a/spec/lib/banzai/pipeline/email_pipeline_spec.rb +++ b/spec/lib/banzai/pipeline/email_pipeline_spec.rb @@ -10,5 +10,19 @@ describe Banzai::Pipeline::EmailPipeline do expect(described_class.filters).not_to be_empty expect(described_class.filters).not_to include(Banzai::Filter::ImageLazyLoadFilter) end + + it 'shows punycode for autolinks' do + examples = %W[ + http://one😄two.com + http://\u0261itlab.com + ] + + examples.each do |markdown| + result = described_class.call(markdown, project: nil)[:output] + link = result.css('a').first + + expect(link.content).to include('http://xn--') + end + end end end diff --git a/spec/lib/banzai/pipeline/full_pipeline_spec.rb b/spec/lib/banzai/pipeline/full_pipeline_spec.rb index 3634655c6a536fab5d660f0f9d2a6d18d26cc5bb..162856be4c539dc30d26d7285de120f8bb6e3181 100644 --- a/spec/lib/banzai/pipeline/full_pipeline_spec.rb +++ b/spec/lib/banzai/pipeline/full_pipeline_spec.rb @@ -57,4 +57,42 @@ describe Banzai::Pipeline::FullPipeline do expect(html.lines.map(&:strip).join("\n")).to eq filtered_footnote end end + + describe 'links are detected as malicious' do + it 'has tooltips for malicious links' do + examples = %W[ + http://example.com/evil\u202E3pm.exe + [evilexe.mp3](http://example.com/evil\u202E3pm.exe) + rdar://localhost.com/\u202E3pm.exe + http://one😄two.com + [Evil-Test](http://one😄two.com) + http://\u0261itlab.com + [Evil-GitLab-link](http://\u0261itlab.com) + ![Evil-GitLab-link](http://\u0261itlab.com.png) + ] + + examples.each do |markdown| + result = described_class.call(markdown, project: nil)[:output] + link = result.css('a').first + + expect(link[:class]).to include('has-tooltip') + end + end + + it 'has no tooltips for safe links' do + examples = %w[ + http://example.com + [Safe-Test](http://example.com) + https://commons.wikimedia.org/wiki/File:اسكرام_2_-_تمنراست.jpg + [Wikipedia-link](https://commons.wikimedia.org/wiki/File:اسكرام_2_-_تمنراست.jpg) + ] + + examples.each do |markdown| + result = described_class.call(markdown, project: nil)[:output] + link = result.css('a').first + + expect(link[:class]).to be_nil + end + end + end end