# sanitization_filter.rb
require 'gitlab/markdown'
# html-pipeline: base filter class and the stock sanitization filter
require 'html/pipeline/filter'
require 'html/pipeline/sanitization_filter'

module Gitlab
  module Markdown
    # Sanitize HTML
    #
    # Extends HTML::Pipeline::SanitizationFilter with a custom whitelist.
    class SanitizationFilter < HTML::Pipeline::SanitizationFilter
      # Build the sanitization whitelist for the current pipeline.
      #
      # The `:description` pipeline starts from LIMITED with `pre`, `code`,
      # `img` and the list elements removed; every other pipeline starts from
      # the parent filter's whitelist. In both cases the result is passed
      # through `customize_whitelist` before being returned.
      #
      # NOTE(review): the Hash is modified in place (the `:elements` entry of
      # LIMITED is reassigned here, and `customize_whitelist` mutates whatever
      # it is given), so the changes outlive this call. That looks intentional
      # given the "only once" guard in `customize_whitelist` - confirm.
      #
      # Returns the whitelist Hash.
      def whitelist
        # Descriptions are more heavily sanitized, allowing only a few elements.
        # See http://git.io/vkuAN
        if pipeline == :description
          rules = LIMITED
          rules[:elements] -= %w(pre code img ol ul li)
        else
          rules = super
        end

        customize_whitelist(rules)

        rules
      end

      private

      # Name of the pipeline this filter is running in.
      #
      # Returns `context[:pipeline]` when it is truthy, `:default` otherwise.
      def pipeline
        requested = context[:pipeline]
        return requested if requested

        :default
      end

      # Check whether a whitelist's transformer list already ends with a
      # transformer defined in this file.
      #
      # transformers - Array of transformer callables taken from a whitelist.
      #
      # Returns true when the last transformer was defined in this file, and
      # false otherwise - including when the list is empty or the last entry
      # has no known source location, instead of raising NoMethodError.
      def customized?(transformers)
        newest = transformers.last
        return false unless newest.respond_to?(:source_location)

        location = newest.source_location
        location ? location.first == __FILE__ : false
      end

      # Apply the GitLab-specific customizations to a whitelist, in place.
      #
      # whitelist - Hash with `:attributes`, `:elements`, `:protocols` and
      #             `:transformers` entries, all of which are mutated.
      #
      # Returns the same whitelist Hash, or nil when `customized?` reports that
      # the transformers were already added.
      def customize_whitelist(whitelist)
        # Only push these customizations once
        return if customized?(whitelist[:transformers])

        # Allow code highlighting
        whitelist[:attributes]['pre'] = %w(class)
        whitelist[:attributes]['span'] = %w(class)

        # Allow table alignment
        whitelist[:attributes]['th'] = %w(style)
        whitelist[:attributes]['td'] = %w(style)

        # Allow span elements
        whitelist[:elements].push('span')

        # Allow any protocol in `a` elements...
        whitelist[:protocols].delete('a')

        # ...but then remove links with the `javascript` protocol
        whitelist[:transformers].push(remove_javascript_links)

        # Remove `rel` attribute from `a` elements
        whitelist[:transformers].push(remove_rel)

        # Remove `class` attribute from non-highlight spans
        whitelist[:transformers].push(clean_spans)

        whitelist
      end

      # Build the transformer that strips `javascript` links from `a` elements.
      #
      # The returned lambda takes the transformer `env` Hash and reads
      # `env[:node]`. For an `a` node that has an `href`, the attribute is
      # removed when its value starts with `javascript` or `:javascript`
      # after normalisation; all other nodes are left untouched.
      #
      # Returns a lambda.
      def remove_javascript_links
        lambda do |env|
          node = env[:node]

          return unless node.name == 'a'
          return unless node.has_attribute?('href')

          # Compare the value the way a browser would resolve it: tabs and
          # newlines are dropped, leading whitespace/control characters are
          # skipped and the scheme is case-insensitive. A literal comparison
          # lets values such as " JavaScript:alert(1)" through.
          href = node['href'].gsub(/[\t\n\r]/, '').sub(/\A[\x00-\x20]+/, '').downcase

          node.remove_attribute('href') if href.start_with?('javascript', ':javascript')
        end
      end

      # Build the transformer that drops the `rel` attribute from `a` elements.
      #
      # The returned lambda takes the transformer `env` Hash; it reads
      # `env[:node_name]` to decide and `env[:node]` to modify. Nodes with any
      # other name are left untouched and the lambda returns nil for them.
      #
      # Returns a lambda.
      def remove_rel
        lambda do |env|
          env[:node].remove_attribute('rel') if env[:node_name] == 'a'
        end
      end

      # Build the transformer that removes `class` from non-highlight spans.
      #
      # The returned lambda takes the transformer `env` Hash and reads
      # `env[:node]`. For a `span` node that has a `class` attribute, the
      # attribute is removed unless the span has a `pre` ancestor. Such spans
      # are then reported back as whitelisted. Any other node is ignored and
      # the lambda returns nil.
      #
      # Returns a lambda.
      def clean_spans
        lambda do |env|
          node = env[:node]

          return unless node.name == 'span'
          return unless node.has_attribute?('class')

          # NOTE(review): `has_ancestor?` is not defined in this file -
          # presumably inherited from HTML::Pipeline::Filter, which documents
          # its second argument as an Array of tag names. Passing a bare
          # String would turn the `include?` check into a substring match, so
          # a `p` ancestor would be treated like `pre`.
          unless has_ancestor?(node, %w(pre))
            node.remove_attribute('class')
          end

          { node_whitelist: [node] }
        end
      end
    end
  end
end