# sanitize_helper.rb
require 'action_view/helpers/tag_helper'
require 'html/document'

module ActionView
  module Helpers #:nodoc:
    # The SanitizeHelper module provides a set of methods for scrubbing text of undesired HTML elements.
    # These helper methods extend ActionView making them callable within your template files.
    module SanitizeHelper
      def self.included(base)
        # Mixing this helper into a class also gives that class the class-level
        # methods defined in ClassMethods.
        base.extend ClassMethods
      end
      
      # This +sanitize+ helper will html encode all tags and strip all attributes that aren't specifically allowed.
      # It also strips href/src attributes with invalid protocols, like javascript: especially.  It does its best to counter any
      # tricks that hackers may use, like throwing in unicode/ascii/hex values to get past the javascript: filters.  Check out
      # the extensive test suite.
      #
      #   <%= sanitize @article.body %>
      #
      # You can add or remove tags/attributes if you want to customize it a bit.  See ActionView::Base for full docs on the
      # available options.  You can add tags/attributes for single uses of +sanitize+ by passing either the <tt>:attributes</tt> or <tt>:tags</tt> options:
      #
      # Normal Use
      #
      #   <%= sanitize @article.body %>
      #
      # Custom Use (only the mentioned tags and attributes are allowed, nothing else)
      #
      #   <%= sanitize @article.body, :tags => %w(table tr td), :attributes => %w(id class style) %>
      #
      # Add table tags to the default allowed tags
      #
      #   Rails::Initializer.run do |config|
      #     config.action_view.sanitized_allowed_tags = 'table', 'tr', 'td'
      #   end
      #
      # Remove tags from the default allowed tags
      #
      #   Rails::Initializer.run do |config|
      #     config.after_initialize do
      #       ActionView::Base.sanitized_allowed_tags.delete 'div'
      #     end
      #   end
      #
      # Change allowed default attributes
      #
      #   Rails::Initializer.run do |config|
      #     config.action_view.sanitized_allowed_attributes = 'id', 'class', 'style'
      #   end
      #
      # Please note that sanitizing user-provided text does not guarantee that the
      # resulting markup is valid (conforming to a document type) or even well-formed.
      # The output may still contain e.g. unescaped '<', '>', '&' characters and
      # confuse browsers.
      #
      def sanitize(html, options = {})
        # Go through the sanitizer held by the view class so that a replacement
        # object assigned there is the one that gets used.
        self.class.white_list_sanitizer.sanitize(html, options)
      end

      # Sanitizes a block of CSS code. Used by +sanitize+ when it comes across a style attribute.
      def sanitize_css(style)
        # Same sanitizer object as +sanitize+ uses, looked up on the view class.
        self.class.white_list_sanitizer.sanitize_css(style)
      end

      # Strips all HTML tags from the +html+, including comments.  This uses the 
      # html-scanner tokenizer and so its HTML parsing ability is limited by 
      # that of html-scanner.
      #
      # ==== Examples
      #
      #   strip_tags("Strip <i>these</i> tags!")
      #   # => Strip these tags!
      #
      #   strip_tags("<b>Bold</b> no more!  <a href='more.html'>See more here</a>...")
      #   # => Bold no more!  See more here...
      # 
      #   strip_tags("<div id='top-bar'>Welcome to my website!</div>")
      #   # => Welcome to my website!
      def strip_tags(html)
        # The actual stripping is done by the class-level +full_sanitizer+ object.
        self.class.full_sanitizer.sanitize(html)
      end

      # Strips all link tags from +text+ leaving just the link text.
      #
      # ==== Examples
      #   strip_links('<a href="http://www.rubyonrails.org">Ruby on Rails</a>')
      #   # => Ruby on Rails
      #
      #   strip_links('Please e-mail me at <a href="mailto:me@email.com">me@email.com</a>.')
      #   # => Please e-mail me at me@email.com.
      #
      #   strip_links('Blog: <a href="http://www.myblog.com/" class="nav" target="_blank">Visit</a>.')
      #   # => Blog: Visit.
      def strip_links(html)
        # The actual stripping is done by the class-level +link_sanitizer+ object.
        self.class.link_sanitizer.sanitize(html)
      end

      module ClassMethods #:nodoc:
        # Hook run when an object is extended with ClassMethods.  Opens the
        # singleton class of +base+ to add writers for the three sanitizer
        # objects and to generate the +sanitized_*+ methods, each of which
        # forwards to +white_list_sanitizer+ under the same name with the
        # +sanitized_+ prefix removed.
        def self.extended(base)
          class << base
            attr_writer :full_sanitizer, :link_sanitizer, :white_list_sanitizer

            # we want these to be class methods on ActionView::Base, they'll get mattr_readers for these below.
            delegated = [:sanitized_protocol_separator, :sanitized_uri_attributes, :sanitized_bad_tags, :sanitized_allowed_tags,
                :sanitized_allowed_attributes, :sanitized_allowed_css_properties, :sanitized_allowed_css_keywords,
                :sanitized_shorthand_css_properties, :sanitized_allowed_protocols, :sanitized_protocol_separator=]

            helper_def = delegated.map do |prop|
              prop = prop.to_s
              # Only names ending in "=" are writers and take (and pass on) an argument.
              arg = prop =~ /=$/ ? 'value' : ''
              "def #{prop}(#{arg}) white_list_sanitizer.#{prop.sub(/sanitized_/, '')} #{arg} end"
            end.join("\n")

            # self is the singleton class here; file/line are passed so that
            # backtraces from the generated methods point at this file.
            class_eval helper_def, __FILE__, __LINE__
          end
        end

        # Gets the HTML::FullSanitizer instance used by +strip_tags+.  Replace with
        # any object that responds to +sanitize+.
        #
        #   Rails::Initializer.run do |config|
        #     config.action_view.full_sanitizer = MySpecialSanitizer.new
        #   end
        #
        def full_sanitizer
          # Reuse whatever is already stored (including an object assigned via
          # the writer); otherwise build and remember the default.
          return @full_sanitizer if @full_sanitizer
          @full_sanitizer = HTML::FullSanitizer.new
        end

        # Gets the HTML::LinkSanitizer instance used by +strip_links+.  Replace with
        # any object that responds to +sanitize+.
        #
        #   Rails::Initializer.run do |config|
        #     config.action_view.link_sanitizer = MySpecialSanitizer.new
        #   end
        #
        def link_sanitizer
          # Reuse whatever is already stored (including an object assigned via
          # the writer); otherwise build and remember the default.
          return @link_sanitizer if @link_sanitizer
          @link_sanitizer = HTML::LinkSanitizer.new
        end

        # Gets the HTML::WhiteListSanitizer instance used by +sanitize+ and +sanitize_css+.
        # Replace with any object that responds to +sanitize+.
        #
        #   Rails::Initializer.run do |config|
        #     config.action_view.white_list_sanitizer = MySpecialSanitizer.new
        #   end
        #
        def white_list_sanitizer
          # Reuse whatever is already stored (including an object assigned via
          # the writer); otherwise build and remember the default.
          return @white_list_sanitizer if @white_list_sanitizer
          @white_list_sanitizer = HTML::WhiteListSanitizer.new
        end

        # Adds valid HTML attributes that the +sanitize+ helper checks for URIs.
        #
        #   Rails::Initializer.run do |config|
        #     config.action_view.sanitized_uri_attributes = 'lowsrc', 'target'
        #   end
        #
        def sanitized_uri_attributes=(attributes)
          # Works on the collection exposed by the HTML::WhiteListSanitizer class
          # itself, not on this class's +white_list_sanitizer+ instance.  Array()
          # lets a lone value be merged the same way as a list of values.
          HTML::WhiteListSanitizer.uri_attributes.merge(Array(attributes))
        end

        # Adds to the Set of 'bad' tags for the +sanitize+ helper.
        #
        #   Rails::Initializer.run do |config|
        #     config.action_view.sanitized_bad_tags = 'embed', 'object'
        #   end
        #
        def sanitized_bad_tags=(attributes)
          # Works on the collection exposed by the HTML::WhiteListSanitizer class
          # itself, not on this class's +white_list_sanitizer+ instance.  Array()
          # lets a lone value be merged the same way as a list of values.
          HTML::WhiteListSanitizer.bad_tags.merge(Array(attributes))
        end

        # Adds to the Set of allowed tags for the +sanitize+ helper.
        #
        #   Rails::Initializer.run do |config|
        #     config.action_view.sanitized_allowed_tags = 'table', 'tr', 'td'
        #   end
        #
        def sanitized_allowed_tags=(attributes)
          # Works on the collection exposed by the HTML::WhiteListSanitizer class
          # itself, not on this class's +white_list_sanitizer+ instance.  Array()
          # lets a lone value be merged the same way as a list of values.
          HTML::WhiteListSanitizer.allowed_tags.merge(Array(attributes))
        end

        # Adds to the Set of allowed HTML attributes for the +sanitize+ helper.
        #
        #   Rails::Initializer.run do |config|
        #     config.action_view.sanitized_allowed_attributes = 'onclick', 'longdesc'
        #   end
        #
        def sanitized_allowed_attributes=(attributes)
          # Works on the collection exposed by the HTML::WhiteListSanitizer class
          # itself, not on this class's +white_list_sanitizer+ instance.  Array()
          # lets a lone value be merged the same way as a list of values.
          HTML::WhiteListSanitizer.allowed_attributes.merge(Array(attributes))
        end

        # Adds to the Set of allowed CSS properties for the +sanitize+ and +sanitize_css+ helpers.
        #
        #   Rails::Initializer.run do |config|
        #     config.action_view.sanitized_allowed_css_properties = 'expression'
        #   end
        #
        def sanitized_allowed_css_properties=(attributes)
          # Works on the collection exposed by the HTML::WhiteListSanitizer class
          # itself, not on this class's +white_list_sanitizer+ instance.  Array()
          # lets a lone value be merged the same way as a list of values.
          HTML::WhiteListSanitizer.allowed_css_properties.merge(Array(attributes))
        end

        # Adds to the Set of allowed CSS keywords for the +sanitize+ and +sanitize_css+ helpers.
        #
        #   Rails::Initializer.run do |config|
        #     config.action_view.sanitized_allowed_css_keywords = 'expression'
        #   end
        #
        def sanitized_allowed_css_keywords=(attributes)
          # Works on the collection exposed by the HTML::WhiteListSanitizer class
          # itself, not on this class's +white_list_sanitizer+ instance.  Array()
          # lets a lone value be merged the same way as a list of values.
          HTML::WhiteListSanitizer.allowed_css_keywords.merge(Array(attributes))
        end

        # Adds to the Set of allowed shorthand CSS properties for the +sanitize+ and +sanitize_css+ helpers.
        #
        #   Rails::Initializer.run do |config|
        #     config.action_view.sanitized_shorthand_css_properties = 'expression'
        #   end
        #
        def sanitized_shorthand_css_properties=(attributes)
          # Works on the collection exposed by the HTML::WhiteListSanitizer class
          # itself, not on this class's +white_list_sanitizer+ instance.  Array()
          # lets a lone value be merged the same way as a list of values.
          HTML::WhiteListSanitizer.shorthand_css_properties.merge(Array(attributes))
        end

        # Adds to the Set of allowed protocols for the +sanitize+ helper.
        #
        #   Rails::Initializer.run do |config|
        #     config.action_view.sanitized_allowed_protocols = 'ssh', 'feed'
        #   end
        #
        def sanitized_allowed_protocols=(attributes)
          # Works on the collection exposed by the HTML::WhiteListSanitizer class
          # itself, not on this class's +white_list_sanitizer+ instance.  Array()
          # lets a lone value be merged the same way as a list of values.
          HTML::WhiteListSanitizer.allowed_protocols.merge(Array(attributes))
        end
      end
    end
  end
end