diff --git a/actionpack/CHANGELOG b/actionpack/CHANGELOG
index df25ec800faddd99f8cd967784f7efdb8381be3f..b684148f91bf888800108bce3e400a027e04a131 100644
--- a/actionpack/CHANGELOG
+++ b/actionpack/CHANGELOG
@@ -1,5 +1,10 @@
*SVN*
+* Secure #sanitize, #strip_tags, and #strip_links helpers against xss attacks. Closes #8877. [Rick, lifofifo, Jacques Distler]
+
+ This merges and renames the popular white_list helper (along with some css sanitizing from Jacques Distler's version of the same plugin).
+ Also applied updated versions of #strip_tags and #strip_links from #8877.
+
* Remove use of & logic operator. Closes #8114. [watson]
* Fixed JavaScriptHelper#escape_javascript to also escape closing tags #8023 [rubyruy]
diff --git a/actionpack/lib/action_view/base.rb b/actionpack/lib/action_view/base.rb
index 14c42ce85516184b8f232323b85cb18acbe092eb..8e778f6830505da7091d8c5b80d3def33870e1f0 100644
--- a/actionpack/lib/action_view/base.rb
+++ b/actionpack/lib/action_view/base.rb
@@ -198,6 +198,135 @@ class Base
@@erb_variable = '_erbout'
cattr_accessor :erb_variable
+
+ # A regular expression of the valid characters used to separate protocols like
+ # the ':' in 'http://foo.com'
+ @@sanitized_protocol_separator = /:|(*58)|(p)|(%|%)3A/
+ cattr_accessor :sanitized_protocol_separator
+
+ # Specifies a Set of HTML attributes that can have URIs.
+ @@sanitized_uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc))
+ cattr_reader :sanitized_uri_attributes
+
+ # Adds valid HTML attributes that the #sanitize helper checks for URIs.
+ #
+ # Rails::Initializer.run do |config|
+ # config.action_view.sanitized_uri_attributes = 'lowsrc', 'target'
+ # end
+ #
+ def self.sanitized_uri_attributes=(attributes)
+ @@sanitized_uri_attributes.merge(attributes)
+ end
+
+ # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed
+ # to just escaping harmless tags like <font>
+ @@sanitized_bad_tags = Set.new(%w(script))
+ cattr_reader :sanitized_bad_tags
+
+ # Adds to the Set of 'bad' tags for the #sanitize helper.
+ #
+ # Rails::Initializer.run do |config|
+ # config.action_view.sanitized_bad_tags = 'embed', 'object'
+ # end
+ #
+ def self.sanitized_bad_tags=(attributes)
+ @@sanitized_bad_tags.merge(attributes)
+ end
+
+ # Specifies the default Set of tags that the #sanitize helper will allow unscathed.
+ @@sanitized_allowed_tags = Set.new(%w(strong em b i p code pre tt output samp kbd var sub
+ sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dt dd abbr
+ acronym a img blockquote del ins fieldset legend))
+ cattr_reader :sanitized_allowed_tags
+
+ # Adds to the Set of allowed tags for the #sanitize helper.
+ #
+ # Rails::Initializer.run do |config|
+ # config.action_view.sanitized_allowed_tags = 'table', 'tr', 'td'
+ # end
+ #
+ def self.sanitized_allowed_tags=(attributes)
+ @@sanitized_allowed_tags.merge(attributes)
+ end
+
+ # Specifies the default Set of html attributes that the #sanitize helper will leave
+ # in the allowed tag.
+ @@sanitized_allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr))
+ cattr_reader :sanitized_allowed_attributes
+
+ # Adds to the Set of allowed html attributes for the #sanitize helper.
+ #
+ # Rails::Initializer.run do |config|
+ # config.action_view.sanitized_allowed_attributes = 'onclick', 'longdesc'
+ # end
+ #
+ def self.sanitized_allowed_attributes=(attributes)
+ @@sanitized_allowed_attributes.merge(attributes)
+ end
+
+ # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept.
+ @@sanitized_allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse
+ border-color border-left-color border-right-color border-top-color clear color cursor direction display
+ elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height
+ overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation
+ speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space
+ width))
+ cattr_reader :sanitized_allowed_css_properties
+
+ # Adds to the Set of allowed css properties for the #sanitize and #sanitize_css heleprs.
+ #
+ # Rails::Initializer.run do |config|
+ # config.action_view.sanitized_allowed_css_properties = 'expression'
+ # end
+ #
+ def self.sanitized_allowed_css_properties=(attributes)
+ @@sanitized_allowed_css_properties.merge(attributes)
+ end
+
+ # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept.
+ @@sanitized_allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center
+ collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal
+ nowrap olive pointer purple red right solid silver teal top transparent underline white yellow))
+ cattr_reader :sanitized_allowed_css_keywords
+
+ # Adds to the Set of allowed css keywords for the #sanitize and #sanitize_css helpers.
+ #
+ # Rails::Initializer.run do |config|
+ # config.action_view.sanitized_allowed_css_keywords = 'expression'
+ # end
+ #
+ def self.sanitized_allowed_css_keywords=(attributes)
+ @@sanitized_allowed_css_keywords.merge(attributes)
+ end
+
+ # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers.
+ @@sanitized_shorthand_css_properties = Set.new(%w(background border margin padding))
+ cattr_reader :sanitized_shorthand_css_properties
+
+ # Adds to the Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers.
+ #
+ # Rails::Initializer.run do |config|
+ # config.action_view.sanitized_shorthand_css_properties = 'expression'
+ # end
+ #
+ def self.sanitized_shorthand_css_properties=(attributes)
+ @@sanitized_shorthand_css_properties.merge(attributes)
+ end
+
+ # Specifies the default Set of protocols that the #sanitize helper will leave in
+ # protocol attributes.
+ @@sanitized_allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto feed svn urn aim rsync tag ssh sftp rtsp afs))
+ cattr_reader :sanitized_allowed_protocols
+
+ # Adds to the Set of allowed protocols for the #sanitize helper.
+ #
+ # Rails::Initializer.run do |config|
+ # config.action_view.sanitized_allowed_protocols = 'ssh', 'feed'
+ # end
+ #
+ def self.sanitized_allowed_protocols=(attributes)
+ @@sanitized_allowed_protocols.merge(attributes)
+ end
@@template_handlers = HashWithIndifferentAccess.new
diff --git a/actionpack/lib/action_view/helpers/text_helper.rb b/actionpack/lib/action_view/helpers/text_helper.rb
index e7a6303154350c9d081336ce6705667055c02c2e..af6f6e4bb89d874e47bd250630ac3d08864fd906 100644
--- a/actionpack/lib/action_view/helpers/text_helper.rb
+++ b/actionpack/lib/action_view/helpers/text_helper.rb
@@ -324,63 +324,118 @@ def auto_link(text, link = :all, href_options = {}, &block)
#
# strip_links('Blog: Visit.')
# # => Blog: Visit
- def strip_links(text)
- text.gsub(/(.*?)<\/a>/mi, '\1')
+ def strip_links(html)
+ # Stupid firefox treats 'something' as link!
+ if html.index(" and ')
- # # => <script> do_nasty_stuff() </script>
+ # <%= sanitize @article.body %>
+ #
+ # You can add or remove tags/attributes if you want to customize it a bit. See ActionView::Base for full docs on the
+ # available options. You can add tags/attributes for single uses of #sanitize by passing either the :attributes or :tags options:
#
- # sanitize('Click here for $100')
- # # => Click here for $100
+ # Normal Use
#
- # sanitize('Click here!!!')
- # # => Click here!!!
+ # <%= sanitize @article.body %>
#
- # sanitize('')
- # # =>
- def sanitize(html)
- # only do this if absolutely necessary
- if html.index("<")
+ # Custom Use
+ #
+ # <%= sanitize @article.body, :tags => %w(table tr td), :attributes => %w(id class style) %>
+ #
+ # Add table tags
+ #
+ # Rails::Initializer.run do |config|
+ # config.action_view.sanitized_allowed_tags = 'table', 'tr', 'td'
+ # end
+ #
+ # Remove tags
+ #
+ # Rails::Initializer.run do |config|
+ # config.after_initialize do
+ # ActionView::Base.sanitized_allowed_tags.delete 'div'
+ # end
+ # end
+ #
+ # Change allowed attributes
+ #
+ # Rails::Initializer.run do |config|
+ # config.action_view.sanitized_allowed_attributes = 'id', 'class', 'style'
+ # end
+ #
+ def sanitize(html, options = {})
+ return html if html.blank? || !html.include?('<')
+ attrs = options.key?(:attributes) ? Set.new(options[:attributes]).merge(sanitized_allowed_attributes) : sanitized_allowed_attributes
+ tags = options.key?(:tags) ? Set.new(options[:tags] ).merge(sanitized_allowed_tags) : sanitized_allowed_tags
+ returning [] do |new_text|
tokenizer = HTML::Tokenizer.new(html)
- new_text = ""
-
+ parent = []
while token = tokenizer.next
node = HTML::Node.parse(nil, 0, 0, token, false)
new_text << case node
when HTML::Tag
- if VERBOTEN_TAGS.include?(node.name)
- node.to_s.gsub(/, "<")
+ if node.closing == :close
+ parent.shift
else
- if node.closing != :close
- node.attributes.delete_if { |attr,v| attr =~ VERBOTEN_ATTRS }
- %w(href src).each do |attr|
- node.attributes.delete attr if node.attributes[attr] =~ /^javascript:/i
- end
- end
- node.to_s
+ parent.unshift node.name
end
+ node.attributes.keys.each do |attr_name|
+ value = node.attributes[attr_name].to_s
+ if !attrs.include?(attr_name) || contains_bad_protocols?(attr_name, value)
+ node.attributes.delete(attr_name)
+ else
+ node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(value)
+ end
+ end if node.attributes
+ tags.include?(node.name) ? node : nil
else
- node.to_s.gsub(/, "<")
+ sanitized_bad_tags.include?(parent.first) ? nil : node.to_s.gsub(/, "<")
end
end
+ end.join
+ end
- html = new_text
+ # Sanitizes a block of css code. Used by #sanitize when it comes across a style attribute
+ def sanitize_css(style)
+ # disallow urls
+ style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
+
+ # gauntlet
+ if style !~ /^([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*$/ ||
+ style !~ /^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$/
+ return ''
end
- html
+ returning [] do |clean|
+ style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val|
+ if sanitized_allowed_css_properties.include?(prop.downcase)
+ clean << prop + ': ' + val + ';'
+ elsif sanitized_shorthand_css_properties.include?(prop.split('-')[0].downcase)
+ unless val.split().any? do |keyword|
+ !sanitized_allowed_css_keywords.include?(keyword) &&
+ keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/
+ end
+ clean << prop + ': ' + val + ';'
+ end
+ end
+ end
+ end.join(' ')
end
-
+
# Strips all HTML tags from the +html+, including comments. This uses the
# html-scanner tokenizer and so its HTML parsing ability is limited by
# that of html-scanner.
@@ -407,7 +462,7 @@ def strip_tags(html)
end
# strip any comments, and if they have a newline at the end (ie. line with
# only a comment) strip that too
- text.gsub(/[\n]?/m, "")
+ strip_tags(text.gsub(/[\n]?/m, "")) # Recurse - handle all dirty nested tags
else
html # already plain text
end
@@ -574,6 +629,11 @@ def auto_link_email_addresses(text)
end
end
end
+
+ def contains_bad_protocols?(attr_name, value)
+ sanitized_uri_attributes.include?(attr_name) &&
+ (value =~ /(^[^\/:]*):|(*58)|(p)|(%|%)3A/ && !sanitized_allowed_protocols.include?(value.split(sanitized_protocol_separator).first))
+ end
end
end
end
diff --git a/actionpack/test/template/text_helper_test.rb b/actionpack/test/template/text_helper_test.rb
index 822b88adee2d0ae7ab81240d9b57d80378ea97ab..80b9c773b3e7c93a5335c13ba4d201c255fba5fd 100644
--- a/actionpack/test/template/text_helper_test.rb
+++ b/actionpack/test/template/text_helper_test.rb
@@ -5,7 +5,7 @@ class TextHelperTest < Test::Unit::TestCase
include ActionView::Helpers::TextHelper
include ActionView::Helpers::TagHelper
include TestingSandbox
-
+
def setup
# This simulates the fact that instance variables are reset every time
# a view is rendered. The cycle helper depends on this behavior.
@@ -47,7 +47,13 @@ def test_truncate_multibyte
end
def test_strip_links
+ assert_equal "Dont touch me", strip_links("Dont touch me")
assert_equal "on my mind\nall day long", strip_links("on my mind\nall day long")
+ assert_equal "0wn3d", strip_links("0wn3d")
+ assert_equal "Magic", strip_links("Magic")
+ assert_equal "FrrFox", strip_links("FrrFox")
+ assert_equal "My mind\nall day long", strip_links("My mind\nall day long")
+ assert_equal "all day long", strip_links("<a href='hello'>all day long</a>")
end
def test_highlighter
@@ -255,41 +261,198 @@ def test_auto_link_with_block
end
def test_sanitize_form
- raw = "
"
- result = sanitize(raw)
- assert_equal %(<form action="/foo/bar" method="post"></form>), result
+ assert_sanitized "", ''
end
def test_sanitize_plaintext
raw = "foo"
- result = sanitize(raw)
- assert_equal "<plaintext>foo</plaintext>", result
+ assert_sanitized raw, "foo"
end
def test_sanitize_script
- raw = ""
- result = sanitize(raw)
- assert_equal %{<script language="Javascript">blah blah blah</script>}, result
+ raw = "a b cd e f"
+ assert_sanitized raw, "a b cd e f"
end
def test_sanitize_js_handlers
raw = %{onthis="do that" hello}
- result = sanitize(raw)
- assert_equal %{onthis="do that" hello}, result
+ assert_sanitized raw, %{onthis="do that" hello}
end
def test_sanitize_javascript_href
raw = %{href="javascript:bang" foo, bar}
- result = sanitize(raw)
- assert_equal %{href="javascript:bang" foo, bar}, result
+ assert_sanitized raw, %{href="javascript:bang" foo, bar}
end
def test_sanitize_image_src
raw = %{src="javascript:bang" foo, bar}
- result = sanitize(raw)
- assert_equal %{src="javascript:bang" foo, bar}, result
+ assert_sanitized raw, %{src="javascript:bang" foo, bar}
+ end
+
+ ActionView::Base.sanitized_allowed_tags.each do |tag_name|
+ define_method "test_should_allow_#{tag_name}_tag" do
+ assert_sanitized "start <#{tag_name} title=\"1\" onclick=\"foo\">foo bar baz#{tag_name}> end", %(start <#{tag_name} title="1">foo bar baz#{tag_name}> end)
+ end
+ end
+
+ def test_should_allow_anchors
+ assert_sanitized %(), %()
+ end
+
+ # RFC 3986, sec 4.2
+ def test_allow_colons_in_path_component
+ assert_sanitized("foo")
+ end
+
+ %w(src width height alt).each do |img_attr|
+ define_method "test_should_allow_image_#{img_attr}_attribute" do
+ assert_sanitized %(), %()
+ end
+ end
+
+ def test_should_handle_non_html
+ assert_sanitized 'abc'
+ end
+
+ def test_should_handle_blank_text
+ assert_sanitized nil
+ assert_sanitized ''
+ end
+
+ def test_should_allow_custom_tags
+ text = "foo"
+ assert_equal(text, sanitize(text, :tags => %w(u)))
+ end
+
+ def test_should_allow_custom_tags_with_attributes
+ text = %()
+ assert_equal(text, sanitize(text, :attributes => ['foo']))
+ end
+
+ [%w(img src), %w(a href)].each do |(tag, attr)|
+ define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do
+ assert_sanitized %(<#{tag} #{attr}="javascript:bang" title="1">boo#{tag}>), %(<#{tag} title="1">boo#{tag}>)
+ end
+ end
+
+ def test_should_flag_bad_protocols
+ %w(about chrome data disk hcp help javascript livescript lynxcgi lynxexec ms-help ms-its mhtml mocha opera res resource shell vbscript view-source vnd.ms.radio wysiwyg).each do |proto|
+ assert contains_bad_protocols?('src', "#{proto}://bad")
+ end
+ end
+
+ def test_should_accept_good_protocols
+ sanitized_allowed_protocols.each do |proto|
+ assert !contains_bad_protocols?('src', "#{proto}://good")
+ end
+ end
+
+ def test_should_reject_hex_codes_in_protocol
+ assert contains_bad_protocols?('src', "%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29")
+ assert_sanitized %(1), "1"
+ end
+
+ def test_should_block_script_tag
+ assert_sanitized %(), ""
+ end
+
+ [%(),
+ %(),
+ %(),
+ %(">),
+ %(),
+ %(),
+ %(),
+ %(),
+ %(),
+ %(),
+ %(),
+ %(),
+ %(),
+ %(),
+ %()].each_with_index do |img_hack, i|
+ define_method "test_should_not_fall_for_xss_image_hack_#{i+1}" do
+ assert_sanitized img_hack, ""
+ end
+ end
+
+ def test_should_sanitize_tag_broken_up_by_null
+ assert_sanitized %(alert(\"XSS\")), "alert(\"XSS\")"
+ end
+
+ def test_should_sanitize_invalid_script_tag
+ assert_sanitized %(), ""
end
+ def test_should_sanitize_script_tag_with_multiple_open_brackets
+ assert_sanitized %(<), "<"
+ assert_sanitized %(