提交 1a182272 编写于 作者: J Jeffrey Hardy 提交者: Jeremy Kemper

Fix that HTML::Node.parse would blow up on unclosed CDATA sections.

If an unclosed CDATA section is encountered and parsing is strict, an
exception will be raised. Otherwise, we consider the remainder of the line to
be the section contents. This is consistent with HTML::Tokenizer#scan_tag.
Signed-off-by: Jeremy Kemper <jeremy@bitsweat.net>
上级 838cb1aa
......@@ -150,7 +150,14 @@ def parse(parent, line, pos, content, strict=true)
end
if scanner.skip(/!\[CDATA\[/)
scanner.scan_until(/\]\]>/)
unless scanner.skip_until(/\]\]>/)
if strict
raise "expected ]]> (got #{scanner.rest.inspect} for #{content})"
else
scanner.skip_until(/\Z/)
end
end
return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/<!\[CDATA\[/, ''))
end
......
......@@ -65,4 +65,25 @@ def test_parse_with_unclosed_tag
assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) }
assert node.attributes.has_key?("onmouseover")
end
# A properly terminated CDATA section, parsed with strict=false, must yield an
# HTML::CDATA node whose content is the text between the delimiters.
def test_parse_with_valid_cdata_section
  markup = "<![CDATA[<span>contents</span>]]>"
  parsed = nil
  assert_nothing_raised do
    parsed = HTML::Node.parse(nil, 0, 0, markup, false)
  end
  assert_kind_of HTML::CDATA, parsed
  assert_equal '<span>contents</span>', parsed.content
end
# With the strict argument omitted, parsing a CDATA section that never closes
# is expected to raise a RuntimeError.
def test_parse_strict_with_unterminated_cdata_section
  unterminated = "<![CDATA[neverending..."
  assert_raise(RuntimeError) do
    HTML::Node.parse(nil, 0, 0, unterminated)
  end
end
# With strict=false, an unterminated CDATA section must not raise; the node's
# content is expected to be everything after the opening delimiter.
def test_parse_relaxed_with_unterminated_cdata_section
  unterminated = "<![CDATA[neverending..."
  parsed = nil
  assert_nothing_raised do
    parsed = HTML::Node.parse(nil, 0, 0, unterminated, false)
  end
  assert_kind_of HTML::CDATA, parsed
  assert_equal 'neverending...', parsed.content
end
end
......@@ -17,6 +17,8 @@ def test_strip_tags
%{This is a test.\n\n\nIt no longer contains any HTML.\n}, sanitizer.sanitize(
%{<title>This is <b>a <a href="" target="_blank">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n}))
assert_equal "This has a here.", sanitizer.sanitize("This has a <!-- comment --> here.")
assert_equal "This has a here.", sanitizer.sanitize("This has a <![CDATA[<section>]]> here.")
assert_equal "This has an unclosed ", sanitizer.sanitize("This has an unclosed <![CDATA[<section>]] here...")
[nil, '', ' '].each { |blank| assert_equal blank, sanitizer.sanitize(blank) }
end
......@@ -243,6 +245,14 @@ def test_should_sanitize_img_vbscript
assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), '<img />'
end
# Hands a complete CDATA section to assert_sanitized together with the
# expected output, in which the opening angle brackets appear as &lt;.
def test_should_sanitize_cdata_section
  raw = "<![CDATA[<span>section</span>]]>"
  escaped = "&lt;![CDATA[&lt;span>section&lt;/span>]]>"
  assert_sanitized raw, escaped
end
# Hands an unterminated CDATA section to assert_sanitized; the expected output
# has the opening angle brackets as &lt; and ends with a closing "]]>".
def test_should_sanitize_unterminated_cdata_section
  raw = "<![CDATA[<span>neverending..."
  escaped = "&lt;![CDATA[&lt;span>neverending...]]>"
  assert_sanitized raw, escaped
end
protected
def assert_sanitized(input, expected = nil)
@sanitizer ||= HTML::WhiteListSanitizer.new
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册