提交 c2a424da 编写于 作者: C Claudio Salazar

Fixed XML selector against XXE attacks

上级 43217fd6
...@@ -15,11 +15,16 @@ from .csstranslator import ScrapyHTMLTranslator, ScrapyGenericTranslator ...@@ -15,11 +15,16 @@ from .csstranslator import ScrapyHTMLTranslator, ScrapyGenericTranslator
__all__ = ['Selector', 'SelectorList'] __all__ = ['Selector', 'SelectorList']
class SafeXMLParser(etree.XMLParser):
    """``etree.XMLParser`` subclass that leaves entities unresolved by default.

    Used as the parser for XML content so that entity references in a
    document (for example ``&xxe;`` declared with a ``SYSTEM`` identifier)
    are kept as references instead of being expanded during parsing.
    """

    def __init__(self, *args, **kwargs):
        """Build the parser, defaulting ``resolve_entities`` to ``False``.

        All positional and keyword arguments are forwarded unchanged to
        ``etree.XMLParser``.
        """
        # setdefault instead of a literal keyword in the call: passing
        # ``resolve_entities=False`` next to ``**kwargs`` raises TypeError
        # ("multiple values for keyword argument") whenever the caller
        # supplies that option too.
        kwargs.setdefault('resolve_entities', False)
        super(SafeXMLParser, self).__init__(*args, **kwargs)
_ctgroup = { _ctgroup = {
'html': {'_parser': etree.HTMLParser, 'html': {'_parser': etree.HTMLParser,
'_csstranslator': ScrapyHTMLTranslator(), '_csstranslator': ScrapyHTMLTranslator(),
'_tostring_method': 'html'}, '_tostring_method': 'html'},
'xml': {'_parser': etree.XMLParser, 'xml': {'_parser': SafeXMLParser,
'_csstranslator': ScrapyGenericTranslator(), '_csstranslator': ScrapyGenericTranslator(),
'_tostring_method': 'xml'}, '_tostring_method': 'xml'},
} }
......
...@@ -332,6 +332,16 @@ class SelectorTestCase(unittest.TestCase): ...@@ -332,6 +332,16 @@ class SelectorTestCase(unittest.TestCase):
div_class = x.xpath('//div/@class') div_class = x.xpath('//div/@class')
self.assertTrue(all(map(lambda e: hasattr(e._root, 'getparent'), div_class))) self.assertTrue(all(map(lambda e: hasattr(e._root, 'getparent'), div_class)))
def test_xml_entity_expansion(self):
    """An external entity reference must be extracted unexpanded.

    The document declares ``xxe`` as a ``SYSTEM`` entity pointing at
    ``file:///etc/passwd``; the selector is expected to return the
    ``&xxe;`` reference verbatim.
    """
    payload = (
        '<?xml version="1.0" encoding="ISO-8859-1"?>'
        '<!DOCTYPE foo [ <!ELEMENT foo ANY > <!ENTITY xxe SYSTEM '
        '"file:///etc/passwd" >]><foo>&xxe;</foo>'
    )
    selector = self.sscls(
        response=XmlResponse('http://example.com', body=payload))
    self.assertEqual(selector.extract(), '<foo>&xxe;</foo>')
class DeprecatedXpathSelectorTest(unittest.TestCase): class DeprecatedXpathSelectorTest(unittest.TestCase):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册