提交 c2a424da 编写于 作者: C Claudio Salazar

Fixed XML selector against XXE attacks

上级 43217fd6
......@@ -15,11 +15,16 @@ from .csstranslator import ScrapyHTMLTranslator, ScrapyGenericTranslator
__all__ = ['Selector', 'SelectorList']
class SafeXMLParser(etree.XMLParser):
    """XML parser that leaves entity references unresolved by default.

    ``resolve_entities=False`` is handed to ``etree.XMLParser`` so that
    entities declared in a document's DTD (for example
    ``<!ENTITY xxe SYSTEM "file:///etc/passwd">``) are not expanded while
    parsing; this is the protection against XXE attacks.

    All positional and keyword arguments are forwarded unchanged to the
    base parser. An explicit ``resolve_entities`` keyword supplied by the
    caller is honoured instead of the safe default.
    """

    def __init__(self, *args, **kwargs):
        # setdefault instead of a literal keyword in the super() call: with
        # the literal, a caller passing ``resolve_entities`` themselves would
        # hit "TypeError: got multiple values for keyword argument".
        kwargs.setdefault('resolve_entities', False)
        super(SafeXMLParser, self).__init__(*args, **kwargs)
# Per-content-type settings, keyed by selector type: the parser class to
# instantiate, the CSS translator instance and the tostring method name.
_ctgroup = {
    'html': {'_parser': etree.HTMLParser,
             '_csstranslator': ScrapyHTMLTranslator(),
             '_tostring_method': 'html'},
    # XML is parsed with SafeXMLParser (not the plain etree.XMLParser) so
    # that DTD-declared entities are not resolved.
    'xml': {'_parser': SafeXMLParser,
            '_csstranslator': ScrapyGenericTranslator(),
            '_tostring_method': 'xml'},
}
......
......@@ -332,6 +332,16 @@ class SelectorTestCase(unittest.TestCase):
div_class = x.xpath('//div/@class')
self.assertTrue(all(map(lambda e: hasattr(e._root, 'getparent'), div_class)))
def test_xml_entity_expansion(self):
    # The DTD declares an external entity pointing at a local file; the
    # selector must hand back the reference itself ("&xxe;") rather than
    # the expanded contents.
    payload = (
        '<?xml version="1.0" encoding="ISO-8859-1"?>'
        '<!DOCTYPE foo [ <!ELEMENT foo ANY > <!ENTITY xxe SYSTEM '
        '"file:///etc/passwd" >]><foo>&xxe;</foo>'
    )
    xml_response = XmlResponse('http://example.com', body=payload)
    selector = self.sscls(response=xml_response)
    self.assertEqual(selector.extract(), '<foo>&xxe;</foo>')
class DeprecatedXpathSelectorTest(unittest.TestCase):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册