Check for PEP263 headers in req files

50ed3d49 · Xavier Fernandez · 5589ff28 · 50ed3d49 · 50ed3d49
隐藏空白更改
内联并排

Showing 2 changed files with 12 additions and 0 deletions

pip/utils/encoding.py pip/utils/encoding.py +8 -0

tests/unit/test_utils.py tests/unit/test_utils.py +4 -0

未找到文件。
--- a/pip/utils/encoding.py
+++ b/pip/utils/encoding.py
 import codecs
 import locale
+import re


 BOMS = [
@@ -12,6 +13,8 @@ BOMS = [
    (codecs.BOM_UTF32_LE, 'utf32-le'),
 ]

+ENCODING_RE = re.compile(r'coding[:=]\s*([-\w.]+)')  # PEP 263 coding cookie
+

 def auto_decode(data):
    """Check a bytes string for a BOM to correctly detect the encoding
@@ -20,4 +23,9 @@ def auto_decode(data):
    for bom, encoding in BOMS:
        if data.startswith(bom):
            return data[len(bom):].decode(encoding)
+    # PEP 263: check the first two lines, as text to suit the str regex
+    for line in data.splitlines()[:2]:
+        match = ENCODING_RE.search(line.decode('ascii', 'replace'))
+        if line[:1] == b'#' and match:
+            return data.decode(match.group(1))
    return data.decode(locale.getpreferredencoding(False))
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -464,3 +464,7 @@ class TestEncoding(object):

    def test_auto_decode_no_bom(self):
        assert auto_decode(b'foobar') == u'foobar'
+
+    def test_auto_decode_pep263_headers(self):
+        latin1_req = u'# coding=latin1\n# Pas trop de café'
+        assert auto_decode(latin1_req.encode('latin1')) == latin1_req