提交 0fcec2ce 编写于 作者: J Jan H. Schönherr 提交者: Junio C Hamano

format-patch: make rfc2047 encoding more strict

RFC 2047 requires more characters to be encoded than is currently done.
In particular, RFC 2047 distinguishes between the remaining characters
allowed in encoded words in addresses (From, To, etc.) and those allowed
in other headers, such as Subject.

Make add_rfc2047() and is_rfc2047_special() location dependent and include
all non-allowed characters to hopefully be RFC 2047 conformant.

In particular, this fixes a problem where RFC 822 specials (e.g. ".") were
left unencoded in addresses, which was worked around in the past in a
non-standard-conforming way (that workaround will be removed in a follow-up patch).
Signed-off-by: Jan H. Schönherr <schnhrr@cs.tu-berlin.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
上级 f9b7204b
...@@ -466,6 +466,7 @@ extern const char tolower_trans_tbl[256]; ...@@ -466,6 +466,7 @@ extern const char tolower_trans_tbl[256];
#undef isdigit #undef isdigit
#undef isalpha #undef isalpha
#undef isalnum #undef isalnum
#undef isprint
#undef islower #undef islower
#undef isupper #undef isupper
#undef tolower #undef tolower
...@@ -483,6 +484,7 @@ extern unsigned char sane_ctype[256]; ...@@ -483,6 +484,7 @@ extern unsigned char sane_ctype[256];
#define isdigit(x) sane_istest(x,GIT_DIGIT) #define isdigit(x) sane_istest(x,GIT_DIGIT)
#define isalpha(x) sane_istest(x,GIT_ALPHA) #define isalpha(x) sane_istest(x,GIT_ALPHA)
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)
#define islower(x) sane_iscase(x, 1) #define islower(x) sane_iscase(x, 1)
#define isupper(x) sane_iscase(x, 0) #define isupper(x) sane_iscase(x, 0)
#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
......
...@@ -272,16 +272,65 @@ static void add_rfc822_quoted(struct strbuf *out, const char *s, int len) ...@@ -272,16 +272,65 @@ static void add_rfc822_quoted(struct strbuf *out, const char *s, int len)
strbuf_addch(out, '"'); strbuf_addch(out, '"');
} }
static int is_rfc2047_special(char ch) enum rfc2047_type {
RFC2047_SUBJECT,
RFC2047_ADDRESS,
};
static int is_rfc2047_special(char ch, enum rfc2047_type type)
{ {
if (ch == ' ' || ch == '\n') /*
* rfc2047, section 4.2:
*
* 8-bit values which correspond to printable ASCII characters other
* than "=", "?", and "_" (underscore), MAY be represented as those
* characters. (But see section 5 for restrictions.) In
* particular, SPACE and TAB MUST NOT be represented as themselves
* within encoded words.
*/
/*
* rule out non-ASCII characters and non-printable characters (the
* non-ASCII check should be redundant as isprint() is not localized
* and only knows about ASCII, but be defensive about that)
*/
if (non_ascii(ch) || !isprint(ch))
return 1;
/*
* rule out special printable characters (' ' should be the only
* whitespace character considered printable, but be defensive and use
* isspace())
*/
if (isspace(ch) || ch == '=' || ch == '?' || ch == '_')
return 1; return 1;
return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_')); /*
* rfc2047, section 5.3:
*
* As a replacement for a 'word' entity within a 'phrase', for example,
* one that precedes an address in a From, To, or Cc header. The ABNF
* definition for 'phrase' from RFC 822 thus becomes:
*
* phrase = 1*( encoded-word / word )
*
* In this case the set of characters that may be used in a "Q"-encoded
* 'encoded-word' is restricted to: <upper and lower case ASCII
* letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_"
* (underscore, ASCII 95.)>. An 'encoded-word' that appears within a
* 'phrase' MUST be separated from any adjacent 'word', 'text' or
* 'special' by 'linear-white-space'.
*/
if (type != RFC2047_ADDRESS)
return 0;
/* '=' and '_' are special cases and have been checked above */
return !(isalnum(ch) || ch == '!' || ch == '*' || ch == '+' || ch == '-' || ch == '/');
} }
static void add_rfc2047(struct strbuf *sb, const char *line, int len, static void add_rfc2047(struct strbuf *sb, const char *line, int len,
const char *encoding) const char *encoding, enum rfc2047_type type)
{ {
static const int max_length = 78; /* per rfc2822 */ static const int max_length = 78; /* per rfc2822 */
static const int max_encoded_length = 76; /* per rfc2047 */ static const int max_encoded_length = 76; /* per rfc2047 */
...@@ -304,7 +353,7 @@ static void add_rfc2047(struct strbuf *sb, const char *line, int len, ...@@ -304,7 +353,7 @@ static void add_rfc2047(struct strbuf *sb, const char *line, int len,
line_len += strlen(encoding) + 5; /* 5 for =??q? */ line_len += strlen(encoding) + 5; /* 5 for =??q? */
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
unsigned ch = line[i] & 0xFF; unsigned ch = line[i] & 0xFF;
int is_special = is_rfc2047_special(ch); int is_special = is_rfc2047_special(ch, type);
/* /*
* According to RFC 2047, we could encode the special character * According to RFC 2047, we could encode the special character
...@@ -358,11 +407,13 @@ void pp_user_info(const struct pretty_print_context *pp, ...@@ -358,11 +407,13 @@ void pp_user_info(const struct pretty_print_context *pp,
display_name_length = name_tail - line; display_name_length = name_tail - line;
strbuf_addstr(sb, "From: "); strbuf_addstr(sb, "From: ");
if (!has_rfc822_specials(line, display_name_length)) { if (!has_rfc822_specials(line, display_name_length)) {
add_rfc2047(sb, line, display_name_length, encoding); add_rfc2047(sb, line, display_name_length,
encoding, RFC2047_ADDRESS);
} else { } else {
struct strbuf quoted = STRBUF_INIT; struct strbuf quoted = STRBUF_INIT;
add_rfc822_quoted(&quoted, line, display_name_length); add_rfc822_quoted(&quoted, line, display_name_length);
add_rfc2047(sb, quoted.buf, quoted.len, encoding); add_rfc2047(sb, quoted.buf, quoted.len,
encoding, RFC2047_ADDRESS);
strbuf_release(&quoted); strbuf_release(&quoted);
} }
if (namelen - display_name_length + last_line_length(sb) > 78) { if (namelen - display_name_length + last_line_length(sb) > 78) {
...@@ -1294,7 +1345,7 @@ void pp_title_line(const struct pretty_print_context *pp, ...@@ -1294,7 +1345,7 @@ void pp_title_line(const struct pretty_print_context *pp,
strbuf_grow(sb, title.len + 1024); strbuf_grow(sb, title.len + 1024);
if (pp->subject) { if (pp->subject) {
strbuf_addstr(sb, pp->subject); strbuf_addstr(sb, pp->subject);
add_rfc2047(sb, title.buf, title.len, encoding); add_rfc2047(sb, title.buf, title.len, encoding, RFC2047_SUBJECT);
} else { } else {
strbuf_addbuf(sb, &title); strbuf_addbuf(sb, &title);
} }
......
...@@ -818,21 +818,28 @@ check_author() { ...@@ -818,21 +818,28 @@ check_author() {
cat >expect <<'EOF' cat >expect <<'EOF'
From: "Foo B. Bar" <author@example.com> From: "Foo B. Bar" <author@example.com>
EOF EOF
test_expect_success 'format-patch quotes dot in headers' ' test_expect_success 'format-patch quotes dot in from-headers' '
check_author "Foo B. Bar" check_author "Foo B. Bar"
' '
cat >expect <<'EOF' cat >expect <<'EOF'
From: "Foo \"The Baz\" Bar" <author@example.com> From: "Foo \"The Baz\" Bar" <author@example.com>
EOF EOF
test_expect_success 'format-patch quotes double-quote in headers' ' test_expect_success 'format-patch quotes double-quote in from-headers' '
check_author "Foo \"The Baz\" Bar" check_author "Foo \"The Baz\" Bar"
' '
cat >expect <<'EOF' cat >expect <<'EOF'
From: =?UTF-8?q?"F=C3=B6o=20B.=20Bar"?= <author@example.com> From: =?UTF-8?q?F=C3=B6o=20Bar?= <author@example.com>
EOF EOF
test_expect_success 'rfc2047-encoded headers also double-quote 822 specials' ' test_expect_success 'format-patch uses rfc2047-encoded from-headers when necessary' '
check_author "Föo Bar"
'
cat >expect <<'EOF'
From: =?UTF-8?q?F=C3=B6o=20B=2E=20Bar?= <author@example.com>
EOF
test_expect_failure 'rfc2047-encoded from-headers leave no rfc822 specials' '
check_author "Föo B. Bar" check_author "Föo B. Bar"
' '
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册