diff --git a/git-compat-util.h b/git-compat-util.h index 5bd9ad7d2a23773b1410ded9f4f241ebe4d4da00..f011a8d7bb9fc61b0bd840ea954257f505bd62d5 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -466,6 +466,7 @@ extern const char tolower_trans_tbl[256]; #undef isdigit #undef isalpha #undef isalnum +#undef isprint #undef islower #undef isupper #undef tolower @@ -483,6 +484,7 @@ extern unsigned char sane_ctype[256]; #define isdigit(x) sane_istest(x,GIT_DIGIT) #define isalpha(x) sane_istest(x,GIT_ALPHA) #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) +#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e) #define islower(x) sane_iscase(x, 1) #define isupper(x) sane_iscase(x, 0) #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) diff --git a/pretty.c b/pretty.c index 482402d2838ce3a8f8700dea47afc1cdc37f38e3..613e4eab0db8baeaf905ae825bb7294b4c23363b 100644 --- a/pretty.c +++ b/pretty.c @@ -272,16 +272,65 @@ static void add_rfc822_quoted(struct strbuf *out, const char *s, int len) strbuf_addch(out, '"'); } -static int is_rfc2047_special(char ch) +enum rfc2047_type { + RFC2047_SUBJECT, + RFC2047_ADDRESS, +}; + +static int is_rfc2047_special(char ch, enum rfc2047_type type) { - if (ch == ' ' || ch == '\n') + /* + * rfc2047, section 4.2: + * + * 8-bit values which correspond to printable ASCII characters other + * than "=", "?", and "_" (underscore), MAY be represented as those + * characters. (But see section 5 for restrictions.) In + * particular, SPACE and TAB MUST NOT be represented as themselves + * within encoded words. 
+ */ + + /* + * rule out non-ASCII characters and non-printable characters (the + * non-ASCII check should be redundant as isprint() is not localized + * and only knows about ASCII, but be defensive about that) + */ + if (non_ascii(ch) || !isprint(ch)) + return 1; + + /* + * rule out special printable characters (' ' should be the only + * whitespace character considered printable, but be defensive and use + * isspace()) + */ + if (isspace(ch) || ch == '=' || ch == '?' || ch == '_') return 1; - return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_')); + /* + * rfc2047, section 5.3: + * + * As a replacement for a 'word' entity within a 'phrase', for example, + * one that precedes an address in a From, To, or Cc header. The ABNF + * definition for 'phrase' from RFC 822 thus becomes: + * + * phrase = 1*( encoded-word / word ) + * + * In this case the set of characters that may be used in a "Q"-encoded + * 'encoded-word' is restricted to: <upper and lower case ASCII + * letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_" + * (underscore, ASCII 95.)>. An 'encoded-word' that appears within a + * 'phrase' MUST be separated from any adjacent 'word', 'text' or + * 'special' by 'linear-white-space'. + */ + + if (type != RFC2047_ADDRESS) + return 0; + + /* '=' and '_' are special cases and have been checked above */ + return !(isalnum(ch) || ch == '!' || ch == '*' || ch == '+' || ch == '-' || ch == '/'); } static void add_rfc2047(struct strbuf *sb, const char *line, int len, - const char *encoding) + const char *encoding, enum rfc2047_type type) { static const int max_length = 78; /* per rfc2822 */ static const int max_encoded_length = 76; /* per rfc2047 */ @@ -304,7 +353,7 @@ static void add_rfc2047(struct strbuf *sb, const char *line, int len, line_len += strlen(encoding) + 5; /* 5 for =??q? 
*/ for (i = 0; i < len; i++) { unsigned ch = line[i] & 0xFF; - int is_special = is_rfc2047_special(ch); + int is_special = is_rfc2047_special(ch, type); /* * According to RFC 2047, we could encode the special character @@ -358,11 +407,13 @@ void pp_user_info(const struct pretty_print_context *pp, display_name_length = name_tail - line; strbuf_addstr(sb, "From: "); if (!has_rfc822_specials(line, display_name_length)) { - add_rfc2047(sb, line, display_name_length, encoding); + add_rfc2047(sb, line, display_name_length, + encoding, RFC2047_ADDRESS); } else { struct strbuf quoted = STRBUF_INIT; add_rfc822_quoted(&quoted, line, display_name_length); - add_rfc2047(sb, quoted.buf, quoted.len, encoding); + add_rfc2047(sb, quoted.buf, quoted.len, + encoding, RFC2047_ADDRESS); strbuf_release(&quoted); } if (namelen - display_name_length + last_line_length(sb) > 78) { @@ -1294,7 +1345,7 @@ void pp_title_line(const struct pretty_print_context *pp, strbuf_grow(sb, title.len + 1024); if (pp->subject) { strbuf_addstr(sb, pp->subject); - add_rfc2047(sb, title.buf, title.len, encoding); + add_rfc2047(sb, title.buf, title.len, encoding, RFC2047_SUBJECT); } else { strbuf_addbuf(sb, &title); } diff --git a/t/t4014-format-patch.sh b/t/t4014-format-patch.sh index 1d5636d8a047b7a411f244413dab026a4b1bb1b5..727d606884993335524a078f20c35731cfc2d88a 100755 --- a/t/t4014-format-patch.sh +++ b/t/t4014-format-patch.sh @@ -818,21 +818,28 @@ check_author() { cat >expect <<'EOF' From: "Foo B. Bar" <author@example.com> EOF -test_expect_success 'format-patch quotes dot in headers' ' +test_expect_success 'format-patch quotes dot in from-headers' ' check_author "Foo B. 
Bar" ' cat >expect <<'EOF' From: "Foo \"The Baz\" Bar" <author@example.com> EOF -test_expect_success 'format-patch quotes double-quote in headers' ' +test_expect_success 'format-patch quotes double-quote in from-headers' ' check_author "Foo \"The Baz\" Bar" ' cat >expect <<'EOF' -From: =?UTF-8?q?"F=C3=B6o=20B.=20Bar"?= <author@example.com> +From: =?UTF-8?q?F=C3=B6o=20Bar?= <author@example.com> EOF -test_expect_success 'rfc2047-encoded headers also double-quote 822 specials' ' +test_expect_success 'format-patch uses rfc2047-encoded from-headers when necessary' ' + check_author "Föo Bar" +' + +cat >expect <<'EOF' +From: =?UTF-8?q?F=C3=B6o=20B=2E=20Bar?= <author@example.com> +EOF +test_expect_failure 'rfc2047-encoded from-headers leave no rfc822 specials' ' check_author "Föo B. Bar" '