diff --git a/attr.c b/attr.c index 1f9eebd2ddb7fe8a561500c41f33a52d1fb4fdd1..7f445965c1886fe8cb53966969b48f4b0bde4826 100644 --- a/attr.c +++ b/attr.c @@ -12,6 +12,7 @@ #include "exec_cmd.h" #include "attr.h" #include "dir.h" +#include "utf8.h" const char git_attr__true[] = "(builtin)true"; const char git_attr__false[] = "\0(builtin)false"; @@ -379,8 +380,12 @@ static struct attr_stack *read_attr_from_file(const char *path, int macro_ok) return NULL; } res = xcalloc(1, sizeof(*res)); - while (fgets(buf, sizeof(buf), fp)) - handle_attr_line(res, buf, path, ++lineno, macro_ok); + while (fgets(buf, sizeof(buf), fp)) { + char *bufp = buf; + if (!lineno) + skip_utf8_bom(&bufp, strlen(bufp)); + handle_attr_line(res, bufp, path, ++lineno, macro_ok); + } fclose(fp); return res; } diff --git a/config.c b/config.c index 66c0a51bce529e4c027f11017697a62dd737b3bd..c4424c01388496b5995e19f9601f0c87b9fdd3c0 100644 --- a/config.c +++ b/config.c @@ -12,6 +12,7 @@ #include "quote.h" #include "hashmap.h" #include "string-list.h" +#include "utf8.h" struct config_source { struct config_source *prev; @@ -417,8 +418,7 @@ static int git_parse_source(config_fn_t fn, void *data) struct strbuf *var = &cf->var; /* U+FEFF Byte Order Mark in UTF8 */ - static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf"; - const unsigned char *bomptr = utf8_bom; + const char *bomptr = utf8_bom; for (;;) { int c = get_next_char(); @@ -426,7 +426,7 @@ static int git_parse_source(config_fn_t fn, void *data) /* We are at the file beginning; skip UTF8-encoded BOM * if present. Sane editors won't put this in on their * own, but e.g. Windows Notepad will do it happily. */ - if ((unsigned char) c == *bomptr) { + if (c == (*bomptr & 0377)) { bomptr++; continue; } else { diff --git a/dir.c b/dir.c index 0943a81964ddb7b5b1d83c9c8eafe2b3b2b9da09..a3e70734004e5da22dce9ba4062c0d6684061855 100644 --- a/dir.c +++ b/dir.c @@ -12,6 +12,7 @@ #include "refs.h" #include "wildmatch.h" #include "pathspec.h" +#include "utf8.h" struct path_simplify { int len; @@ -617,7 +618,12 @@ int add_excludes_from_file_to_list(const char *fname, } el->filebuf = buf; + + if (skip_utf8_bom(&buf, size)) + size -= buf - el->filebuf; + entry = buf; + for (i = 0; i < size; i++) { if (buf[i] == '\n') { if (entry != buf + i && entry[0] != '#') { diff --git a/t/t7061-wtstatus-ignore.sh b/t/t7061-wtstatus-ignore.sh index 460789b4d85241eb51c0c1a5a1f4be7d4a0d7154..cdc0747bf01b0598a2e864073887896162815019 100755 --- a/t/t7061-wtstatus-ignore.sh +++ b/t/t7061-wtstatus-ignore.sh @@ -20,6 +20,15 @@ test_expect_success 'status untracked directory with --ignored' ' test_cmp expected actual ' +test_expect_success 'same with gitignore starting with BOM' ' + printf "\357\273\277ignored\n" >.gitignore && + mkdir -p untracked && + : >untracked/ignored && + : >untracked/uncommitted && + git status --porcelain --ignored >actual && + test_cmp expected actual +' + cat >expected <<\EOF ?? .gitignore ?? actual diff --git a/utf8.c b/utf8.c index 520fbb4994ab1bdcfa0bc4589a0ca5131a89c490..28e6d76a425db4c16a8cc32e6f17c7aa72c2b1f0 100644 --- a/utf8.c +++ b/utf8.c @@ -633,3 +633,14 @@ int is_hfs_dotgit(const char *path) return 1; } + +const char utf8_bom[] = "\357\273\277"; + +int skip_utf8_bom(char **text, size_t len) +{ + if (len < strlen(utf8_bom) || + memcmp(*text, utf8_bom, strlen(utf8_bom))) + return 0; + *text += strlen(utf8_bom); + return 1; +} diff --git a/utf8.h b/utf8.h index e4d9183c5fec81f8dbc75d1f2aea136f9b397a72..e7b2aa416844a0b2ccb2d2cf8a1bce68c4d60be1 100644 --- a/utf8.h +++ b/utf8.h @@ -13,6 +13,9 @@ int same_encoding(const char *, const char *); __attribute__((format (printf, 2, 3))) int utf8_fprintf(FILE *, const char *, ...); +extern const char utf8_bom[]; +extern int skip_utf8_bom(char **, size_t); + void strbuf_add_wrapped_text(struct strbuf *buf, const char *text, int indent, int indent2, int width); void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len,