diff --git a/convert.c b/convert.c index cb5fbb45ea04cead65316ecf6b44730b17108122..677d339a8b5aa8e56b4e6ee0df05e6570b9270e0 100644 --- a/convert.c +++ b/convert.c @@ -312,11 +312,12 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len, struct filter_params { const char *src; unsigned long size; + int fd; const char *cmd; const char *path; }; -static int filter_buffer(int in, int out, void *data) +static int filter_buffer_or_fd(int in, int out, void *data) { /* * Spawn cmd and feed the buffer contents through its stdin. @@ -355,7 +356,12 @@ static int filter_buffer(int in, int out, void *data) sigchain_push(SIGPIPE, SIG_IGN); - write_err = (write_in_full(child_process.in, params->src, params->size) < 0); + if (params->src) { + write_err = (write_in_full(child_process.in, params->src, params->size) < 0); + } else { + write_err = copy_fd(params->fd, child_process.in); + } + if (close(child_process.in)) write_err = 1; if (write_err) @@ -371,7 +377,7 @@ static int filter_buffer(int in, int out, void *data) return (write_err || status); } -static int apply_filter(const char *path, const char *src, size_t len, +static int apply_filter(const char *path, const char *src, size_t len, int fd, struct strbuf *dst, const char *cmd) { /* @@ -392,11 +398,12 @@ static int apply_filter(const char *path, const char *src, size_t len, return 1; memset(&async, 0, sizeof(async)); - async.proc = filter_buffer; + async.proc = filter_buffer_or_fd; async.data = ¶ms; async.out = -1; params.src = src; params.size = len; + params.fd = fd; params.cmd = cmd; params.path = path; @@ -747,6 +754,25 @@ static void convert_attrs(struct conv_attrs *ca, const char *path) } } +int would_convert_to_git_filter_fd(const char *path) +{ + struct conv_attrs ca; + + convert_attrs(&ca, path); + if (!ca.drv) + return 0; + + /* + * Apply a filter to an fd only if the filter is required to succeed. + * We must die if the filter fails, because the original data before + * filtering is not available. + */ + if (!ca.drv->required) + return 0; + + return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean); +} + int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst, enum safe_crlf checksafe) { @@ -761,7 +787,7 @@ int convert_to_git(const char *path, const char *src, size_t len, required = ca.drv->required; } - ret |= apply_filter(path, src, len, dst, filter); + ret |= apply_filter(path, src, len, -1, dst, filter); if (!ret && required) die("%s: clean filter '%s' failed", path, ca.drv->name); @@ -778,6 +804,23 @@ int convert_to_git(const char *path, const char *src, size_t len, return ret | ident_to_git(path, src, len, dst, ca.ident); } +void convert_to_git_filter_fd(const char *path, int fd, struct strbuf *dst, + enum safe_crlf checksafe) +{ + struct conv_attrs ca; + convert_attrs(&ca, path); + + assert(ca.drv); + assert(ca.drv->clean); + + if (!apply_filter(path, NULL, 0, fd, dst, ca.drv->clean)) + die("%s: clean filter '%s' failed", path, ca.drv->name); + + ca.crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr); + crlf_to_git(path, dst->buf, dst->len, dst, ca.crlf_action, checksafe); + ident_to_git(path, dst->buf, dst->len, dst, ca.ident); +} + static int convert_to_working_tree_internal(const char *path, const char *src, size_t len, struct strbuf *dst, int normalizing) @@ -811,7 +854,7 @@ static int convert_to_working_tree_internal(const char *path, const char *src, } } - ret_filter = apply_filter(path, src, len, dst, filter); + ret_filter = apply_filter(path, src, len, -1, dst, filter); if (!ret_filter && required) die("%s: smudge filter %s failed", path, ca.drv->name); diff --git a/convert.h b/convert.h index c638b33632f5fe0398aed7da72fb3b157ff222cd..d9d853cd3d2f6d94965a2c2fc8e587386402a83b 100644 --- a/convert.h +++ b/convert.h @@ -44,6 +44,11 @@ static inline int would_convert_to_git(const char *path) { return convert_to_git(path, NULL, 0, NULL, 0); } +/* Precondition: would_convert_to_git_filter_fd(path) == true */ +extern void convert_to_git_filter_fd(const char *path, int fd, + struct strbuf *dst, + enum safe_crlf checksafe); +extern int would_convert_to_git_filter_fd(const char *path); /***************************************************************** * diff --git a/sha1_file.c b/sha1_file.c index d9b51578ccd3b5e487b3ad31fefbe1f8b3e7e99e..423ec64e8732687afbd5c118885a6864927e2db6 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -3090,6 +3090,29 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size, return ret; } +static int index_stream_convert_blob(unsigned char *sha1, int fd, + const char *path, unsigned flags) +{ + int ret; + const int write_object = flags & HASH_WRITE_OBJECT; + struct strbuf sbuf = STRBUF_INIT; + + assert(path); + assert(would_convert_to_git_filter_fd(path)); + + convert_to_git_filter_fd(path, fd, &sbuf, + write_object ? safe_crlf : SAFE_CRLF_FALSE); + + if (write_object) + ret = write_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB), + sha1); + else + ret = hash_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB), + sha1); + strbuf_release(&sbuf); + return ret; +} + static int index_pipe(unsigned char *sha1, int fd, enum object_type type, const char *path, unsigned flags) { @@ -3157,7 +3180,9 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int ret; size_t size = xsize_t(st->st_size); - if (!S_ISREG(st->st_mode)) + if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(path)) + ret = index_stream_convert_blob(sha1, fd, path, flags); + else if (!S_ISREG(st->st_mode)) ret = index_pipe(sha1, fd, type, path, flags); else if (size <= big_file_threshold || type != OBJ_BLOB || (path && would_convert_to_git(path))) diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh index f890c54d137aab021e6d6a61c5741af872555c1f..ca7d2a630a8442d8812b444708d6f28f2e45fb32 100755 --- a/t/t0021-conversion.sh +++ b/t/t0021-conversion.sh @@ -153,17 +153,23 @@ test_expect_success 'filter shell-escaped filenames' ' : ' -test_expect_success 'required filter success' ' - git config filter.required.smudge cat && - git config filter.required.clean cat && +test_expect_success 'required filter should filter data' ' + git config filter.required.smudge ./rot13.sh && + git config filter.required.clean ./rot13.sh && git config filter.required.required true && echo "*.r filter=required" >.gitattributes && - echo test >test.r && + cat test.o >test.r && git add test.r && + rm -f test.r && - git checkout -- test.r + git checkout -- test.r && + cmp test.o test.r && + + ./rot13.sh expected && + git cat-file blob :test.r >actual && + cmp expected actual ' test_expect_success 'required filter smudge failure' ' @@ -190,6 +196,14 @@ test_expect_success 'required filter clean failure' ' test_must_fail git add test.fc ' +test_expect_success 'filtering large input to small output should use little memory' ' + git config filter.devnull.clean "cat >/dev/null" && + git config filter.devnull.required true && + for i in $(test_seq 1 30); do printf "%1048576d" 1; done >30MB && + echo "30MB filter=devnull" >.gitattributes && + GIT_MMAP_LIMIT=1m GIT_ALLOC_LIMIT=1m git add 30MB +' + test_expect_success EXPENSIVE 'filter large file' ' git config filter.largefile.smudge cat && git config filter.largefile.clean cat &&