diff --git a/diff.c b/diff.c index 2db0d7c0f5c63c2e764d88cd04b1f5c335df20fe..31720abf8f9cc6ce8e62de9a67afb54d8e20a7a8 100644 --- a/diff.c +++ b/diff.c @@ -605,6 +605,47 @@ enum diff_symbol { #define DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF (1<<16) #define DIFF_SYMBOL_CONTENT_WS_MASK (WSEH_NEW | WSEH_OLD | WSEH_CONTEXT | WS_RULE_MASK) +/* + * This struct is used when we need to buffer the output of the diff output. + * + * NEEDSWORK: Instead of storing a copy of the line, add an offset pointer + * into the pre/post image file. This pointer could be a union with the + * line pointer. By storing an offset into the file instead of the literal line, + * we can decrease the memory footprint for the buffered output. At first we + * may want to only have indirection for the content lines, but we could also + * enhance the state for emitting prefabricated lines, e.g. the similarity + * score line or hunk/file headers would only need to store a number or path + * and then the output can be constructed later on depending on state. + */ +struct emitted_diff_symbol { + const char *line; + int len; + int flags; + enum diff_symbol s; +}; +#define EMITTED_DIFF_SYMBOL_INIT {NULL} + +struct emitted_diff_symbols { + struct emitted_diff_symbol *buf; + int nr, alloc; +}; +#define EMITTED_DIFF_SYMBOLS_INIT {NULL, 0, 0} + +static void append_emitted_diff_symbol(struct diff_options *o, + struct emitted_diff_symbol *e) +{ + struct emitted_diff_symbol *f; + + ALLOC_GROW(o->emitted_symbols->buf, + o->emitted_symbols->nr + 1, + o->emitted_symbols->alloc); + f = &o->emitted_symbols->buf[o->emitted_symbols->nr++]; + + memcpy(f, e, sizeof(struct emitted_diff_symbol)); + f->line = e->line ? xmemdupz(e->line, e->len) : NULL; +} + + static void emit_line_ws_markup(struct diff_options *o, const char *set, const char *reset, const char *line, int len, char sign, @@ -631,12 +672,18 @@ static void emit_line_ws_markup(struct diff_options *o, } } -static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s, - const char *line, int len, unsigned flags) +static void emit_diff_symbol_from_struct(struct diff_options *o, + struct emitted_diff_symbol *eds) { static const char *nneof = " No newline at end of file\n"; const char *context, *reset, *set, *meta, *fraginfo; struct strbuf sb = STRBUF_INIT; + + enum diff_symbol s = eds->s; + const char *line = eds->line; + int len = eds->len; + unsigned flags = eds->flags; + switch (s) { case DIFF_SYMBOL_NO_LF_EOF: context = diff_get_color_opt(o, DIFF_CONTEXT); @@ -778,6 +825,17 @@ static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s, strbuf_release(&sb); } +static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s, + const char *line, int len, unsigned flags) +{ + struct emitted_diff_symbol e = {line, len, flags, s}; + + if (o->emitted_symbols) + append_emitted_diff_symbol(o, &e); + else + emit_diff_symbol_from_struct(o, &e); +} + void diff_emit_submodule_del(struct diff_options *o, const char *line) { emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_DEL, line, strlen(line), 0); @@ -1374,9 +1432,29 @@ static void diff_words_show(struct diff_words_data *diff_words) /* In "color-words" mode, show word-diff of words accumulated in the buffer */ static void diff_words_flush(struct emit_callback *ecbdata) { + struct diff_options *wo = ecbdata->diff_words->opt; + if (ecbdata->diff_words->minus.text.size || ecbdata->diff_words->plus.text.size) diff_words_show(ecbdata->diff_words); + + if (wo->emitted_symbols) { + struct diff_options *o = ecbdata->opt; + struct emitted_diff_symbols *wol = wo->emitted_symbols; + int i; + + /* + * NEEDSWORK: + * Instead of appending each, concat all words to a line? + */ + for (i = 0; i < wol->nr; i++) + append_emitted_diff_symbol(o, &wol->buf[i]); + + for (i = 0; i < wol->nr; i++) + free((void *)wol->buf[i].line); + + wol->nr = 0; + } } static void diff_filespec_load_driver(struct diff_filespec *one) @@ -1412,6 +1490,11 @@ static void init_diff_words_data(struct emit_callback *ecbdata, xcalloc(1, sizeof(struct diff_words_data)); ecbdata->diff_words->type = o->word_diff; ecbdata->diff_words->opt = o; + + if (orig_opts->emitted_symbols) + o->emitted_symbols = + xcalloc(1, sizeof(struct emitted_diff_symbols)); + if (!o->word_regex) o->word_regex = userdiff_word_regex(one); if (!o->word_regex) @@ -1446,6 +1529,7 @@ static void free_diff_words_data(struct emit_callback *ecbdata) { if (ecbdata->diff_words) { diff_words_flush(ecbdata); + free (ecbdata->diff_words->opt->emitted_symbols); free (ecbdata->diff_words->opt); free (ecbdata->diff_words->minus.text.ptr); free (ecbdata->diff_words->minus.orig); @@ -4996,16 +5080,37 @@ void diff_warn_rename_limit(const char *varname, int needed, int degraded_cc) static void diff_flush_patch_all_file_pairs(struct diff_options *o) { int i; + static struct emitted_diff_symbols esm = EMITTED_DIFF_SYMBOLS_INIT; struct diff_queue_struct *q = &diff_queued_diff; if (WSEH_NEW & WS_RULE_MASK) die("BUG: WS rules bit mask overlaps with diff symbol flags"); + /* + * For testing purposes we want to make sure the diff machinery + * works completely with the buffer. If there is anything emitted + * outside the emit_string, then the order is screwed + * up and the tests will fail. + * + * TODO (later in this series): + * We'll unset this pointer in a later patch. + */ + o->emitted_symbols = &esm; + for (i = 0; i < q->nr; i++) { struct diff_filepair *p = q->queue[i]; if (check_pair_status(p)) diff_flush_patch(p, o); } + + if (o->emitted_symbols) { + for (i = 0; i < esm.nr; i++) + emit_diff_symbol_from_struct(o, &esm.buf[i]); + + for (i = 0; i < esm.nr; i++) + free((void *)esm.buf[i].line); + } + esm.nr = 0; } void diff_flush(struct diff_options *options) diff --git a/diff.h b/diff.h index d6094a1eedc1542e7cc402b548bc2881aa7eafeb..4a3b9bde4064ba365dcf836092f7aef0e749896c 100644 --- a/diff.h +++ b/diff.h @@ -186,6 +186,8 @@ struct diff_options { void *output_prefix_data; int diff_path_counter; + + struct emitted_diff_symbols *emitted_symbols; }; void diff_emit_submodule_del(struct diff_options *o, const char *line);