processing.py 58.7 KB
Newer Older
1
import json
2
import logging
3 4 5
import math
import os
import sys
6
import hashlib
7 8 9 10 11

import torch
import numpy as np
from PIL import Image, ImageFilter, ImageOps
import random
12 13
import cv2
from skimage import exposure
A
AUTOMATIC 已提交
14
from typing import Any, Dict, List
15

16
import modules.sd_hijack
A
AUTOMATIC 已提交
17
from modules import devices, prompt_parser, masking, sd_samplers, lowvram, generation_parameters_copypaste, extra_networks, sd_vae_approx, scripts, sd_samplers_common, sd_unet
18 19 20
from modules.sd_hijack import model_hijack
from modules.shared import opts, cmd_opts, state
import modules.shared as shared
21
import modules.paths as paths
A
AUTOMATIC 已提交
22
import modules.face_restoration
23
import modules.images as images
A
AUTOMATIC 已提交
24
import modules.styles
25 26
import modules.sd_models as sd_models
import modules.sd_vae as sd_vae
J
Jay Smith 已提交
27 28
from ldm.data.util import AddMiDaS
from ldm.models.diffusion.ddpm import LatentDepth2ImageDiffusion
29

J
Jay Smith 已提交
30
from einops import repeat, rearrange
31
from blendmodes.blend import blendLayers, BlendType
32 33


34 35 36 37 38
# Some of those options should not be changed at all because they would break the model,
# so they were removed from the user-facing options and are kept here as fixed constants.
opt_C = 4  # presumably the number of latent channels -- TODO confirm against usage
opt_f = 8  # presumably the image-to-latent downsampling factor -- TODO confirm against usage


39
def setup_color_correction(image):
    """Return the LAB representation of `image` for use as a colour-correction target.

    A copy of `image` is converted to an array and transformed from RGB to the
    LAB colour space with OpenCV; the resulting array is returned.
    """
    logging.info("Calibrating color correction.")
    rgb_pixels = np.asarray(image.copy())
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2LAB)


45
def apply_color_correction(correction, original_image):
    """Match the colours of `original_image` to the LAB reference `correction`.

    Steps:
      1. Convert `original_image` from RGB to LAB.
      2. Match its histograms against `correction` (channels on axis 2).
      3. Convert the result back to RGB as a uint8 PIL image.
      4. Blend that image with `original_image` using the LUMINOSITY blend type.

    Returns the blended image.
    """
    logging.info("Applying color correction.")

    source_lab = cv2.cvtColor(np.asarray(original_image), cv2.COLOR_RGB2LAB)
    matched_lab = exposure.match_histograms(source_lab, correction, channel_axis=2)
    matched_rgb = cv2.cvtColor(matched_lab, cv2.COLOR_LAB2RGB).astype("uint8")
    corrected = Image.fromarray(matched_rgb)

    return blendLayers(corrected, original_image, BlendType.LUMINOSITY)

A
AUTOMATIC 已提交
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76

def apply_overlay(image, paste_loc, index, overlays):
    """Composite `overlays[index]` on top of `image` and return the RGB result.

    `image` is returned untouched when `overlays` is None or has no entry at
    `index`.

    When `paste_loc` is given as `(x, y, w, h)`, `image` is first resized to
    `w` x `h` and pasted at `(x, y)` onto a new RGBA canvas that has the
    overlay's size; the overlay is then alpha-composited over that canvas.
    """
    has_overlay = overlays is not None and index < len(overlays)
    if not has_overlay:
        return image

    overlay = overlays[index]

    if paste_loc is not None:
        left, top, target_w, target_h = paste_loc
        canvas = Image.new('RGBA', (overlay.width, overlay.height))
        canvas.paste(images.resize_image(1, image, target_w, target_h), (left, top))
        image = canvas

    composited = image.convert('RGBA')
    composited.alpha_composite(overlay)
    return composited.convert('RGB')
79

F
frostydad 已提交
80

81
def txt2img_image_conditioning(sd_model, x, width, height):
    """Build the image-conditioning tensor used for text-to-image sampling.

    The result depends on `sd_model.model.conditioning_key`:
      * 'hybrid' / 'concat' (inpainting models): an all-zero image of size
        `height` x `width` is encoded by the model's first stage, and one
        channel filled with 1.0 is prepended on the third-from-last axis.
      * 'crossattn-adm' (unCLIP models): zeros of shape
        `(batch, 2 * sd_model.noise_augmentor.time_embed.dim)`.
      * anything else: zeros of shape `(batch, 5, 1, 1)`.

    `batch` is `x.shape[0]`; the result uses the dtype of `x`.
    """
    conditioning_key = sd_model.model.conditioning_key
    batch = x.shape[0]

    if conditioning_key in {'hybrid', 'concat'}:  # Inpainting models
        # The entire image is masked, so the "masked image" is simply all zeros.
        blank = torch.zeros(batch, 3, height, width, device=x.device)
        latent = sd_model.get_first_stage_encoding(sd_model.encode_first_stage(blank))

        # Prepend the fake all-ones mask channel in front of the encoded image.
        latent = torch.nn.functional.pad(latent, (0, 0, 0, 0, 1, 0), value=1.0)
        return latent.to(x.dtype)

    if conditioning_key == "crossattn-adm":  # UnCLIP models
        embed_width = 2*sd_model.noise_augmentor.time_embed.dim
        return x.new_zeros(batch, embed_width, dtype=x.dtype, device=x.device)

    # Dummy zero conditioning when neither an inpainting nor an unCLIP model is used.
    # It still takes a little memory but needs no encoder call; a 1x1 image is
    # enough because only its batch size is going to be used.
    return x.new_zeros(batch, 5, 1, 1, dtype=x.dtype, device=x.device)
103 104


105
class StableDiffusionProcessing:
    """
    Holds the parameters and the intermediate state of one generation job.

    The first set of parameters: sd_model -> do_not_reload_embeddings represent the minimum required to create a StableDiffusionProcessing
    """

    def __init__(
        self,
        sd_model=None,
        outpath_samples=None,
        outpath_grids=None,
        prompt: str = "",
        styles: List[str] = None,
        seed: int = -1,
        subseed: int = -1,
        subseed_strength: float = 0,
        seed_resize_from_h: int = -1,
        seed_resize_from_w: int = -1,
        seed_enable_extras: bool = True,
        sampler_name: str = None,
        batch_size: int = 1,
        n_iter: int = 1,
        steps: int = 50,
        cfg_scale: float = 7.0,
        width: int = 512,
        height: int = 512,
        restore_faces: bool = False,
        tiling: bool = False,
        do_not_save_samples: bool = False,
        do_not_save_grid: bool = False,
        extra_generation_params: Dict[Any, Any] = None,
        overlay_images: Any = None,
        negative_prompt: str = None,
        eta: float = None,
        do_not_reload_embeddings: bool = False,
        denoising_strength: float = 0,
        ddim_discretize: str = None,
        s_min_uncond: float = 0.0,
        s_churn: float = 0.0,
        s_tmax: float = None,
        s_tmin: float = 0.0,
        s_noise: float = 1.0,
        override_settings: Dict[str, Any] = None,
        override_settings_restore_afterwards: bool = True,
        sampler_index: int = None,
        script_args: list = None,
    ):
        """Store the job parameters and reset all per-run state.

        Behaviour worth knowing:
          * `sd_model` is accepted but never stored; the `sd_model` property
            always returns `shared.sd_model`.
          * `sampler_index` is ignored; passing it only prints a notice to stderr.
          * `ddim_discretize`, `s_min_uncond`, `s_churn`, `s_tmin` and `s_noise`
            fall back to the matching `opts` value whenever the argument is
            falsy (so 0 / 0.0 also triggers the fallback); `s_tmax` falls back
            to infinity.
          * `override_settings` entries whose key is listed in
            `shared.restricted_opts` are dropped.
          * With `seed_enable_extras=False` the subseed is reset to -1 and the
            subseed strength and seed-resize values are reset to 0.
        """
        if sampler_index is not None:
            print("sampler_index argument for StableDiffusionProcessing does not do anything; use sampler_name", file=sys.stderr)

        # --- parameters supplied by the caller ---
        self.outpath_samples: str = outpath_samples
        self.outpath_grids: str = outpath_grids
        self.prompt: str = prompt
        self.prompt_for_display: str = None
        self.negative_prompt: str = (negative_prompt or "")
        self.styles: list = styles or []
        self.seed: int = seed
        self.subseed: int = subseed
        self.subseed_strength: float = subseed_strength
        self.seed_resize_from_h: int = seed_resize_from_h
        self.seed_resize_from_w: int = seed_resize_from_w
        self.sampler_name: str = sampler_name
        self.batch_size: int = batch_size
        self.n_iter: int = n_iter
        self.steps: int = steps
        self.cfg_scale: float = cfg_scale
        self.width: int = width
        self.height: int = height
        self.restore_faces: bool = restore_faces
        self.tiling: bool = tiling
        self.do_not_save_samples: bool = do_not_save_samples
        self.do_not_save_grid: bool = do_not_save_grid
        self.extra_generation_params: dict = extra_generation_params or {}
        self.overlay_images = overlay_images
        self.eta = eta
        self.do_not_reload_embeddings = do_not_reload_embeddings
        self.paste_to = None
        self.color_corrections = None
        self.denoising_strength: float = denoising_strength
        self.sampler_noise_scheduler_override = None

        # --- sampler tuning values; a falsy argument falls back to the global option ---
        self.ddim_discretize = ddim_discretize or opts.ddim_discretize
        self.s_min_uncond = s_min_uncond or opts.s_min_uncond
        self.s_churn = s_churn or opts.s_churn
        self.s_tmin = s_tmin or opts.s_tmin
        self.s_tmax = s_tmax or float('inf')  # not representable as a standard ui option
        self.s_noise = s_noise or opts.s_noise

        # Restricted options may not be overridden per job, so they are filtered out here.
        self.override_settings = {k: v for k, v in (override_settings or {}).items() if k not in shared.restricted_opts}
        self.override_settings_restore_afterwards = override_settings_restore_afterwards
        self.is_using_inpainting_conditioning = False
        self.disable_extra_networks = False
        self.token_merging_ratio = 0
        self.token_merging_ratio_hr = 0

        if not seed_enable_extras:
            self.subseed = -1
            self.subseed_strength = 0
            self.seed_resize_from_h = 0
            self.seed_resize_from_w = 0

        # --- state filled in while the job runs ---
        self.scripts = None
        self.script_args = script_args
        self.all_prompts = None
        self.all_negative_prompts = None
        self.all_seeds = None
        self.all_subseeds = None
        self.iteration = 0
        self.is_hr_pass = False
        self.sampler = None

        self.prompts = None
        self.negative_prompts = None
        self.extra_network_data = None
        self.seeds = None
        self.subseeds = None

        # Conditioning caches: each is a two-element list [cache key, cached result]
        # as used by get_conds_with_caching().
        self.step_multiplier = 1
        self.cached_uc = [None, None]
        self.cached_c = [None, None]
        self.uc = None
        self.c = None

    @property
    def sd_model(self):
        """The globally loaded model, `shared.sd_model`."""
        return shared.sd_model

    def txt2img_image_conditioning(self, x, width=None, height=None):
        """Return txt2img image conditioning for `x`.

        Records in `is_using_inpainting_conditioning` whether the model's
        conditioning key is 'hybrid' or 'concat', then delegates to the
        module-level `txt2img_image_conditioning`. A falsy `width` / `height`
        falls back to `self.width` / `self.height`.
        """
        self.is_using_inpainting_conditioning = self.sd_model.model.conditioning_key in {'hybrid', 'concat'}

        return txt2img_image_conditioning(self.sd_model, x, width or self.width, height or self.height)

    def depth2img_image_conditioning(self, source_image):
        """Return depth conditioning computed from `source_image`.

        Only `source_image[0]` is fed to the depth model (repeated
        `batch_size` times). The depth output is resized with bicubic
        interpolation to the trailing dimensions of the first-stage encoding of
        `source_image`, then rescaled with its overall min/max to [-1, 1].
        """
        # Use the AddMiDaS helper to format our source image to suit the MiDaS model
        transformer = AddMiDaS(model_type="dpt_hybrid")
        transformed = transformer({"jpg": rearrange(source_image[0], "c h w -> h w c")})
        midas_in = torch.from_numpy(transformed["midas_in"][None, ...]).to(device=shared.device)
        midas_in = repeat(midas_in, "1 ... -> n ...", n=self.batch_size)

        conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(source_image))
        conditioning = torch.nn.functional.interpolate(
            self.sd_model.depth_model(midas_in),
            size=conditioning_image.shape[2:],
            mode="bicubic",
            align_corners=False,
        )

        # NOTE(review): a constant depth map makes depth_max == depth_min and the
        # division below yields non-finite values; no guard exists for that case.
        (depth_min, depth_max) = torch.aminmax(conditioning)
        conditioning = 2. * (conditioning - depth_min) / (depth_max - depth_min) - 1.
        return conditioning

    def edit_image_conditioning(self, source_image):
        """Return `.mode()` of the first-stage encoding of `source_image`."""
        conditioning_image = self.sd_model.encode_first_stage(source_image).mode()

        return conditioning_image

    def unclip_image_conditioning(self, source_image):
        """Return the model embedder's output for `source_image`.

        When the model has a noise augmentor, it is applied with noise level 0
        and the returned noise-level embedding is concatenated on dim 1.
        """
        c_adm = self.sd_model.embedder(source_image)
        if self.sd_model.noise_augmentor is not None:
            noise_level = 0  # TODO: Allow other noise levels?
            c_adm, noise_level_emb = self.sd_model.noise_augmentor(c_adm, noise_level=repeat(torch.tensor([noise_level]).to(c_adm.device), '1 -> b', b=c_adm.shape[0]))
            c_adm = torch.cat((c_adm, noise_level_emb), 1)
        return c_adm

    def inpainting_image_conditioning(self, source_image, latent_image, image_mask=None):
        """Return inpainting conditioning: mask and encoded masked image, concatenated on dim 1.

        `image_mask` may be a tensor (used as is), an image object (converted
        to "L", scaled to 0..1 and rounded to 0.0 / 1.0) or None (an all-ones
        mask with the trailing two dimensions of `source_image`).

        Also sets `is_using_inpainting_conditioning` to True.
        """
        self.is_using_inpainting_conditioning = True

        # Handle the different mask inputs
        if image_mask is not None:
            if torch.is_tensor(image_mask):
                conditioning_mask = image_mask
            else:
                conditioning_mask = np.array(image_mask.convert("L"))
                conditioning_mask = conditioning_mask.astype(np.float32) / 255.0
                conditioning_mask = torch.from_numpy(conditioning_mask[None, None])

                # Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0
                conditioning_mask = torch.round(conditioning_mask)
        else:
            conditioning_mask = source_image.new_ones(1, 1, *source_image.shape[-2:])

        # Create another latent image, this time with a masked version of the original input.
        # Smoothly interpolate between the masked and unmasked latent conditioning image using a parameter.
        conditioning_mask = conditioning_mask.to(device=source_image.device, dtype=source_image.dtype)
        conditioning_image = torch.lerp(
            source_image,
            source_image * (1.0 - conditioning_mask),
            getattr(self, "inpainting_mask_weight", shared.opts.inpainting_mask_weight)
        )

        # Encode the new masked image using first stage of network.
        conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image))

        # Create the concatenated conditioning tensor to be fed to `c_concat`
        conditioning_mask = torch.nn.functional.interpolate(conditioning_mask, size=latent_image.shape[-2:])
        conditioning_mask = conditioning_mask.expand(conditioning_image.shape[0], -1, -1, -1)
        image_conditioning = torch.cat([conditioning_mask, conditioning_image], dim=1)
        image_conditioning = image_conditioning.to(shared.device).type(self.sd_model.dtype)

        return image_conditioning

    def img2img_image_conditioning(self, source_image, latent_image, image_mask=None):
        """Pick and build the image conditioning appropriate for the loaded model.

        Checked in this order: depth2img model, "edit" cond stage key,
        'hybrid'/'concat' sampler conditioning key (inpainting),
        "crossattn-adm" sampler conditioning key (unCLIP). If none applies,
        zeros of shape `(latent_image.shape[0], 5, 1, 1)` are returned.
        """
        source_image = devices.cond_cast_float(source_image)

        # HACK: Using introspection as the Depth2Image model doesn't appear to uniquely
        # identify itself with a field common to all models. The conditioning_key is also hybrid.
        if isinstance(self.sd_model, LatentDepth2ImageDiffusion):
            return self.depth2img_image_conditioning(source_image)

        if self.sd_model.cond_stage_key == "edit":
            return self.edit_image_conditioning(source_image)

        if self.sampler.conditioning_key in {'hybrid', 'concat'}:
            return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask)

        if self.sampler.conditioning_key == "crossattn-adm":
            return self.unclip_image_conditioning(source_image)

        # Dummy zero conditioning if we're not using inpainting or depth model.
        return latent_image.new_zeros(latent_image.shape[0], 5, 1, 1)

    def init(self, all_prompts, all_seeds, all_subseeds):
        """Hook with no behaviour in this base class."""
        pass

    def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
        """Not implemented in this base class; always raises NotImplementedError."""
        raise NotImplementedError()

    def close(self):
        """Drop the sampler, the current conditionings and both conditioning caches."""
        self.sampler = None
        self.c = None
        self.uc = None
        self.cached_c = [None, None]
        self.cached_uc = [None, None]

    def get_token_merging_ratio(self, for_hr=False):
        """Return the first truthy token-merging ratio.

        For `for_hr=True` the lookup order is: this job's hr ratio, the hr
        option, this job's ratio, the general option. Otherwise: this job's
        ratio, then the general option.
        """
        if for_hr:
            return self.token_merging_ratio_hr or opts.token_merging_ratio_hr or self.token_merging_ratio or opts.token_merging_ratio

        return self.token_merging_ratio or opts.token_merging_ratio

    def setup_prompts(self):
        """Populate `all_prompts` / `all_negative_prompts` and apply the selected styles.

        A list prompt is used as given; any other value is repeated
        `batch_size * n_iter` times. The same rule applies to the negative prompt.
        """
        # isinstance (rather than an exact type comparison) so list subclasses
        # are also treated as per-image prompt lists.
        if isinstance(self.prompt, list):
            self.all_prompts = self.prompt
        else:
            self.all_prompts = self.batch_size * self.n_iter * [self.prompt]

        if isinstance(self.negative_prompt, list):
            self.all_negative_prompts = self.negative_prompt
        else:
            self.all_negative_prompts = self.batch_size * self.n_iter * [self.negative_prompt]

        self.all_prompts = [shared.prompt_styles.apply_styles_to_prompt(x, self.styles) for x in self.all_prompts]
        self.all_negative_prompts = [shared.prompt_styles.apply_negative_styles_to_prompt(x, self.styles) for x in self.all_negative_prompts]

    def get_conds_with_caching(self, function, required_prompts, steps, cache, extra_network_data):
        """
        Returns the result of calling function(shared.sd_model, required_prompts, steps)
        using a cache to store the result if the same arguments have been used before.

        cache is an array containing two elements. The first element is a tuple
        representing the previously used arguments, or None if no arguments
        have been used before. The second element is where the previously
        computed result is stored.
        """
        def current_key():
            # Everything that, when changed, must invalidate the cached conditioning.
            return (required_prompts, steps, opts.CLIP_stop_at_last_layers, shared.sd_model.sd_checkpoint_info, extra_network_data)

        if cache[0] is not None and current_key() == cache[0]:
            return cache[1]

        with devices.autocast():
            cache[1] = function(shared.sd_model, required_prompts, steps)

        cache[0] = current_key()
        return cache[1]

    def setup_conds(self):
        """Compute (or reuse from cache) `self.uc` and `self.c` for the current prompts.

        The step count passed on is doubled when the sampler's config has the
        "second_order" option set.
        """
        sampler_config = sd_samplers.find_sampler_config(self.sampler_name)
        self.step_multiplier = 2 if sampler_config and sampler_config.options.get("second_order", False) else 1

        self.uc = self.get_conds_with_caching(prompt_parser.get_learned_conditioning, self.negative_prompts, self.steps * self.step_multiplier, self.cached_uc, self.extra_network_data)
        self.c = self.get_conds_with_caching(prompt_parser.get_multicond_learned_conditioning, self.prompts, self.steps * self.step_multiplier, self.cached_c, self.extra_network_data)

    def parse_extra_network_prompts(self):
        """Replace `self.prompts` with the parsed prompts and return the extra-network data."""
        self.prompts, extra_network_data = extra_networks.parse_prompts(self.prompts)

        return extra_network_data

346 347

class Processed:
    """Result of a generation job: the images plus the parameters that produced them."""

    @staticmethod
    def _first_if_list(value):
        """Return `value[0]` when `value` is a list (or list subclass), else `value` itself."""
        return value[0] if isinstance(value, list) else value

    def __init__(self, p: "StableDiffusionProcessing", images_list, seed=-1, info="", subseed=None, all_prompts=None, all_negative_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None, comments=""):
        """Copy the relevant settings from `p` and store the outputs.

        `prompt`, `negative_prompt`, `seed` and `subseed` are reduced to their
        first element when given as lists; a None seed / subseed becomes -1.
        Each `all_*` list falls back first to the value on `p`, then to a
        one-element list holding the single value. `infotexts` falls back to
        `[info]`.
        """
        self.images = images_list
        self.prompt = p.prompt
        self.negative_prompt = p.negative_prompt
        self.seed = seed
        self.subseed = subseed
        self.subseed_strength = p.subseed_strength
        self.info = info
        self.comments = comments
        self.width = p.width
        self.height = p.height
        self.sampler_name = p.sampler_name
        self.cfg_scale = p.cfg_scale
        self.image_cfg_scale = getattr(p, 'image_cfg_scale', None)
        self.steps = p.steps
        self.batch_size = p.batch_size
        self.restore_faces = p.restore_faces
        self.face_restoration_model = opts.face_restoration_model if p.restore_faces else None
        self.sd_model_hash = shared.sd_model.sd_model_hash
        self.seed_resize_from_w = p.seed_resize_from_w
        self.seed_resize_from_h = p.seed_resize_from_h
        self.denoising_strength = getattr(p, 'denoising_strength', None)
        self.extra_generation_params = p.extra_generation_params
        self.index_of_first_image = index_of_first_image
        self.styles = p.styles
        self.job_timestamp = state.job_timestamp
        self.clip_skip = opts.CLIP_stop_at_last_layers
        self.token_merging_ratio = p.token_merging_ratio
        self.token_merging_ratio_hr = p.token_merging_ratio_hr

        self.eta = p.eta
        self.ddim_discretize = p.ddim_discretize
        self.s_churn = p.s_churn
        self.s_tmin = p.s_tmin
        self.s_tmax = p.s_tmax
        self.s_noise = p.s_noise
        self.s_min_uncond = p.s_min_uncond
        self.sampler_noise_scheduler_override = p.sampler_noise_scheduler_override

        # Collapse per-image lists to the value of the first image.
        self.prompt = self._first_if_list(self.prompt)
        self.negative_prompt = self._first_if_list(self.negative_prompt)
        self.seed = int(self._first_if_list(self.seed)) if self.seed is not None else -1
        self.subseed = int(self._first_if_list(self.subseed)) if self.subseed is not None else -1
        self.is_using_inpainting_conditioning = p.is_using_inpainting_conditioning

        self.all_prompts = all_prompts or p.all_prompts or [self.prompt]
        self.all_negative_prompts = all_negative_prompts or p.all_negative_prompts or [self.negative_prompt]
        self.all_seeds = all_seeds or p.all_seeds or [self.seed]
        self.all_subseeds = all_subseeds or p.all_subseeds or [self.subseed]
        self.infotexts = infotexts or [info]

    def js(self):
        """Return a JSON string describing this result.

        "prompt" and "negative_prompt" are taken from the first entry of the
        respective `all_*` list.
        """
        obj = {
            "prompt": self.all_prompts[0],
            "all_prompts": self.all_prompts,
            "negative_prompt": self.all_negative_prompts[0],
            "all_negative_prompts": self.all_negative_prompts,
            "seed": self.seed,
            "all_seeds": self.all_seeds,
            "subseed": self.subseed,
            "all_subseeds": self.all_subseeds,
            "subseed_strength": self.subseed_strength,
            "width": self.width,
            "height": self.height,
            "sampler_name": self.sampler_name,
            "cfg_scale": self.cfg_scale,
            "steps": self.steps,
            "batch_size": self.batch_size,
            "restore_faces": self.restore_faces,
            "face_restoration_model": self.face_restoration_model,
            "sd_model_hash": self.sd_model_hash,
            "seed_resize_from_w": self.seed_resize_from_w,
            "seed_resize_from_h": self.seed_resize_from_h,
            "denoising_strength": self.denoising_strength,
            "extra_generation_params": self.extra_generation_params,
            "index_of_first_image": self.index_of_first_image,
            "infotexts": self.infotexts,
            "styles": self.styles,
            "job_timestamp": self.job_timestamp,
            "clip_skip": self.clip_skip,
            "is_using_inpainting_conditioning": self.is_using_inpainting_conditioning,
        }

        return json.dumps(obj)

    def infotext(self, p: "StableDiffusionProcessing", index):
        """Return the infotext for image number `index`.

        `index` is split into an iteration (`index // batch_size`) and a
        position in the batch (`index % batch_size`).
        """
        return create_infotext(p, self.all_prompts, self.all_seeds, self.all_subseeds, comments=[], position_in_batch=index % self.batch_size, iteration=index // self.batch_size)

    def get_token_merging_ratio(self, for_hr=False):
        """Return the stored hr token-merging ratio when `for_hr` is true, else the regular one."""
        return self.token_merging_ratio_hr if for_hr else self.token_merging_ratio

438

439 440 441 442
# from https://discuss.pytorch.org/t/help-regarding-slerp-function-for-generative-model-sampling/32475/3
def slerp(val, low, high):
    """Spherically interpolate between `low` and `high`.

    `val` = 0 yields `low` and `val` = 1 yields `high`. Vectors are taken
    along dim 1 (norms and dot products are computed over that dimension).

    When the inputs are almost parallel (mean cosine above 0.9995) the
    spherical formula would divide by a sine close to zero, so plain linear
    interpolation is used instead.
    """
    low_norm = low/torch.norm(low, dim=1, keepdim=True)
    high_norm = high/torch.norm(high, dim=1, keepdim=True)
    dot = (low_norm*high_norm).sum(1)

    if dot.mean() > 0.9995:
        # Same orientation as the spherical branch below: the weight of `low`
        # is (1 - val) and the weight of `high` is val.
        return low * (1 - val) + high * val

    omega = torch.acos(dot)
    so = torch.sin(omega)
    res = (torch.sin((1.0-val)*omega)/so).unsqueeze(1)*low + (torch.sin(val*omega)/so).unsqueeze(1) * high
    return res
452

453

454
def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, seed_resize_from_h=0, seed_resize_from_w=0, p=None):
    """Create one seeded noise tensor per entry of `seeds` and return them stacked.

    For each seed:
      * noise is drawn with `devices.randn(seed, noise_shape)`; when
        `subseeds` is given, noise for the sub-seed (0 when the list is too
        short) is drawn first and the two are combined with `slerp` using
        `subseed_strength`;
      * when both seed-resize values are positive, the noise is drawn at
        `(shape[0], seed_resize_from_h//8, seed_resize_from_w//8)` and its
        centred overlap is copied into fresh noise of the full `shape`;
      * when sampler noises are requested (see the condition below), extra
        unseeded noise tensors are drawn for the sampler, optionally after
        reseeding torch with `seed + eta_noise_seed_delta`.

    The stacked result is moved to `shared.device`. As a side effect
    `p.sampler.sampler_noises` is set when sampler noises were generated.
    """
    def centered_overlap(target_len, source_len):
        # Offsets and length of the centred region shared by two axis lengths:
        # (start in target, start in source, number of elements).
        offset = (target_len - source_len) // 2
        length = source_len if offset >= 0 else source_len + 2 * offset
        return max(offset, 0), max(-offset, 0), length

    eta_noise_seed_delta = opts.eta_noise_seed_delta or 0
    noises = []

    # if we have multiple seeds, this means we are working with batch size>1; this then
    # enables the generation of additional tensors with noise that the sampler will use during its processing.
    # Using those pre-generated tensors instead of simple torch.randn allows a batch with seeds [100, 101] to
    # produce the same images as with two batches [100], [101].
    wants_sampler_noises = (
        p is not None
        and p.sampler is not None
        and ((len(seeds) > 1 and opts.enable_batch_seeds) or eta_noise_seed_delta > 0)
    )
    sampler_noises = [[] for _ in range(p.sampler.number_of_needed_noises(p))] if wants_sampler_noises else None

    for idx, seed in enumerate(seeds):
        if seed_resize_from_h <= 0 or seed_resize_from_w <= 0:
            noise_shape = shape
        else:
            noise_shape = (shape[0], seed_resize_from_h//8, seed_resize_from_w//8)

        subnoise = None
        if subseeds is not None:
            subseed = subseeds[idx] if idx < len(subseeds) else 0
            subnoise = devices.randn(subseed, noise_shape)

        # randn results depend on device; gpu and cpu get different results for same seed;
        # the way I see it, it's better to do this on CPU, so that everyone gets same result;
        # but the original script had it like this, so I do not dare change it for now because
        # it will break everyone's seeds.
        noise = devices.randn(seed, noise_shape)

        if subnoise is not None:
            noise = slerp(subseed_strength, noise, subnoise)

        if noise_shape != shape:
            # Paste the centred overlap of the resized noise into full-size noise.
            canvas = devices.randn(seed, shape)
            dst_x, src_x, span_w = centered_overlap(shape[2], noise_shape[2])
            dst_y, src_y, span_h = centered_overlap(shape[1], noise_shape[1])

            canvas[:, dst_y:dst_y+span_h, dst_x:dst_x+span_w] = noise[:, src_y:src_y+span_h, src_x:src_x+span_w]
            noise = canvas

        if sampler_noises is not None:
            needed = p.sampler.number_of_needed_noises(p)

            if eta_noise_seed_delta > 0:
                torch.manual_seed(seed + eta_noise_seed_delta)

            for slot in range(needed):
                sampler_noises[slot].append(devices.randn_without_seed(tuple(noise_shape)))

        noises.append(noise)

    if sampler_noises is not None:
        p.sampler.sampler_noises = [torch.stack(n).to(shared.device) for n in sampler_noises]

    return torch.stack(noises).to(shared.device)


A
AUTOMATIC 已提交
517 518
def decode_first_stage(model, x):
    """Decode `x` with `model.decode_first_stage` and return the result.

    The call runs inside `devices.autocast`, which is disabled when `x`
    already has the dtype `devices.dtype_vae`.
    """
    already_vae_dtype = x.dtype == devices.dtype_vae
    with devices.autocast(disable=already_vae_dtype):
        decoded = model.decode_first_stage(x)

    return decoded


524 525 526 527 528 529 530
def get_fixed_seed(seed):
    """Return `seed`, or a fresh random seed when none was really given.

    None, the empty string and -1 all mean "pick one for me"; in that case a
    random integer in `range(4294967294)` is returned. Any other value is
    returned unchanged.
    """
    is_placeholder = seed is None or seed == '' or seed == -1
    if not is_placeholder:
        return seed

    return int(random.randrange(4294967294))


531
def fix_seed(p):
    """Replace `p.seed` and `p.subseed` in place with the results of `get_fixed_seed`."""
    for attr in ("seed", "subseed"):
        setattr(p, attr, get_fixed_seed(getattr(p, attr)))
A
AUTOMATIC 已提交
534 535


536 537 538 539 540 541 542 543 544 545
def program_version():
    """Return `launch.git_tag()`, or None when it reports "<none>"."""
    # Imported here rather than at module level, as in the original code.
    import launch

    tag = launch.git_tag()
    return None if tag == "<none>" else tag


546
def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iteration=0, position_in_batch=0):
    """Build the text that describes how one image was generated.

    The image is identified by `position_in_batch + iteration * p.batch_size`.
    The result has up to three lines: the prompt, an optional
    "Negative prompt: ..." line, and a comma-separated list of generation
    parameters. Parameters whose value is None are left out; a parameter whose
    key equals its value is written as the bare key.

    `comments` is accepted but not used by this function.
    """
    index = position_in_batch + iteration * p.batch_size

    clip_skip = getattr(p, 'clip_skip', opts.CLIP_stop_at_last_layers)
    enable_hr = getattr(p, 'enable_hr', False)
    token_merging_ratio = p.get_token_merging_ratio()
    token_merging_ratio_hr = p.get_token_merging_ratio(for_hr=True)

    # ENSD is only reported when it is non-zero and the sampler actually uses it.
    uses_ensd = opts.eta_noise_seed_delta != 0 and sd_samplers_common.is_sampler_using_eta_noise_seed_delta(p)

    has_variation = p.subseed_strength != 0

    # Key order defines the order in the resulting text; extra_generation_params
    # may override earlier keys and are placed just before "Version".
    generation_params = {
        "Steps": p.steps,
        "Sampler": p.sampler_name,
        "CFG scale": p.cfg_scale,
        "Image CFG scale": getattr(p, 'image_cfg_scale', None),
        "Seed": all_seeds[index],
        "Face restoration": (opts.face_restoration_model if p.restore_faces else None),
        "Size": f"{p.width}x{p.height}",
        "Model hash": getattr(p, 'sd_model_hash', None if not opts.add_model_hash_to_info or not shared.sd_model.sd_model_hash else shared.sd_model.sd_model_hash),
        "Model": (None if not opts.add_model_name_to_info or not shared.sd_model.sd_checkpoint_info.model_name else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')),
        "Variation seed": (all_subseeds[index] if has_variation else None),
        "Variation seed strength": (p.subseed_strength if has_variation else None),
        "Seed resize from": (None if p.seed_resize_from_w == 0 or p.seed_resize_from_h == 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"),
        "Denoising strength": getattr(p, 'denoising_strength', None),
        "Conditional mask weight": getattr(p, "inpainting_mask_weight", shared.opts.inpainting_mask_weight) if p.is_using_inpainting_conditioning else None,
        "Clip skip": None if clip_skip <= 1 else clip_skip,
        "ENSD": opts.eta_noise_seed_delta if uses_ensd else None,
        "Token merging ratio": None if token_merging_ratio == 0 else token_merging_ratio,
        "Token merging ratio hr": None if not enable_hr or token_merging_ratio_hr == 0 else token_merging_ratio_hr,
        "Init image hash": getattr(p, 'init_img_hash', None),
        "RNG": opts.randn_source if opts.randn_source != "GPU" else None,
        "NGMS": None if p.s_min_uncond == 0 else p.s_min_uncond,
        **p.extra_generation_params,
        "Version": program_version() if opts.add_version_to_infotext else None,
    }

    rendered = []
    for key, value in generation_params.items():
        if value is None:
            continue
        rendered.append(key if key == value else f'{key}: {generation_parameters_copypaste.quote(value)}')
    generation_params_text = ", ".join(rendered)

    negative_prompt_text = ""
    if p.all_negative_prompts[index]:
        negative_prompt_text = f"\nNegative prompt: {p.all_negative_prompts[index]}"

    return f"{all_prompts[index]}{negative_prompt_text}\n{generation_params_text}".strip()
589 590


591
def process_images(p: StableDiffusionProcessing) -> Processed:
    """Run the job `p` with its setting overrides applied and return the result.

    Sequence:
      1. Call the scripts' `before_process` hook, if scripts are attached.
      2. Remember the current value of every option that will be overridden.
      3. Apply the overrides (reloading model / VAE weights where needed),
         enable token merging and run `process_images_inner`.
      4. Always: disable token merging again and, when
         `p.override_settings_restore_afterwards` is set, restore the
         remembered option values (reloading the VAE if it was overridden).
    """
    if p.scripts is not None:
        p.scripts.before_process(p)

    stored_opts = {name: opts.data[name] for name in p.override_settings.keys()}

    # Options whose change requires weights to be reloaded right away.
    reload_after_override = {
        'sd_model_checkpoint': sd_models.reload_model_weights,
        'sd_vae': sd_vae.reload_vae_weights,
    }

    try:
        # if no checkpoint override or the override checkpoint can't be found, remove override entry and load opts checkpoint
        requested_checkpoint = p.override_settings.get('sd_model_checkpoint')
        if sd_models.checkpoint_alisases.get(requested_checkpoint) is None:
            p.override_settings.pop('sd_model_checkpoint', None)
            sd_models.reload_model_weights()

        for name, value in p.override_settings.items():
            setattr(opts, name, value)

            reload_weights = reload_after_override.get(name)
            if reload_weights is not None:
                reload_weights()

        sd_models.apply_token_merging(p.sd_model, p.get_token_merging_ratio())

        return process_images_inner(p)

    finally:
        sd_models.apply_token_merging(p.sd_model, 0)

        # restore opts to original state
        if p.override_settings_restore_afterwards:
            for name, value in stored_opts.items():
                setattr(opts, name, value)

                if name == 'sd_vae':
                    sd_vae.reload_vae_weights()


def process_images_inner(p: StableDiffusionProcessing) -> Processed:
    """this is the main loop that both txt2img and img2img use; it calls p.init once inside all the scopes and p.sample once per batch

    For each of the p.n_iter batches this slices the prompts and seeds belonging
    to the batch, runs the script callbacks, samples latents with p.sample(),
    decodes them and post-processes every image (optional face restoration,
    script postprocessing, color correction, overlay, saving). After the loop an
    image grid is optionally built, returned and/or saved.

    Returns a Processed built from the output images, their infotexts, the
    first seed/subseed and the comments collected from model_hijack.
    """

    if isinstance(p.prompt, list):
        assert len(p.prompt) > 0
    else:
        assert p.prompt is not None

    devices.torch_gc()

    seed = get_fixed_seed(p.seed)
    subseed = get_fixed_seed(p.subseed)

    modules.sd_hijack.model_hijack.apply_circular(p.tiling)
    modules.sd_hijack.model_hijack.clear_comments()

    # used as an ordered set: keys are the comment strings, values are ignored
    comments = {}

    p.setup_prompts()

    if isinstance(seed, list):
        p.all_seeds = seed
    else:
        # the seed is only incremented per image when no subseed blending is requested
        p.all_seeds = [int(seed) + (x if p.subseed_strength == 0 else 0) for x in range(len(p.all_prompts))]

    if isinstance(subseed, list):
        p.all_subseeds = subseed
    else:
        p.all_subseeds = [int(subseed) + x for x in range(len(p.all_prompts))]

    def infotext(iteration=0, position_in_batch=0):
        return create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, comments, iteration, position_in_batch)

    if os.path.exists(cmd_opts.embeddings_dir) and not p.do_not_reload_embeddings:
        model_hijack.embedding_db.load_textual_inversion_embeddings()

    if p.scripts is not None:
        p.scripts.process(p)

    infotexts = []
    output_images = []

    with torch.no_grad(), p.sd_model.ema_scope():
        with devices.autocast():
            p.init(p.all_prompts, p.all_seeds, p.all_subseeds)

            # for OSX, loading the model during sampling changes the generated picture, so it is loaded here
            if shared.opts.live_previews_enable and opts.show_progress_type == "Approx NN":
                sd_vae_approx.model()

            sd_unet.apply_unet()

        if state.job_count == -1:
            state.job_count = p.n_iter

        for n in range(p.n_iter):
            p.iteration = n

            if state.skipped:
                state.skipped = False

            if state.interrupted:
                break

            batch_start = n * p.batch_size
            batch_end = (n + 1) * p.batch_size
            p.prompts = p.all_prompts[batch_start:batch_end]
            p.negative_prompts = p.all_negative_prompts[batch_start:batch_end]
            p.seeds = p.all_seeds[batch_start:batch_end]
            p.subseeds = p.all_subseeds[batch_start:batch_end]

            if p.scripts is not None:
                p.scripts.before_process_batch(p, batch_number=n, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds)

            if len(p.prompts) == 0:
                break

            p.extra_network_data = p.parse_extra_network_prompts()

            if not p.disable_extra_networks:
                with devices.autocast():
                    extra_networks.activate(p, p.extra_network_data)

            if p.scripts is not None:
                p.scripts.process_batch(p, batch_number=n, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds)

            # params.txt should be saved after scripts.process_batch, since the
            # infotext could be modified by that callback
            # Example: a wildcard processed by process_batch sets an extra model
            # strength, which is saved as "Model Strength: 1.0" in the infotext
            if n == 0:
                with open(os.path.join(paths.data_path, "params.txt"), "w", encoding="utf8") as file:
                    processed = Processed(p, [], p.seed, "")
                    file.write(processed.infotext(p, 0))

            p.setup_conds()

            if len(model_hijack.comments) > 0:
                for comment in model_hijack.comments:
                    comments[comment] = 1

            if p.n_iter > 1:
                shared.state.job = f"Batch {n+1} out of {p.n_iter}"

            with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
                samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)

            # latents are decoded one at a time and moved to the CPU right away
            x_samples_ddim = [decode_first_stage(p.sd_model, samples_ddim[i:i+1].to(dtype=devices.dtype_vae))[0].cpu() for i in range(samples_ddim.size(0))]
            for x in x_samples_ddim:
                devices.test_for_nans(x, "vae")

            x_samples_ddim = torch.stack(x_samples_ddim).float()
            # map the decoder output from [-1, 1] to [0, 1]
            x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)

            del samples_ddim

            if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
                lowvram.send_everything_to_cpu()

            devices.torch_gc()

            if p.scripts is not None:
                p.scripts.postprocess_batch(p, x_samples_ddim, batch_number=n)

            for i, x_sample in enumerate(x_samples_ddim):
                p.batch_index = i

                # channels-first float image in [0, 1] -> channels-last uint8 array
                x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2)
                x_sample = x_sample.astype(np.uint8)

                if p.restore_faces:
                    # NOTE(review): gated on opts.samples_save like the main save below;
                    # `opts.save` appears to resolve to the options object's save method
                    # (always truthy) rather than to a setting - confirm.
                    if opts.samples_save and not p.do_not_save_samples and opts.save_images_before_face_restoration:
                        images.save_image(Image.fromarray(x_sample), p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-before-face-restoration")

                    devices.torch_gc()

                    x_sample = modules.face_restoration.restore_faces(x_sample)
                    devices.torch_gc()

                image = Image.fromarray(x_sample)

                if p.scripts is not None:
                    pp = scripts.PostprocessImageArgs(image)
                    p.scripts.postprocess_image(p, pp)
                    image = pp.image

                if p.color_corrections is not None and i < len(p.color_corrections):
                    # same gate as the face restoration save above (opts.samples_save)
                    if opts.samples_save and not p.do_not_save_samples and opts.save_images_before_color_correction:
                        image_without_cc = apply_overlay(image, p.paste_to, i, p.overlay_images)
                        images.save_image(image_without_cc, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-before-color-correction")
                    image = apply_color_correction(p.color_corrections[i], image)

                image = apply_overlay(image, p.paste_to, i, p.overlay_images)

                if opts.samples_save and not p.do_not_save_samples:
                    images.save_image(image, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(n, i), p=p)

                text = infotext(n, i)
                infotexts.append(text)
                if opts.enable_pnginfo:
                    image.info["parameters"] = text
                output_images.append(image)

                if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay and any((opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite)):
                    image_mask = p.mask_for_overlay.convert('RGB')
                    image_mask_composite = Image.composite(image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA')

                    if opts.save_mask:
                        images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-mask")

                    if opts.save_mask_composite:
                        images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-mask-composite")

                    if opts.return_mask:
                        output_images.append(image_mask)

                    if opts.return_mask_composite:
                        output_images.append(image_mask_composite)

            del x_samples_ddim

            devices.torch_gc()

            state.nextjob()

        p.color_corrections = None

        index_of_first_image = 0
        unwanted_grid_because_of_img_count = len(output_images) < 2 and opts.grid_only_if_multiple
        if (opts.return_grid or opts.grid_save) and not p.do_not_save_grid and not unwanted_grid_because_of_img_count:
            grid = images.image_grid(output_images, p.batch_size)

            if opts.return_grid:
                # the grid goes first, so the first real image moves to index 1
                text = infotext()
                infotexts.insert(0, text)
                if opts.enable_pnginfo:
                    grid.info["parameters"] = text
                output_images.insert(0, grid)
                index_of_first_image = 1

            if opts.grid_save:
                images.save_image(grid, p.outpath_grids, "grid", p.all_seeds[0], p.all_prompts[0], opts.grid_format, info=infotext(), short_filename=not opts.grid_extended_filename, p=p, grid=True)

    if not p.disable_extra_networks and p.extra_network_data:
        extra_networks.deactivate(p, p.extra_network_data)

    devices.torch_gc()

    res = Processed(
        p,
        images_list=output_images,
        seed=p.all_seeds[0],
        info=infotext(),
        comments="".join(f"{comment}\n" for comment in comments),
        subseed=p.all_subseeds[0],
        index_of_first_image=index_of_first_image,
        infotexts=infotexts,
    )

    if p.scripts is not None:
        p.scripts.postprocess(p, res)

    return res
851 852


853 854 855 856 857 858 859 860 861 862 863 864
def old_hires_fix_first_pass_dimensions(width, height):
    """old algorithm for auto-calculating first pass size

    Scales (width, height) so the area is about 512 * 512 pixels, then rounds
    each side up to a multiple of 64. Returns the resulting (width, height).
    """

    target_area = 512 * 512
    shrink = math.sqrt(target_area / (width * height))

    def snap_up(side):
        # round the scaled side up to the next multiple of 64
        return math.ceil(shrink * side / 64) * 64

    return snap_up(width), snap_up(height)


865 866
class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
    """txt2img processing with an optional second ("hires fix") pass.

    When enable_hr is set, sample() first generates at (width, height), upscales
    the result to (hr_upscale_to_x, hr_upscale_to_y) either in latent space or
    with an image upscaler, and then runs an img2img pass over the upscaled result.
    """

    sampler = None

    def __init__(self, enable_hr: bool = False, denoising_strength: float = 0.75, firstphase_width: int = 0, firstphase_height: int = 0, hr_scale: float = 2.0, hr_upscaler: str = None, hr_second_pass_steps: int = 0, hr_resize_x: int = 0, hr_resize_y: int = 0, hr_sampler_name: str = None, hr_prompt: str = '', hr_negative_prompt: str = '', **kwargs):
        """Store the hires-fix options; all remaining keyword arguments go to the base class."""
        super().__init__(**kwargs)
        self.enable_hr = enable_hr
        self.denoising_strength = denoising_strength
        self.hr_scale = hr_scale
        self.hr_upscaler = hr_upscaler
        self.hr_second_pass_steps = hr_second_pass_steps
        self.hr_resize_x = hr_resize_x
        self.hr_resize_y = hr_resize_y
        self.hr_upscale_to_x = hr_resize_x
        self.hr_upscale_to_y = hr_resize_y
        self.hr_sampler_name = hr_sampler_name
        self.hr_prompt = hr_prompt
        self.hr_negative_prompt = hr_negative_prompt
        self.all_hr_prompts = None
        self.all_hr_negative_prompts = None

        # when a first phase size is given, the requested width/height become the
        # upscale target and the first phase size becomes the generation size
        if firstphase_width != 0 or firstphase_height != 0:
            self.hr_upscale_to_x = self.width
            self.hr_upscale_to_y = self.height
            self.width = firstphase_width
            self.height = firstphase_height

        self.truncate_x = 0
        self.truncate_y = 0
        self.applied_old_hires_behavior_to = None

        self.hr_prompts = None
        self.hr_negative_prompts = None
        self.hr_extra_network_data = None

        self.hr_c = None
        self.hr_uc = None

    def init(self, all_prompts, all_seeds, all_subseeds):
        """Resolve the hires-fix target size and record the hires infotext parameters.

        Does nothing unless enable_hr is set. If the resolved target size equals
        the generation size, the hires pass is disabled altogether.
        """
        if self.enable_hr:
            if self.hr_sampler_name is not None and self.hr_sampler_name != self.sampler_name:
                self.extra_generation_params["Hires sampler"] = self.hr_sampler_name

            if tuple(self.hr_prompt) != tuple(self.prompt):
                self.extra_generation_params["Hires prompt"] = self.hr_prompt

            if tuple(self.hr_negative_prompt) != tuple(self.negative_prompt):
                self.extra_generation_params["Hires negative prompt"] = self.hr_negative_prompt

            if opts.use_old_hires_fix_width_height and self.applied_old_hires_behavior_to != (self.width, self.height):
                self.hr_resize_x = self.width
                self.hr_resize_y = self.height
                self.hr_upscale_to_x = self.width
                self.hr_upscale_to_y = self.height

                self.width, self.height = old_hires_fix_first_pass_dimensions(self.width, self.height)
                # remember the adjusted size so a repeated init() does not shrink it again
                self.applied_old_hires_behavior_to = (self.width, self.height)

            if self.hr_resize_x == 0 and self.hr_resize_y == 0:
                # no explicit target size: scale the generation size by hr_scale
                self.extra_generation_params["Hires upscale"] = self.hr_scale
                self.hr_upscale_to_x = int(self.width * self.hr_scale)
                self.hr_upscale_to_y = int(self.height * self.hr_scale)
            else:
                self.extra_generation_params["Hires resize"] = f"{self.hr_resize_x}x{self.hr_resize_y}"

                if self.hr_resize_y == 0:
                    # only the width is given: derive the height from the aspect ratio
                    self.hr_upscale_to_x = self.hr_resize_x
                    self.hr_upscale_to_y = self.hr_resize_x * self.height // self.width
                elif self.hr_resize_x == 0:
                    # only the height is given: derive the width from the aspect ratio
                    self.hr_upscale_to_x = self.hr_resize_y * self.width // self.height
                    self.hr_upscale_to_y = self.hr_resize_y
                else:
                    target_w = self.hr_resize_x
                    target_h = self.hr_resize_y
                    src_ratio = self.width / self.height
                    dst_ratio = self.hr_resize_x / self.hr_resize_y

                    # upscale so the image covers the target in both dimensions ...
                    if src_ratio < dst_ratio:
                        self.hr_upscale_to_x = self.hr_resize_x
                        self.hr_upscale_to_y = self.hr_resize_x * self.height // self.width
                    else:
                        self.hr_upscale_to_x = self.hr_resize_y * self.width // self.height
                        self.hr_upscale_to_y = self.hr_resize_y

                    # ... and remember the excess (divided by opt_f) that sample() crops off
                    self.truncate_x = (self.hr_upscale_to_x - target_w) // opt_f
                    self.truncate_y = (self.hr_upscale_to_y - target_h) // opt_f

            # special case: the user has chosen to do nothing
            if self.hr_upscale_to_x == self.width and self.hr_upscale_to_y == self.height:
                self.enable_hr = False
                self.denoising_strength = None
                self.extra_generation_params.pop("Hires upscale", None)
                self.extra_generation_params.pop("Hires resize", None)
                return

            if not state.processing_has_refined_job_count:
                if state.job_count == -1:
                    state.job_count = self.n_iter

                # every job now consists of two sampling passes
                shared.total_tqdm.updateTotal((self.steps + (self.hr_second_pass_steps or self.steps)) * state.job_count)
                state.job_count = state.job_count * 2
                state.processing_has_refined_job_count = True

            if self.hr_second_pass_steps:
                self.extra_generation_params["Hires steps"] = self.hr_second_pass_steps

            if self.hr_upscaler is not None:
                self.extra_generation_params["Hires upscaler"] = self.hr_upscaler

    def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
        """Sample latents for one batch, followed by the hires pass when enable_hr is set.

        Returns the latents of the first pass when hires fix is disabled, otherwise
        the latents produced by the img2img pass over the upscaled result.
        """
        self.sampler = sd_samplers.create_sampler(self.sampler_name, self.sd_model)

        latent_scale_mode = shared.latent_upscale_modes.get(self.hr_upscaler, None) if self.hr_upscaler is not None else shared.latent_upscale_modes.get(shared.latent_upscale_default_mode, "nearest")
        if self.enable_hr and latent_scale_mode is None:
            # not a latent upscale mode, so it has to be one of the image upscalers
            assert any(x.name == self.hr_upscaler for x in shared.sd_upscalers), f"could not find upscaler named {self.hr_upscaler}"

        x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
        samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))

        if not self.enable_hr:
            return samples

        self.is_hr_pass = True

        target_width = self.hr_upscale_to_x
        target_height = self.hr_upscale_to_y

        def save_intermediate(image, index):
            """saves image before applying hires fix, if enabled in options; takes as an argument either an image or batch with latent space images"""

            # NOTE(review): gated on opts.samples_save; `opts.save` appears to resolve to
            # the options object's save method (always truthy) rather than to a setting - confirm.
            if not opts.samples_save or self.do_not_save_samples or not opts.save_images_before_highres_fix:
                return

            if not isinstance(image, Image.Image):
                image = sd_samplers.sample_to_image(image, index, approximation=0)

            info = create_infotext(self, self.all_prompts, self.all_seeds, self.all_subseeds, [], iteration=self.iteration, position_in_batch=index)
            images.save_image(image, self.outpath_samples, "", seeds[index], prompts[index], opts.samples_format, info=info, suffix="-before-highres-fix")

        if latent_scale_mode is not None:
            # latent upscaling: resize the latents directly
            for i in range(samples.shape[0]):
                save_intermediate(samples, i)

            samples = torch.nn.functional.interpolate(samples, size=(target_height // opt_f, target_width // opt_f), mode=latent_scale_mode["mode"], antialias=latent_scale_mode["antialias"])

            # Avoid making the inpainting conditioning unless necessary as
            # this does need some extra compute to decode / encode the image again.
            if getattr(self, "inpainting_mask_weight", shared.opts.inpainting_mask_weight) < 1.0:
                image_conditioning = self.img2img_image_conditioning(decode_first_stage(self.sd_model, samples), samples)
            else:
                image_conditioning = self.txt2img_image_conditioning(samples)
        else:
            # image upscaling: decode, upscale every image, then encode again
            decoded_samples = decode_first_stage(self.sd_model, samples)
            lowres_samples = torch.clamp((decoded_samples + 1.0) / 2.0, min=0.0, max=1.0)

            batch_images = []
            for i, x_sample in enumerate(lowres_samples):
                x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2)
                x_sample = x_sample.astype(np.uint8)
                image = Image.fromarray(x_sample)

                save_intermediate(image, i)

                image = images.resize_image(0, image, target_width, target_height, upscaler_name=self.hr_upscaler)
                image = np.array(image).astype(np.float32) / 255.0
                image = np.moveaxis(image, 2, 0)
                batch_images.append(image)

            decoded_samples = torch.from_numpy(np.array(batch_images))
            decoded_samples = decoded_samples.to(shared.device)
            # back from [0, 1] to the [-1, 1] range
            decoded_samples = 2. * decoded_samples - 1.

            samples = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(decoded_samples))

            image_conditioning = self.img2img_image_conditioning(decoded_samples, samples)

        shared.state.nextjob()

        img2img_sampler_name = self.hr_sampler_name or self.sampler_name

        # PLMS/UniPC do not support img2img so we just silently switch to DDIM.
        # The check is made on the sampler actually selected for the second pass, so an
        # explicit hires sampler is respected and a PLMS/UniPC hires sampler is caught.
        if img2img_sampler_name in ('PLMS', 'UniPC'):
            img2img_sampler_name = 'DDIM'

        self.sampler = sd_samplers.create_sampler(img2img_sampler_name, self.sd_model)

        # crop the excess computed in init() evenly from both sides
        samples = samples[:, :, self.truncate_y//2:samples.shape[2]-(self.truncate_y+1)//2, self.truncate_x//2:samples.shape[3]-(self.truncate_x+1)//2]

        noise = create_random_tensors(samples.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=subseed_strength, p=self)

        # GC now before running the next img2img to prevent running out of memory
        x = None
        devices.torch_gc()

        if not self.disable_extra_networks:
            with devices.autocast():
                extra_networks.activate(self, self.hr_extra_network_data)

        sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio(for_hr=True))

        samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning)

        # back to the token merging ratio of the first pass
        sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio())

        self.is_hr_pass = False

        return samples

    def close(self):
        """Drop the references to the hires conditionings."""
        self.hr_c = None
        self.hr_uc = None

    def setup_prompts(self):
        """Build all_hr_prompts / all_hr_negative_prompts in addition to the base class prompts.

        An empty hires prompt falls back to the regular prompt. A single prompt is
        repeated batch_size * n_iter times; styles are applied to every entry.
        """
        super().setup_prompts()

        if not self.enable_hr:
            return

        if self.hr_prompt == '':
            self.hr_prompt = self.prompt

        if self.hr_negative_prompt == '':
            self.hr_negative_prompt = self.negative_prompt

        if isinstance(self.hr_prompt, list):
            self.all_hr_prompts = self.hr_prompt
        else:
            self.all_hr_prompts = self.batch_size * self.n_iter * [self.hr_prompt]

        if isinstance(self.hr_negative_prompt, list):
            self.all_hr_negative_prompts = self.hr_negative_prompt
        else:
            self.all_hr_negative_prompts = self.batch_size * self.n_iter * [self.hr_negative_prompt]

        self.all_hr_prompts = [shared.prompt_styles.apply_styles_to_prompt(x, self.styles) for x in self.all_hr_prompts]
        self.all_hr_negative_prompts = [shared.prompt_styles.apply_negative_styles_to_prompt(x, self.styles) for x in self.all_hr_negative_prompts]

    def setup_conds(self):
        """Compute hr_uc / hr_c for the hires prompts after the base class conditionings."""
        super().setup_conds()

        if self.enable_hr:
            self.hr_uc = self.get_conds_with_caching(prompt_parser.get_learned_conditioning, self.hr_negative_prompts, self.steps * self.step_multiplier, self.cached_uc, self.hr_extra_network_data)
            self.hr_c = self.get_conds_with_caching(prompt_parser.get_multicond_learned_conditioning, self.hr_prompts, self.steps * self.step_multiplier, self.cached_c, self.hr_extra_network_data)

    def parse_extra_network_prompts(self):
        """Slice the hires prompts for the current iteration and parse their extra networks.

        Returns whatever the base class implementation returns; the hires results
        are stored in hr_prompts, hr_negative_prompts and hr_extra_network_data.
        """
        res = super().parse_extra_network_prompts()

        if self.enable_hr:
            batch_start = self.iteration * self.batch_size
            batch_end = (self.iteration + 1) * self.batch_size
            self.hr_prompts = self.all_hr_prompts[batch_start:batch_end]
            self.hr_negative_prompts = self.all_hr_negative_prompts[batch_start:batch_end]

            self.hr_prompts, self.hr_extra_network_data = extra_networks.parse_prompts(self.hr_prompts)

        return res

1118 1119 1120 1121

class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
    sampler = None

    def __init__(self, init_images: list = None, resize_mode: int = 0, denoising_strength: float = 0.75, image_cfg_scale: float = None, mask: Any = None, mask_blur: int = 4, inpainting_fill: int = 0, inpaint_full_res: bool = True, inpaint_full_res_padding: int = 0, inpainting_mask_invert: int = 0, initial_noise_multiplier: float = None, **kwargs):
        """Store the img2img / inpainting settings on the instance.

        Args:
            init_images: Source images; ``init()`` iterates over them.
            resize_mode: Mode passed to ``images.resize_image``. ``init()``
                treats the value 3 specially (the latent is resized instead of
                the image).
            denoising_strength: Stored as given.
            image_cfg_scale: Kept only when
                ``shared.sd_model.cond_stage_key == "edit"``; otherwise the
                attribute is set to ``None``.
            mask: Inpainting mask image, stored as ``self.image_mask``.
            mask_blur: Gaussian blur radius applied to the mask in ``init()``
                when greater than 0.
            inpainting_fill: Fill selector read by ``init()``: 1 skips
                ``masking.fill``, 2 puts noise into the masked latent region,
                3 zeroes the masked latent region.
            inpaint_full_res: When truthy, ``init()`` crops to the mask's
                region before processing.
            inpaint_full_res_padding: Padding passed to
                ``masking.get_crop_region``.
            inpainting_mask_invert: When truthy, the mask is inverted in
                ``init()``.
            initial_noise_multiplier: Falls back to
                ``opts.initial_noise_multiplier`` when ``None``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        super().__init__(**kwargs)

        self.init_images = init_images
        self.resize_mode: int = resize_mode
        self.denoising_strength: float = denoising_strength
        # The image CFG scale is only retained when the loaded model's cond stage key is "edit".
        self.image_cfg_scale: float = image_cfg_scale if shared.sd_model.cond_stage_key == "edit" else None
        self.init_latent = None
        self.image_mask = mask
        self.latent_mask = None
        self.mask_for_overlay = None
        self.mask_blur = mask_blur
        self.inpainting_fill = inpainting_fill
        self.inpaint_full_res = inpaint_full_res
        self.inpaint_full_res_padding = inpaint_full_res_padding
        self.inpainting_mask_invert = inpainting_mask_invert
        self.initial_noise_multiplier = opts.initial_noise_multiplier if initial_noise_multiplier is None else initial_noise_multiplier
        # Filled in by init() when a mask is supplied.
        self.mask = None
        self.nmask = None
        self.image_conditioning = None

    def init(self, all_prompts, all_seeds, all_subseeds):
        """Prepare sampler, masks, overlays and the initial latent for img2img.

        Steps, in order:
          1. Create the sampler.
          2. If a mask was supplied, convert / invert / blur it and either crop
             to the masked region (``inpaint_full_res``) or resize it to the
             target size; ``self.mask_for_overlay`` and, for the cropped case,
             ``self.paste_to`` are set here.
          3. For each init image: optionally save it, flatten it, resize or crop
             it, record an overlay image, optionally fill the masked area and
             record colour-correction data, then convert it to a float CHW array.
          4. Batch the arrays, encode them with the model's first stage into
             ``self.init_latent`` and, when masked, build ``self.mask`` /
             ``self.nmask`` at latent resolution.
          5. Compute ``self.image_conditioning``.

        Args:
            all_prompts: Not read by this method.
            all_seeds: Only read when ``self.inpainting_fill == 2`` to seed the
                noise placed in the masked latent region.
            all_subseeds: Not read by this method.

        Raises:
            RuntimeError: If more init images are supplied than
                ``self.batch_size``.
        """
        self.sampler = sd_samplers.create_sampler(self.sampler_name, self.sd_model)
        crop_region = None

        image_mask = self.image_mask

        if image_mask is not None:
            image_mask = image_mask.convert('L')

            if self.inpainting_mask_invert:
                image_mask = ImageOps.invert(image_mask)

            if self.mask_blur > 0:
                image_mask = image_mask.filter(ImageFilter.GaussianBlur(self.mask_blur))

            if self.inpaint_full_res:
                # Work only on the (padded, expanded) bounding region of the mask;
                # the overlay keeps the full-size mask.
                self.mask_for_overlay = image_mask
                mask = image_mask.convert('L')
                crop_region = masking.get_crop_region(np.array(mask), self.inpaint_full_res_padding)
                crop_region = masking.expand_crop_region(crop_region, self.width, self.height, mask.width, mask.height)
                x1, y1, x2, y2 = crop_region

                mask = mask.crop(crop_region)
                image_mask = images.resize_image(2, mask, self.width, self.height)
                # Stored as (x, y, width, height) of the cropped region.
                self.paste_to = (x1, y1, x2-x1, y2-y1)
            else:
                image_mask = images.resize_image(self.resize_mode, image_mask, self.width, self.height)
                np_mask = np.array(image_mask)
                # Double the mask values (clipped to 255) for the overlay mask only.
                np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8)
                self.mask_for_overlay = Image.fromarray(np_mask)

            self.overlay_images = []

        # An explicitly set latent mask takes precedence over the processed image mask.
        latent_mask = self.latent_mask if self.latent_mask is not None else image_mask

        # Colour corrections are only collected here when the option is on and none were supplied.
        add_color_corrections = opts.img2img_color_correction and self.color_corrections is None
        if add_color_corrections:
            self.color_corrections = []
        imgs = []
        for img in self.init_images:

            # Save init image
            if opts.save_init_img:
                self.init_img_hash = hashlib.md5(img.tobytes()).hexdigest()
                images.save_image(img, path=opts.outdir_init_images, basename=None, forced_filename=self.init_img_hash, save_to_dirs=False)

            image = images.flatten(img, opts.img2img_background_color)

            # With resize mode 3 the image is left as is; the latent is resized after encoding instead.
            if crop_region is None and self.resize_mode != 3:
                image = images.resize_image(self.resize_mode, image, self.width, self.height)

            if image_mask is not None:
                # Overlay = the image pasted through the inverted overlay mask.
                image_masked = Image.new('RGBa', (image.width, image.height))
                image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L')))

                self.overlay_images.append(image_masked.convert('RGBA'))

            # crop_region is not None if we are doing inpaint full res
            if crop_region is not None:
                image = image.crop(crop_region)
                image = images.resize_image(2, image, self.width, self.height)

            if image_mask is not None:
                if self.inpainting_fill != 1:
                    image = masking.fill(image, latent_mask)

            if add_color_corrections:
                self.color_corrections.append(setup_color_correction(image))

            # To float32 scaled by 1/255, then channels moved to the front (HWC -> CHW).
            image = np.array(image).astype(np.float32) / 255.0
            image = np.moveaxis(image, 2, 0)

            imgs.append(image)

        if len(imgs) == 1:
            # A single init image is repeated to fill the whole batch.
            batch_images = np.expand_dims(imgs[0], axis=0).repeat(self.batch_size, axis=0)
            if self.overlay_images is not None:
                self.overlay_images = self.overlay_images * self.batch_size

            if self.color_corrections is not None and len(self.color_corrections) == 1:
                self.color_corrections = self.color_corrections * self.batch_size

        elif len(imgs) <= self.batch_size:
            # Several init images: the batch size shrinks to the number of images.
            self.batch_size = len(imgs)
            batch_images = np.array(imgs)
        else:
            raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less")

        image = torch.from_numpy(batch_images)
        # Rescale from [0, 1] to [-1, 1] before encoding.
        image = 2. * image - 1.
        image = image.to(shared.device)

        self.init_latent = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image))

        if self.resize_mode == 3:
            self.init_latent = torch.nn.functional.interpolate(self.init_latent, size=(self.height // opt_f, self.width // opt_f), mode="bilinear")

        if image_mask is not None:
            init_mask = latent_mask
            # Resize the mask to the latent's spatial size, keep one channel, round to 0/1.
            latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))
            latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255
            latmask = latmask[0]
            latmask = np.around(latmask)
            # Repeated over 4 channels - presumably the latent channel count (opt_C); confirm.
            latmask = np.tile(latmask[None], (4, 1, 1))

            # self.mask is the complement of the latent mask; self.nmask is the latent mask itself.
            self.mask = torch.asarray(1.0 - latmask).to(shared.device).type(self.sd_model.dtype)
            self.nmask = torch.asarray(latmask).to(shared.device).type(self.sd_model.dtype)

            # this needs to be fixed to be done in sample() using actual seeds for batches
            if self.inpainting_fill == 2:
                self.init_latent = self.init_latent * self.mask + create_random_tensors(self.init_latent.shape[1:], all_seeds[0:self.init_latent.shape[0]]) * self.nmask
            elif self.inpainting_fill == 3:
                self.init_latent = self.init_latent * self.mask

        self.image_conditioning = self.img2img_image_conditioning(image, self.init_latent, image_mask)
1258

    def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
        """Run the img2img sampler on the prepared initial latent.

        Args:
            conditioning: Positive conditioning passed to the sampler.
            unconditional_conditioning: Negative conditioning passed to the sampler.
            seeds: Seeds for the noise tensors.
            subseeds: Subseeds for the noise tensors.
            subseed_strength: Not read here; ``self.subseed_strength`` is used
                when creating the noise.
            prompts: Not read by this method.

        Returns:
            The sampled latents. When a mask is set, the result is blended so
            that ``self.nmask`` selects the sampled latent and ``self.mask``
            selects ``self.init_latent``.
        """
        x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)

        if self.initial_noise_multiplier != 1.0:
            # Record the non-default multiplier in the generation parameters, then scale the noise.
            self.extra_generation_params["Noise multiplier"] = self.initial_noise_multiplier
            x *= self.initial_noise_multiplier

        samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)

        if self.mask is not None:
            samples = samples * self.nmask + self.init_latent * self.mask

        # Drop the noise tensor before asking the device helper to collect garbage.
        del x
        devices.torch_gc()

        return samples

    def get_token_merging_ratio(self, for_hr=False):
        """Return the first truthy token merging ratio from a fixed priority list.

        Priority: the instance's own ``token_merging_ratio``; then
        ``opts.token_merging_ratio`` if ``"token_merging_ratio"`` is a key of
        ``self.override_settings``; then ``opts.token_merging_ratio_img2img``;
        finally ``opts.token_merging_ratio`` (returned even when falsy).
        ``for_hr`` is accepted but not read.
        """
        own_ratio = self.token_merging_ratio
        if own_ratio:
            return own_ratio

        if "token_merging_ratio" in self.override_settings:
            overridden_ratio = opts.token_merging_ratio
            if overridden_ratio:
                return overridden_ratio

        img2img_ratio = opts.token_merging_ratio_img2img
        if img2img_ratio:
            return img2img_ratio

        return opts.token_merging_ratio