integrator.cpp 31.9 KB
Newer Older
M
Matt Pharr 已提交
1 2 3 4
// pbrt is Copyright(c) 1998-2020 Matt Pharr, Wenzel Jakob, and Greg Humphreys.
// The pbrt source code is licensed under the Apache License, Version 2.0.
// SPDX: Apache-2.0

5
#include <pbrt/wavefront/integrator.h>
M
Matt Pharr 已提交
6 7 8 9 10

#include <pbrt/base/medium.h>
#include <pbrt/cameras.h>
#include <pbrt/film.h>
#include <pbrt/filters.h>
11 12 13
#ifdef PBRT_BUILD_GPU_RENDERER
#include <pbrt/gpu/aggregate.h>
#include <pbrt/gpu/memory.h>
14
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
15 16 17 18 19 20 21 22 23 24 25
#include <pbrt/lights.h>
#include <pbrt/lightsamplers.h>
#include <pbrt/util/color.h>
#include <pbrt/util/colorspace.h>
#include <pbrt/util/display.h>
#include <pbrt/util/file.h>
#include <pbrt/util/image.h>
#include <pbrt/util/log.h>
#include <pbrt/util/print.h>
#include <pbrt/util/progressreporter.h>
#include <pbrt/util/pstd.h>
26
#include <pbrt/util/spectrum.h>
M
Matt Pharr 已提交
27
#include <pbrt/util/stats.h>
M
Matt Pharr 已提交
28
#include <pbrt/util/string.h>
M
Matt Pharr 已提交
29
#include <pbrt/util/taggedptr.h>
30
#include <pbrt/wavefront/aggregate.h>
M
Matt Pharr 已提交
31

32
#include <atomic>
M
Matt Pharr 已提交
33 34 35 36
#include <cstring>
#include <iostream>
#include <map>

37
#ifdef PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
38 39
#include <cuda.h>
#include <cuda_runtime.h>
40
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
41 42 43

namespace pbrt {

44
STAT_MEMORY_COUNTER("Memory/Wavefront integrator pixel state", pathIntegratorBytes);
M
Matt Pharr 已提交
45

46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
// Records which material-evaluation paths the material _m_ requires.
//
// Parameters:
//   m                          Material to inspect; a null material is ignored.
//   haveBasicEvalMaterial      Per-material-tag flags; the entry for m.Tag() is
//                              set when both the material's textures and its
//                              displacement texture (if any) can be evaluated
//                              by the BasicTextureEvaluator.
//   haveUniversalEvalMaterial  Per-material-tag flags; the entry for m.Tag() is
//                              set in all other cases.
//   haveSubsurface             OR-ed with m.HasSubsurfaceScattering().
//
// A MixMaterial is not recorded itself: its two constituent materials are
// processed recursively instead.
static void updateMaterialNeeds(
    Material m, pstd::array<bool, Material::NumTags()> *haveBasicEvalMaterial,
    pstd::array<bool, Material::NumTags()> *haveUniversalEvalMaterial,
    bool *haveSubsurface) {
    if (!m)
        return;

    if (MixMaterial *mix = m.CastOrNullptr<MixMaterial>(); mix) {
        // This is a somewhat odd place for this check, but it's convenient...
        if (!m.CanEvaluateTextures(BasicTextureEvaluator()))
            ErrorExit("\"mix\" material has a texture that can't be evaluated with the "
                      "BasicTextureEvaluator, which is all that is currently supported "
                      "in the wavefront renderer--sorry! %s",
                      *mix);

        // Recurse into both constituents of the mix.
        updateMaterialNeeds(mix->GetMaterial(0), haveBasicEvalMaterial,
                            haveUniversalEvalMaterial, haveSubsurface);
        updateMaterialNeeds(mix->GetMaterial(1), haveBasicEvalMaterial,
                            haveUniversalEvalMaterial, haveSubsurface);
        return;
    }

    *haveSubsurface |= m.HasSubsurfaceScattering();

    // The basic path is only usable if the displacement texture (when
    // present) can also be handled by the BasicTextureEvaluator.
    FloatTexture displace = m.GetDisplacement();
    if (m.CanEvaluateTextures(BasicTextureEvaluator()) &&
        (!displace || BasicTextureEvaluator().CanEvaluate({displace}, {})))
        (*haveBasicEvalMaterial)[m.Tag()] = true;
    else
        (*haveUniversalEvalMaterial)[m.Tag()] = true;
}
77

78
// Builds everything the wavefront integrator needs from the parsed scene:
// media, filter, film, sampler, camera, textures, lights (including area
// lights attached to shapes), materials, the ray intersection aggregate
// (OptiX or CPU, depending on Options->useGPU) and the light sampler. It then
// reads the integrator parameters, warns about unsupported command-line
// options, and allocates the work queues used while rendering.
//
// Parameters:
//   alloc  Allocator used for all of the objects and queues created here.
//   scene  Parsed scene description to create the scene objects from.
//
// Calls ErrorExit() for undefined media, shutter times that imply a
// non-positive exposure time, a missing "alpha" float texture, animated area
// lights, and scenes with no light sources.
WavefrontPathIntegrator::WavefrontPathIntegrator(Allocator alloc, ParsedScene &scene) {
    // Allocate all of the data structures that represent the scene...
    std::map<std::string, Medium> media = scene.CreateMedia(alloc);

    // "haveMedia" is a bit of a misnomer in that it determines both whether
    // queues are allocated for the medium sampling kernels and whether they
    // are launched, as well as whether the ray marching shadow ray kernel is
    // launched... Thus, it will be true even if there actually are no media
    // but some "interface" materials are present in the scene.
    haveMedia = false;
    // Check the shapes...
    for (const auto &shape : scene.shapes)
        if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
            haveMedia = true;
    for (const auto &shape : scene.animatedShapes)
        if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
            haveMedia = true;
    for (const auto &mtl : scene.materials)
        if (mtl.name == "interface")
            haveMedia = true;
    // For named materials, isInterface() below reads the material type from
    // the "type" parameter rather than from the entity name, so check it here
    // as well (the original name test is kept for compatibility).
    for (const auto &namedMtl : scene.namedMaterials)
        if (namedMtl.second.name == "interface" ||
            namedMtl.second.parameters.GetOneString("type", "") == "interface")
            haveMedia = true;

    // Looks up a medium by name; an empty name yields a null Medium. Finding
    // a medium also sets haveMedia.
    auto findMedium = [&](const std::string &s, const FileLoc *loc) -> Medium {
        if (s.empty())
            return nullptr;

        auto iter = media.find(s);
        if (iter == media.end())
            ErrorExit(loc, "%s: medium not defined", s);
        haveMedia = true;
        return iter->second;
    };

    filter = Filter::Create(scene.filter.name, scene.filter.parameters, &scene.filter.loc,
                            alloc);

    Float exposureTime = scene.camera.parameters.GetOneFloat("shutterclose", 1.f) -
                         scene.camera.parameters.GetOneFloat("shutteropen", 0.f);
    if (exposureTime <= 0)
        ErrorExit(&scene.camera.loc,
                  "The specified camera shutter times imply that the shutter "
                  "does not open.  A black image will result.");

    film = Film::Create(scene.film.name, scene.film.parameters, exposureTime,
                        scene.camera.cameraTransform, filter, &scene.film.loc, alloc);
    initializeVisibleSurface = film.UsesVisibleSurface();

    sampler = Sampler::Create(scene.sampler.name, scene.sampler.parameters,
                              film.FullResolution(), &scene.sampler.loc, alloc);
    samplesPerPixel = sampler.SamplesPerPixel();

    Medium cameraMedium = findMedium(scene.camera.medium, &scene.camera.loc);
    camera = Camera::Create(scene.camera.name, scene.camera.parameters, cameraMedium,
                            scene.camera.cameraTransform, film, &scene.camera.loc, alloc);

    // Textures
    LOG_VERBOSE("Starting to create textures");
    NamedTextures textures = scene.CreateTextures(alloc, Options->useGPU);
    LOG_VERBOSE("Done creating textures");

    pstd::vector<Light> allLights;

    // Non-area lights; infinite lights are additionally recorded in
    // infiniteLights.
    infiniteLights = alloc.new_object<pstd::vector<Light>>(alloc);
    for (const auto &light : scene.lights) {
        Medium outsideMedium = findMedium(light.medium, &light.loc);
        if (light.renderFromObject.IsAnimated())
            Warning(&light.loc,
                    "Animated lights aren't supported. Using the start transform.");

        Light l = Light::Create(
            light.name, light.parameters, light.renderFromObject.startTransform,
            scene.camera.cameraTransform, outsideMedium, &light.loc, alloc);

        if (l.Is<UniformInfiniteLight>() || l.Is<ImageInfiniteLight>() ||
            l.Is<PortalImageInfiniteLight>())
            infiniteLights->push_back(l);

        allLights.push_back(l);
    }

    // Area lights...
    std::map<int, pstd::vector<Light> *> shapeIndexToAreaLights;
    for (size_t i = 0; i < scene.shapes.size(); ++i) {
        const auto &shape = scene.shapes[i];
        if (shape.lightIndex == -1)
            continue;

        // Shapes whose material is "interface", "none", or unresolved do not
        // get area lights.
        auto isInterface = [&]() {
            std::string materialName;
            if (shape.materialIndex != -1)
                materialName = scene.materials[shape.materialIndex].name;
            else {
                for (auto iter = scene.namedMaterials.begin();
                     iter != scene.namedMaterials.end(); ++iter)
                    if (iter->first == shape.materialName) {
                        materialName = iter->second.parameters.GetOneString("type", "");
                        break;
                    }
            }
            return (materialName == "interface" || materialName == "none" ||
                    materialName.empty());
        };
        if (isInterface())
            continue;

        CHECK_LT(shape.lightIndex, scene.areaLights.size());
        const auto &areaLightEntity = scene.areaLights[shape.lightIndex];
        AnimatedTransform renderFromLight(*shape.renderFromObject);

        pstd::vector<Shape> shapes =
            Shape::Create(shape.name, shape.renderFromObject, shape.objectFromRender,
                          shape.reverseOrientation, shape.parameters,
                          textures.floatTextures, &shape.loc, alloc);

        if (shapes.empty())
            continue;

        Medium outsideMedium = findMedium(shape.outsideMedium, &shape.loc);

        // Resolve the optional "alpha" parameter, either as a named float
        // texture or as a constant value below 1.
        FloatTexture alphaTex;
        std::string alphaTexName = shape.parameters.GetTexture("alpha");
        if (!alphaTexName.empty()) {
            // Single lookup instead of find() followed by operator[].
            auto texIter = textures.floatTextures.find(alphaTexName);
            if (texIter != textures.floatTextures.end()) {
                alphaTex = texIter->second;
                if (!BasicTextureEvaluator().CanEvaluate({alphaTex}, {}))
                    // A warning will be issued elsewhere...
                    alphaTex = nullptr;
            } else
                ErrorExit(&shape.loc,
                          "%s: couldn't find float texture for \"alpha\" parameter.",
                          alphaTexName);
        } else if (Float alpha = shape.parameters.GetOneFloat("alpha", 1.f); alpha < 1.f)
            alphaTex = alloc.new_object<FloatConstantTexture>(alpha);

        // The transform is the same for every shape in _shapes_ (which is
        // known to be non-empty here), so check it once rather than per shape.
        if (renderFromLight.IsAnimated())
            ErrorExit(&shape.loc, "Animated lights are not supported.");

        pstd::vector<Light> *lightsForShape =
            alloc.new_object<pstd::vector<Light>>(alloc);
        for (Shape sh : shapes) {
            DiffuseAreaLight *area = DiffuseAreaLight::Create(
                renderFromLight.startTransform, outsideMedium, areaLightEntity.parameters,
                areaLightEntity.parameters.ColorSpace(), &areaLightEntity.loc, alloc, sh,
                alphaTex);
            allLights.push_back(area);
            lightsForShape->push_back(area);
        }
        shapeIndexToAreaLights[i] = lightsForShape;
    }

    LOG_VERBOSE("Starting to create materials");
    std::map<std::string, pbrt::Material> namedMaterials;
    std::vector<pbrt::Material> materials;
    scene.CreateMaterials(textures, alloc, &namedMaterials, &materials);

    // Determine which material evaluation paths are needed.
    haveBasicEvalMaterial.fill(false);
    haveUniversalEvalMaterial.fill(false);
    haveSubsurface = false;
    for (Material m : materials)
        updateMaterialNeeds(m, &haveBasicEvalMaterial, &haveUniversalEvalMaterial,
                            &haveSubsurface);
    for (const auto &m : namedMaterials)
        updateMaterialNeeds(m.second, &haveBasicEvalMaterial, &haveUniversalEvalMaterial,
                            &haveSubsurface);
    LOG_VERBOSE("Finished creating materials");

    if (Options->useGPU) {
#ifdef PBRT_BUILD_GPU_RENDERER
        aggregate = new OptiXAggregate(scene, alloc, textures, shapeIndexToAreaLights,
                                       media, namedMaterials, materials);
#else
        LOG_FATAL("Options->useGPU was set without PBRT_BUILD_GPU_RENDERER enabled");
#endif
    } else
        aggregate = new CPUAggregate(scene, alloc, textures, shapeIndexToAreaLights,
                                     media, namedMaterials, materials);

    // Preprocess the light sources
    for (Light light : allLights)
        light.Preprocess(aggregate->Bounds());

    // Emissive media count as light sources, too.
    bool haveLights = !allLights.empty();
    for (const auto &m : media)
        haveLights |= m.second.IsEmissive();
    if (!haveLights)
        ErrorExit("No light sources specified");

    std::string lightSamplerName =
        scene.integrator.parameters.GetOneString("lightsampler", "bvh");
    if (allLights.size() == 1)
        lightSamplerName = "uniform";
    lightSampler = LightSampler::Create(lightSamplerName, allLights, alloc);

    if (scene.integrator.name != "path" && scene.integrator.name != "volpath")
        Warning(&scene.integrator.loc,
                "Ignoring specified integrator \"%s\": the wavefront integrator "
                "always uses a \"volpath\" integrator.",
                scene.integrator.name);

    // Integrator parameters
    regularize = scene.integrator.parameters.GetOneBool("regularize", false);
    maxDepth = scene.integrator.parameters.GetOneInt("maxdepth", 5);

    // Warn about unsupported stuff...
    if (Options->forceDiffuse)
        Warning("The wavefront integrator does not support --force-diffuse.");
    if (Options->writePartialImages)
        Warning("The wavefront integrator does not support --write-partial-images.");
    if (Options->recordPixelStatistics)
        Warning("The wavefront integrator does not support --pixelstats.");
    if (!Options->mseReferenceImage.empty())
        Warning("The wavefront integrator does not support --mse-reference-image.");
    if (!Options->mseReferenceOutput.empty())
        Warning("The wavefront integrator does not support --mse-reference-out.");

    ///////////////////////////////////////////////////////////////////////////
    // Allocate storage for all of the queues/buffers...

#ifdef PBRT_BUILD_GPU_RENDERER
    // Remember the allocation total so that the memory used by the queues
    // can be reported via pathIntegratorBytes at the end.
    CUDATrackedMemoryResource *mr =
        dynamic_cast<CUDATrackedMemoryResource *>(gpuMemoryAllocator.resource());
    CHECK(mr);
    size_t startSize = mr->BytesAllocated();
#endif  // PBRT_BUILD_GPU_RENDERER

    // Compute number of scanlines to render per pass
    Vector2i resolution = film.PixelBounds().Diagonal();
    // TODO: make this configurable. Base it on the amount of GPU memory?
    int maxSamples = 1024 * 1024;
    scanlinesPerPass = std::max(1, maxSamples / resolution.x);
    int nPasses = (resolution.y + scanlinesPerPass - 1) / scanlinesPerPass;
    // Spread the scanlines evenly over the passes.
    scanlinesPerPass = (resolution.y + nPasses - 1) / nPasses;
    maxQueueSize = resolution.x * scanlinesPerPass;
    LOG_VERBOSE("Will render in %d passes %d scanlines per pass\n", nPasses,
                scanlinesPerPass);

    pixelSampleState = SOA<PixelSampleState>(maxQueueSize, alloc);

    rayQueues[0] = alloc.new_object<RayQueue>(maxQueueSize, alloc);
    rayQueues[1] = alloc.new_object<RayQueue>(maxQueueSize, alloc);

    shadowRayQueue = alloc.new_object<ShadowRayQueue>(maxQueueSize, alloc);

    // The subsurface, escaped-ray and medium queues are only allocated when
    // the scene needs them.
    if (haveSubsurface) {
        bssrdfEvalQueue =
            alloc.new_object<GetBSSRDFAndProbeRayQueue>(maxQueueSize, alloc);
        subsurfaceScatterQueue =
            alloc.new_object<SubsurfaceScatterQueue>(maxQueueSize, alloc);
    }

    if (!infiniteLights->empty())
        escapedRayQueue = alloc.new_object<EscapedRayQueue>(maxQueueSize, alloc);
    hitAreaLightQueue = alloc.new_object<HitAreaLightQueue>(maxQueueSize, alloc);

    // Note that the spans passed here skip entry 0 of the per-tag arrays.
    basicEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveBasicEvalMaterial[1], haveBasicEvalMaterial.size() - 1));
    universalEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveUniversalEvalMaterial[1],
                            haveUniversalEvalMaterial.size() - 1));

    if (haveMedia) {
        mediumSampleQueue = alloc.new_object<MediumSampleQueue>(maxQueueSize, alloc);

        // TODO: in the presence of multiple PhaseFunction implementations,
        // it could be worthwhile to see which are present in the scene and
        // then initialize havePhase accordingly...
        pstd::array<bool, PhaseFunction::NumTags()> havePhase;
        havePhase.fill(true);
        mediumScatterQueue =
            alloc.new_object<MediumScatterQueue>(maxQueueSize, alloc, havePhase);
    }

    stats = alloc.new_object<Stats>(maxDepth, alloc);

#ifdef PBRT_BUILD_GPU_RENDERER
    size_t endSize = mr->BytesAllocated();
    pathIntegratorBytes += endSize - startSize;
#endif  // PBRT_BUILD_GPU_RENDERER
}

362 363
// WavefrontPathIntegrator Method Definitions
// Renders the image. For each sample index, the film's pixel bounds are
// processed in batches of _scanlinesPerPass_ scanlines: camera rays are
// generated and then traced through successive wavefront depths until
// _maxDepth_ is reached, after which the film is updated. If a display server
// was requested, the work-in-progress image is made available to it.
//
// Options->debugStart, if set, restricts rendering to a range of sample
// indices given as "first" or "first,count".
//
// Returns the elapsed seconds reported by the Timer started at the top of
// this function, read after GPU work has finished (when rendering on the GPU)
// and before the display copy thread is shut down.
Float WavefrontPathIntegrator::Render() {
    Bounds2i pixelBounds = film.PixelBounds();
    Vector2i resolution = pixelBounds.Diagonal();
    Timer timer;
    // Prefetch allocations to GPU memory
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        int deviceIndex;
        CUDA_CHECK(cudaGetDevice(&deviceIndex));
        int hasConcurrentManagedAccess;
        CUDA_CHECK(cudaDeviceGetAttribute(&hasConcurrentManagedAccess,
                                          cudaDevAttrConcurrentManagedAccess,
                                          deviceIndex));

        if (hasConcurrentManagedAccess) {
            // Set things up so that we can still read from the
            // WavefrontPathIntegrator struct on the CPU without hurting
            // performance. (This makes it possible to use the values of things
            // like WavefrontPathIntegrator::haveSubsurface to conditionally launch
            // kernels according to what's in the scene...)
            CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetReadMostly,
                                     /* ignored argument */ 0));
            CUDA_CHECK(cudaMemAdvise(this, sizeof(*this),
                                     cudaMemAdviseSetPreferredLocation, deviceIndex));

            // Copy all of the scene data structures over to GPU memory.  This
            // ensures that there isn't a big performance hitch for the first batch
            // of rays as that stuff is copied over on demand.
            CUDATrackedMemoryResource *mr =
                dynamic_cast<CUDATrackedMemoryResource *>(gpuMemoryAllocator.resource());
            CHECK(mr);
            mr->PrefetchToGPU();
        } else {
            // TODO: on systems with basic unified memory, just launching a
            // kernel should cause everything to be copied over. Is an empty
            // kernel sufficient?
        }
    }
#endif  // PBRT_BUILD_GPU_RENDERER

    // Launch thread to copy image for display server, if enabled
    RGB *displayRGB = nullptr, *displayRGBHost = nullptr;
    std::atomic<bool> exitCopyThread{false};
    std::thread copyThread;

    if (!Options->displayServer.empty()) {
#ifdef PBRT_BUILD_GPU_RENDERER
        if (Options->useGPU) {
            // Allocate staging memory on the GPU to store the current WIP
            // image.
            CUDA_CHECK(
                cudaMalloc(&displayRGB, resolution.x * resolution.y * sizeof(RGB)));
            CUDA_CHECK(
                cudaMemset(displayRGB, 0, resolution.x * resolution.y * sizeof(RGB)));

            // Host-side memory for the WIP Image.  We'll just let this leak so
            // that the lambda passed to DisplayDynamic below doesn't access
            // freed memory after Render() returns...
            displayRGBHost = new RGB[resolution.x * resolution.y];

            // The thread captures locals of Render() by reference; it is
            // joined below before Render() returns.
            copyThread = std::thread([&]() {
                GPURegisterThread("DISPLAY_SERVER_COPY_THREAD");

                // Copy back to the CPU using a separate stream so that we can
                // periodically but asynchronously pick up the latest results
                // from the GPU.
                cudaStream_t memcpyStream;
                CUDA_CHECK(cudaStreamCreate(&memcpyStream));
                GPUNameStream(memcpyStream, "DISPLAY_SERVER_COPY_STREAM");

                // Copy back to the host from the GPU buffer, without any
                // synchronization.
                while (!exitCopyThread) {
                    CUDA_CHECK(cudaMemcpyAsync(displayRGBHost, displayRGB,
                                               resolution.x * resolution.y * sizeof(RGB),
                                               cudaMemcpyDeviceToHost, memcpyStream));
                    std::this_thread::sleep_for(std::chrono::milliseconds(50));

                    CUDA_CHECK(cudaStreamSynchronize(memcpyStream));
                }

                // Copy one more time to get the final image before exiting.
                CUDA_CHECK(cudaMemcpy(displayRGBHost, displayRGB,
                                      resolution.x * resolution.y * sizeof(RGB),
                                      cudaMemcpyDeviceToHost));
                CUDA_CHECK(cudaDeviceSynchronize());

                // The stream is no longer needed; release it rather than
                // leaking it.
                CUDA_CHECK(cudaStreamDestroy(memcpyStream));
            });

            // Now on the CPU side, give the display system a lambda that
            // copies values from |displayRGBHost| into its buffers used for
            // sending messages to the display program (i.e., tev).
            DisplayDynamic(film.GetFilename(), {resolution.x, resolution.y},
                           {"R", "G", "B"},
                           [resolution, displayRGBHost](
                               Bounds2i b, pstd::span<pstd::span<Float>> displayValue) {
                               int index = 0;
                               for (Point2i p : b) {
                                   RGB rgb = displayRGBHost[p.x + p.y * resolution.x];
                                   displayValue[0][index] = rgb.r;
                                   displayValue[1][index] = rgb.g;
                                   displayValue[2][index] = rgb.b;
                                   ++index;
                               }
                           });
        } else
#endif  // PBRT_BUILD_GPU_RENDERER
            // On the CPU, the display lambda reads pixel values directly
            // from the film.
            DisplayDynamic(
                film.GetFilename(), Point2i(pixelBounds.Diagonal()), {"R", "G", "B"},
                [pixelBounds, this](Bounds2i b,
                                    pstd::span<pstd::span<Float>> displayValue) {
                    int index = 0;
                    for (Point2i p : b) {
                        RGB rgb =
                            film.GetPixelRGB(pixelBounds.pMin + p, 1.f /* splat scale */);
                        for (int c = 0; c < 3; ++c)
                            displayValue[c][index] = rgb[c];
                        ++index;
                    }
                });
    }

    // Loop over sample indices and evaluate pixel samples
    int firstSampleIndex = 0, lastSampleIndex = samplesPerPixel;
    // Update sample index range based on debug start, if provided
    if (!Options->debugStart.empty()) {
        std::vector<int> values = SplitStringToInts(Options->debugStart, ',');
        if (values.size() != 1 && values.size() != 2)
            ErrorExit("Expected either one or two integer values for --debugstart.");

        firstSampleIndex = values[0];
        if (values.size() == 2)
            lastSampleIndex = firstSampleIndex + values[1];
        else
            lastSampleIndex = firstSampleIndex + 1;
    }

    ProgressReporter progress(lastSampleIndex - firstSampleIndex, "Rendering",
                              Options->quiet, Options->useGPU);
    for (int sampleIndex = firstSampleIndex; sampleIndex < lastSampleIndex;
         ++sampleIndex) {
        // Attempt to work around issue #145.
        // (The macro is spelled __CUDACC_VER_MINOR__; a misspelled, undefined
        // identifier evaluates to 0 in #if and would disable this test.)
#if !(defined(PBRT_IS_WINDOWS) && defined(PBRT_BUILD_GPU_RENDERER) && \
      __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 1)
        CheckCallbackScope _([&]() {
            return StringPrintf("Wavefront rendering failed at sample %d. Debug with "
                                "\"--debugstart %d\"\n",
                                sampleIndex, sampleIndex);
        });
#endif

        // Render image for sample _sampleIndex_
        LOG_VERBOSE("Starting to submit work for sample %d", sampleIndex);
        for (int y0 = pixelBounds.pMin.y; y0 < pixelBounds.pMax.y;
             y0 += scanlinesPerPass) {
            // Generate camera rays for current scanline range
            RayQueue *cameraRayQueue = CurrentRayQueue(0);
            Do(
                "Reset ray queue", PBRT_CPU_GPU_LAMBDA() {
                    PBRT_DBG("Starting scanlines at y0 = %d, sample %d / %d\n", y0,
                             sampleIndex, samplesPerPixel);
                    cameraRayQueue->Reset();
                });
            GenerateCameraRays(y0, sampleIndex);
            Do(
                "Update camera ray stats",
                PBRT_CPU_GPU_LAMBDA() { stats->cameraRays += cameraRayQueue->Size(); });

            // Trace rays and estimate radiance up to maximum ray depth
            for (int wavefrontDepth = 0; true; ++wavefrontDepth) {
                // Reset queues before tracing rays
                RayQueue *nextQueue = NextRayQueue(wavefrontDepth);
                Do(
                    "Reset queues before tracing rays", PBRT_CPU_GPU_LAMBDA() {
                        nextQueue->Reset();
                        // Reset queues before tracing next batch of rays
                        if (mediumSampleQueue)
                            mediumSampleQueue->Reset();
                        if (mediumScatterQueue)
                            mediumScatterQueue->Reset();

                        if (escapedRayQueue)
                            escapedRayQueue->Reset();
                        hitAreaLightQueue->Reset();

                        basicEvalMaterialQueue->Reset();
                        universalEvalMaterialQueue->Reset();

                        if (bssrdfEvalQueue)
                            bssrdfEvalQueue->Reset();
                        if (subsurfaceScatterQueue)
                            subsurfaceScatterQueue->Reset();
                    });

                // Follow active ray paths and accumulate radiance estimates
                GenerateRaySamples(wavefrontDepth, sampleIndex);

                // Find closest intersections along active rays
                aggregate->IntersectClosest(
                    maxQueueSize, CurrentRayQueue(wavefrontDepth), escapedRayQueue,
                    hitAreaLightQueue, basicEvalMaterialQueue, universalEvalMaterialQueue,
                    mediumSampleQueue, NextRayQueue(wavefrontDepth));

                if (wavefrontDepth > 0) {
                    // As above, with the indexing...
                    RayQueue *statsQueue = CurrentRayQueue(wavefrontDepth);
                    Do(
                        "Update indirect ray stats", PBRT_CPU_GPU_LAMBDA() {
                            stats->indirectRays[wavefrontDepth] += statsQueue->Size();
                        });
                }

                SampleMediumInteraction(wavefrontDepth);

                HandleEscapedRays();

                HandleEmissiveIntersection();

                // ">=" rather than "==" so that a negative maxDepth also
                // terminates the loop; identical for maxDepth >= 0.
                if (wavefrontDepth >= maxDepth)
                    break;

                EvaluateMaterialsAndBSDFs(wavefrontDepth);

                // Do immediately so that we have space for shadow rays for subsurface..
                TraceShadowRays(wavefrontDepth);

                SampleSubsurface(wavefrontDepth);
            }

            UpdateFilm();
            // Copy updated film pixels to buffer for display
#ifdef PBRT_BUILD_GPU_RENDERER
            if (Options->useGPU && !Options->displayServer.empty())
                GPUParallelFor(
                    "Update Display RGB Buffer", maxQueueSize,
                    PBRT_CPU_GPU_LAMBDA(int pixelIndex) {
                        Point2i pPixel = pixelSampleState.pPixel[pixelIndex];
                        if (!InsideExclusive(pPixel, film.PixelBounds()))
                            return;

                        Point2i p(pPixel - film.PixelBounds().pMin);
                        displayRGB[p.x + p.y * resolution.x] = film.GetPixelRGB(pPixel);
                    });
#endif  //  PBRT_BUILD_GPU_RENDERER
        }

        progress.Update();
    }
    progress.Done();

#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU)
        GPUWait();
#endif  // PBRT_BUILD_GPU_RENDERER
    Float seconds = timer.ElapsedSeconds();
    // Shut down display server thread, if active
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        // Wait until rendering is all done before we start to shut down the
        // display stuff..
        if (!Options->displayServer.empty()) {
            exitCopyThread = true;
            copyThread.join();
        }

        // Another synchronization to make sure no kernels are running on the
        // GPU so that we can safely access unified memory from the CPU.
        GPUWait();
    }
#endif  // PBRT_BUILD_GPU_RENDERER

    return seconds;
}

639
void WavefrontPathIntegrator::HandleEscapedRays() {
640 641
    if (!escapedRayQueue)
        return;
642 643
    ForAllQueued(
        "Handle escaped rays", escapedRayQueue, maxQueueSize,
644
        PBRT_CPU_GPU_LAMBDA(const EscapedRayWorkItem w) {
645
            // Compute weighted radiance for escaped ray
646
            SampledSpectrum L(0.f);
647
            for (const auto &light : *infiniteLights) {
648 649
                if (SampledSpectrum Le = light.Le(Ray(w.rayo, w.rayd), w.lambda); Le) {
                    // Compute path radiance contribution from infinite light
650 651 652
                    PBRT_DBG("L %f %f %f %f T_hat %f %f %f %f Le %f %f %f %f", L[0], L[1],
                             L[2], L[3], w.T_hat[0], w.T_hat[1], w.T_hat[2], w.T_hat[3],
                             Le[0], Le[1], Le[2], Le[3]);
653
                    PBRT_DBG("pdf uni %f %f %f %f pdf nee %f %f %f %f", w.uniPathPDF[0],
654 655 656
                             w.uniPathPDF[1], w.uniPathPDF[2], w.uniPathPDF[3],
                             w.lightPathPDF[0], w.lightPathPDF[1], w.lightPathPDF[2],
                             w.lightPathPDF[3]);
657

658
                    if (w.depth == 0 || w.specularBounce) {
659 660 661 662 663
                        L += w.T_hat * Le / w.uniPathPDF.Average();
                    } else {
                        // Compute MIS-weighted radiance contribution from infinite light
                        LightSampleContext ctx = w.prevIntrCtx;
                        Float lightChoicePDF = lightSampler.PDF(ctx, light);
664 665
                        SampledSpectrum lightPathPDF = w.lightPathPDF * lightChoicePDF *
                                                       light.PDF_Li(ctx, w.rayd, true);
666 667 668
                        L += w.T_hat * Le / (w.uniPathPDF + lightPathPDF).Average();
                    }
                }
669
            }
670

671
            // Update pixel radiance if ray's radiance is nonzero
672
            if (L) {
673 674
                PBRT_DBG("Added L %f %f %f %f for escaped ray pixel index %d\n", L[0],
                         L[1], L[2], L[3], w.pixelIndex);
675

676 677 678
                L += pixelSampleState.L[w.pixelIndex];
                pixelSampleState.L[w.pixelIndex] = L;
            }
679
        });
M
Matt Pharr 已提交
680 681
}

682
void WavefrontPathIntegrator::HandleEmissiveIntersection() {
M
Matt Pharr 已提交
683 684
    ForAllQueued(
        "Handle emitters hit by indirect rays", hitAreaLightQueue, maxQueueSize,
685
        PBRT_CPU_GPU_LAMBDA(const HitAreaLightWorkItem w) {
686
            // Find emitted radiance from surface that ray hit
M
Matt Pharr 已提交
687
            SampledSpectrum Le = w.areaLight.L(w.p, w.n, w.uv, w.wo, w.lambda);
M
Matt Pharr 已提交
688 689
            if (!Le)
                return;
690
            PBRT_DBG("Got Le %f %f %f %f from hit area light at depth %d\n", Le[0], Le[1],
691
                     Le[2], Le[3], w.depth);
M
Matt Pharr 已提交
692

693
            // Compute area light's weighted radiance contribution to the path
694
            SampledSpectrum L(0.f);
695
            if (w.depth == 0 || w.isSpecularBounce) {
696
                L = w.T_hat * Le / w.uniPathPDF.Average();
M
Matt Pharr 已提交
697
            } else {
698
                // Compute MIS-weighted radiance contribution from area light
699 700
                Vector3f wi = -w.wo;
                LightSampleContext ctx = w.prevIntrCtx;
701
                Float lightChoicePDF = lightSampler.PDF(ctx, w.areaLight);
702
                Float lightPDF = lightChoicePDF * w.areaLight.PDF_Li(ctx, wi, true);
M
Matt Pharr 已提交
703

704 705 706
                SampledSpectrum uniPathPDF = w.uniPathPDF;
                SampledSpectrum lightPathPDF = w.lightPathPDF * lightPDF;
                L = w.T_hat * Le / (uniPathPDF + lightPathPDF).Average();
M
Matt Pharr 已提交
707 708
            }

709
            PBRT_DBG("Added L %f %f %f %f for pixel index %d\n", L[0], L[1], L[2], L[3],
710
                     w.pixelIndex);
711

712
            // Update _L_ in _PixelSampleState_ for area light's radiance
713 714
            L += pixelSampleState.L[w.pixelIndex];
            pixelSampleState.L[w.pixelIndex] = L;
M
Matt Pharr 已提交
715 716 717
        });
}

718
void WavefrontPathIntegrator::TraceShadowRays(int wavefrontDepth) {
719
    if (haveMedia)
720
        aggregate->IntersectShadowTr(maxQueueSize, shadowRayQueue, &pixelSampleState);
721
    else
722
        aggregate->IntersectShadow(maxQueueSize, shadowRayQueue, &pixelSampleState);
723
    // Reset shadow ray queue
724 725
    Do(
        "Reset shadowRayQueue", PBRT_CPU_GPU_LAMBDA() {
726
            stats->shadowRays[wavefrontDepth] += shadowRayQueue->Size();
727 728 729 730
            shadowRayQueue->Reset();
        });
}

731
// Allocates the per-depth ray counters with the given allocator:
// indirectRays gets maxDepth + 1 entries and shadowRays gets maxDepth entries.
WavefrontPathIntegrator::Stats::Stats(int maxDepth, Allocator alloc)
    : indirectRays(maxDepth + 1, alloc),
      shadowRays(maxDepth, alloc) {
}

734
// Builds a multi-line report of the ray counters: one row for camera rays,
// then one row per depth for indirect rays and for shadow rays.
std::string WavefrontPathIntegrator::Stats::Print() const {
    // All rows share one layout: left-justified label, right-justified count
    const char *rowFormat = "    %-42s               %12" PRIu64 "\n";

    std::string report = StringPrintf(rowFormat, "Camera rays", cameraRays);

    // Index 0 of indirectRays is not reported here; rows start at depth 1
    for (int depth = 1; depth < indirectRays.size(); ++depth) {
        const std::string rowLabel = StringPrintf("Indirect rays, depth %-3d", depth);
        report += StringPrintf(rowFormat, rowLabel, indirectRays[depth]);
    }

    for (int depth = 0; depth < shadowRays.size(); ++depth) {
        const std::string rowLabel = StringPrintf("Shadow rays, depth %-3d", depth);
        report += StringPrintf(rowFormat, rowLabel, shadowRays[depth]);
    }

    return report;
}

}  // namespace pbrt