// pbrt is Copyright(c) 1998-2020 Matt Pharr, Wenzel Jakob, and Greg Humphreys.
// The pbrt source code is licensed under the Apache License, Version 2.0.
// SPDX: Apache-2.0

#include <pbrt/wavefront/integrator.h>

#include <pbrt/base/medium.h>
#include <pbrt/cameras.h>
#include <pbrt/film.h>
#include <pbrt/filters.h>
#ifdef PBRT_BUILD_GPU_RENDERER
#include <pbrt/gpu/aggregate.h>
#include <pbrt/gpu/memory.h>
#endif  // PBRT_BUILD_GPU_RENDERER
#include <pbrt/lights.h>
#include <pbrt/lightsamplers.h>
#include <pbrt/util/color.h>
#include <pbrt/util/colorspace.h>
#include <pbrt/util/display.h>
#include <pbrt/util/file.h>
#include <pbrt/util/image.h>
#include <pbrt/util/log.h>
#include <pbrt/util/print.h>
#include <pbrt/util/progressreporter.h>
#include <pbrt/util/pstd.h>
#include <pbrt/util/spectrum.h>
#include <pbrt/util/stats.h>
#include <pbrt/util/string.h>
#include <pbrt/util/taggedptr.h>
#include <pbrt/wavefront/aggregate.h>

#include <atomic>
#include <chrono>
#include <cstring>
#include <iostream>
#include <map>
#include <string>
#include <thread>
#include <vector>

#ifdef PBRT_BUILD_GPU_RENDERER
#include <cuda.h>
#include <cuda_runtime.h>
#endif  // PBRT_BUILD_GPU_RENDERER

namespace pbrt {

// Memory statistic for the wavefront integrator's pixel state and work
// queues. The WavefrontPathIntegrator constructor adds to it the number of
// bytes allocated while those buffers are created (GPU rendering only).
STAT_MEMORY_COUNTER("Memory/Wavefront integrator pixel state", pathIntegratorBytes);
// Records which material-evaluation paths and optional queues the wavefront
// integrator needs in order to handle the material _m_.
//
// - A null material is an "interface" material: it sets _*haveMedia_ so that
//   the medium-related queues and kernels are enabled even if no media are
//   otherwise referenced by the scene.
// - A MixMaterial must be evaluable with the BasicTextureEvaluator (fatal
//   error otherwise); both of its constituent materials are then visited
//   recursively.
// - Any other material sets _*haveSubsurface_ if it has subsurface
//   scattering and marks its tag in _*haveBasicEvalMaterial_ if all of its
//   textures (including displacement) can be evaluated by the
//   BasicTextureEvaluator, or in _*haveUniversalEvalMaterial_ otherwise.
//
// All output parameters are only ever set to true, never cleared, so the
// function can be called repeatedly to accumulate over many materials.
static void updateMaterialNeeds(
    Material m, pstd::array<bool, Material::NumTags()> *haveBasicEvalMaterial,
    pstd::array<bool, Material::NumTags()> *haveUniversalEvalMaterial,
    bool *haveSubsurface, bool *haveMedia) {
    if (!m) {
        // Interface material. This has to be recorded before returning;
        // testing for a null material after the early-out can never succeed.
        *haveMedia = true;
        return;
    }

    if (MixMaterial *mix = m.CastOrNullptr<MixMaterial>(); mix) {
        // This is a somewhat odd place for this check, but it's convenient...
        if (!m.CanEvaluateTextures(BasicTextureEvaluator()))
            ErrorExit("\"mix\" material has a texture that can't be evaluated with the "
                      "BasicTextureEvaluator, which is all that is currently supported "
                      "int the wavefront renderer--sorry! %s",
                      *mix);

        // The mix itself is never evaluated directly; only its two
        // constituent materials contribute requirements.
        updateMaterialNeeds(mix->GetMaterial(0), haveBasicEvalMaterial,
                            haveUniversalEvalMaterial, haveSubsurface, haveMedia);
        updateMaterialNeeds(mix->GetMaterial(1), haveBasicEvalMaterial,
                            haveUniversalEvalMaterial, haveSubsurface, haveMedia);
        return;
    }

    *haveSubsurface |= m.HasSubsurfaceScattering();

    // Choose the evaluation path: the basic one is only usable when both the
    // material's own textures and its displacement texture (if any) can be
    // handled by the BasicTextureEvaluator.
    FloatTexture displace = m.GetDisplacement();
    if (m.CanEvaluateTextures(BasicTextureEvaluator()) &&
        (!displace || BasicTextureEvaluator().CanEvaluate({displace}, {})))
        (*haveBasicEvalMaterial)[m.Tag()] = true;
    else
        (*haveUniversalEvalMaterial)[m.Tag()] = true;
}

// Builds the wavefront path integrator for _scene_.
//
// The constructor creates the scene's media, textures, lights and materials,
// builds the ray intersection aggregate (OptiX when rendering on the GPU,
// the CPU aggregate otherwise), creates the light sampler, reads the
// integrator parameters, and finally allocates all of the work queues and
// per-pixel state from _memoryResource_.
//
// It exits with an error if the scene has no light sources or if an
// unsupported command-line option was given.
WavefrontPathIntegrator::WavefrontPathIntegrator(
    pstd::pmr::memory_resource *memoryResource, BasicScene &scene)
    : memoryResource(memoryResource) {
    ThreadLocal<Allocator> threadAllocators(
        [memoryResource]() { return Allocator(memoryResource); });

    Allocator alloc = threadAllocators.Get();

    // Allocate all of the data structures that represent the scene...
    std::map<std::string, Medium> media = scene.CreateMedia();

    // "haveMedia" is a bit of a misnomer in that it determines both whether
    // queues are allocated for the medium sampling kernels and whether they
    // are launched, as well as whether the ray marching shadow ray kernel is
    // launched... Thus, it will be true if there actually are no media,
    // but some "interface" materials are present in the scene.
    haveMedia = false;
    // Check the shapes...
    for (const auto &shape : scene.shapes)
        if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
            haveMedia = true;
    for (const auto &shape : scene.animatedShapes)
        if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
            haveMedia = true;

    // Textures
    LOG_VERBOSE("Starting to create textures");
    NamedTextures textures = scene.CreateTextures();
    LOG_VERBOSE("Done creating textures");

    LOG_VERBOSE("Starting to create lights");
    pstd::vector<Light> allLights;
    std::map<int, pstd::vector<Light> *> shapeIndexToAreaLights;

    infiniteLights = alloc.new_object<pstd::vector<Light>>(alloc);

    // Infinite lights are additionally kept in their own list so that
    // escaped rays can be shaded against just those lights.
    for (Light l : scene.CreateLights(textures, &shapeIndexToAreaLights)) {
        if (l.Is<UniformInfiniteLight>() || l.Is<ImageInfiniteLight>() ||
            l.Is<PortalImageInfiniteLight>())
            infiniteLights->push_back(l);

        allLights.push_back(l);
    }
    LOG_VERBOSE("Done creating lights");

    LOG_VERBOSE("Starting to create materials");
    std::map<std::string, pbrt::Material> namedMaterials;
    std::vector<pbrt::Material> materials;
    scene.CreateMaterials(textures, &namedMaterials, &materials);

    // Determine which material evaluation kernels and optional queues are
    // needed for the materials that are actually present.
    haveBasicEvalMaterial.fill(false);
    haveUniversalEvalMaterial.fill(false);
    haveSubsurface = false;
    for (Material m : materials)
        updateMaterialNeeds(m, &haveBasicEvalMaterial, &haveUniversalEvalMaterial,
                            &haveSubsurface, &haveMedia);
    for (const auto &m : namedMaterials)
        updateMaterialNeeds(m.second, &haveBasicEvalMaterial, &haveUniversalEvalMaterial,
                            &haveSubsurface, &haveMedia);
    LOG_VERBOSE("Finished creating materials");

    if (Options->useGPU) {
#ifdef PBRT_BUILD_GPU_RENDERER
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        aggregate = new OptiXAggregate(scene, mr, textures, shapeIndexToAreaLights, media,
                                       namedMaterials, materials);
#else
        LOG_FATAL("Options->useGPU was set without PBRT_BUILD_GPU_RENDERER enabled");
#endif
    } else
        aggregate = new CPUAggregate(scene, textures, shapeIndexToAreaLights, media,
                                     namedMaterials, materials);

    // Preprocess the light sources
    for (Light light : allLights)
        light.Preprocess(aggregate->Bounds());

    // Emissive media count as light sources, too.
    bool haveLights = !allLights.empty();
    for (const auto &m : media)
        haveLights |= m.second.IsEmissive();
    if (!haveLights)
        ErrorExit("No light sources specified");

    LOG_VERBOSE("Starting to create light sampler");
    std::string lightSamplerName =
        scene.integrator.parameters.GetOneString("lightsampler", "bvh");
    // With a single light, the uniform sampler is always used regardless of
    // what was requested.
    if (allLights.size() == 1)
        lightSamplerName = "uniform";
    lightSampler = LightSampler::Create(lightSamplerName, allLights, alloc);
    LOG_VERBOSE("Finished creating light sampler");

    if (scene.integrator.name != "path" && scene.integrator.name != "volpath")
        Warning(&scene.integrator.loc,
                "Ignoring specified integrator \"%s\": the wavefront integrator "
                "always uses a \"volpath\" integrator.",
                scene.integrator.name);

    // Integrator parameters
    regularize = scene.integrator.parameters.GetOneBool("regularize", false);
    maxDepth = scene.integrator.parameters.GetOneInt("maxdepth", 5);

    camera = scene.GetCamera();
    film = camera.GetFilm();
    filter = film.GetFilter();
    sampler = scene.GetSampler();

    initializeVisibleSurface = film.UsesVisibleSurface();
    samplesPerPixel = sampler.SamplesPerPixel();

    // Warn about unsupported stuff...
    if (Options->forceDiffuse)
        ErrorExit("The wavefront integrator does not support --force-diffuse.");
    if (Options->writePartialImages)
        Warning("The wavefront integrator does not support --write-partial-images.");
    if (Options->recordPixelStatistics)
        ErrorExit("The wavefront integrator does not support --pixelstats.");
    if (!Options->mseReferenceImage.empty())
        ErrorExit("The wavefront integrator does not support --mse-reference-image.");
    if (!Options->mseReferenceOutput.empty())
        ErrorExit("The wavefront integrator does not support --mse-reference-out.");

    ///////////////////////////////////////////////////////////////////////////
    // Allocate storage for all of the queues/buffers...

#ifdef PBRT_BUILD_GPU_RENDERER
    // Remember how much memory was in use before the queues are allocated so
    // that their footprint can be reported in the statistics.
    size_t startSize = 0;
    if (Options->useGPU) {
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        startSize = mr->BytesAllocated();
    }
#endif  // PBRT_BUILD_GPU_RENDERER

    // Compute number of scanlines to render per pass
    Vector2i resolution = film.PixelBounds().Diagonal();
    // TODO: make this configurable. Base it on the amount of GPU memory?
    int maxSamples = 1024 * 1024;
    scanlinesPerPass = std::max(1, maxSamples / resolution.x);
    // Distribute the scanlines evenly over the passes so that the last pass
    // isn't much smaller than the others.
    int nPasses = (resolution.y + scanlinesPerPass - 1) / scanlinesPerPass;
    scanlinesPerPass = (resolution.y + nPasses - 1) / nPasses;
    maxQueueSize = resolution.x * scanlinesPerPass;
    LOG_VERBOSE("Will render in %d passes %d scanlines per pass\n", nPasses,
                scanlinesPerPass);

    pixelSampleState = SOA<PixelSampleState>(maxQueueSize, alloc);

    rayQueues[0] = alloc.new_object<RayQueue>(maxQueueSize, alloc);
    rayQueues[1] = alloc.new_object<RayQueue>(maxQueueSize, alloc);

    shadowRayQueue = alloc.new_object<ShadowRayQueue>(maxQueueSize, alloc);

    // The subsurface, escaped-ray and medium queues are only allocated when
    // the scene needs them.
    if (haveSubsurface) {
        bssrdfEvalQueue =
            alloc.new_object<GetBSSRDFAndProbeRayQueue>(maxQueueSize, alloc);
        subsurfaceScatterQueue =
            alloc.new_object<SubsurfaceScatterQueue>(maxQueueSize, alloc);
    }

    if (!infiniteLights->empty())
        escapedRayQueue = alloc.new_object<EscapedRayQueue>(maxQueueSize, alloc);
    hitAreaLightQueue = alloc.new_object<HitAreaLightQueue>(maxQueueSize, alloc);

    // The spans passed to the material queues skip the first entry of the
    // per-tag arrays.
    basicEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveBasicEvalMaterial[1], haveBasicEvalMaterial.size() - 1));
    universalEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveUniversalEvalMaterial[1],
                            haveUniversalEvalMaterial.size() - 1));

    if (haveMedia) {
        mediumSampleQueue = alloc.new_object<MediumSampleQueue>(maxQueueSize, alloc);

        // TODO: in the presence of multiple PhaseFunction implementations,
        // it could be worthwhile to see which are present in the scene and
        // then initialize havePhase accordingly...
        pstd::array<bool, PhaseFunction::NumTags()> havePhase;
        havePhase.fill(true);
        mediumScatterQueue =
            alloc.new_object<MediumScatterQueue>(maxQueueSize, alloc, havePhase);
    }

    stats = alloc.new_object<Stats>(maxDepth, alloc);

#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        size_t endSize = mr->BytesAllocated();
        pathIntegratorBytes += endSize - startSize;
    }
#endif  // PBRT_BUILD_GPU_RENDERER
}

// WavefrontPathIntegrator Method Definitions

// Renders the image and returns the elapsed rendering time in seconds.
//
// For each sample index, the image is processed in passes of
// _scanlinesPerPass_ scanlines: camera rays are generated, and then for each
// wavefront depth up to _maxDepth_ the closest intersections are found,
// medium interactions, escaped rays and emitters are handled, materials are
// evaluated, and shadow and subsurface rays are traced. After each pass the
// film is updated.
//
// If a display server is configured, the work-in-progress image is made
// available to it; when rendering on the GPU this is done via a device-side
// staging buffer that a helper thread periodically copies to the host.
Float WavefrontPathIntegrator::Render() {
    Bounds2i pixelBounds = film.PixelBounds();
    Vector2i resolution = pixelBounds.Diagonal();
    Timer timer;
    // Prefetch allocations to GPU memory
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        int deviceIndex;
        CUDA_CHECK(cudaGetDevice(&deviceIndex));
        int hasConcurrentManagedAccess;
        CUDA_CHECK(cudaDeviceGetAttribute(&hasConcurrentManagedAccess,
                                          cudaDevAttrConcurrentManagedAccess,
                                          deviceIndex));

        if (hasConcurrentManagedAccess) {
            // Set things up so that we can still have read from the
            // WavefrontPathIntegrator struct on the CPU without hurting
            // performance. (This makes it possible to use the values of things
            // like WavefrontPathIntegrator::haveSubsurface to conditionally launch
            // kernels according to what's in the scene...)
            CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetReadMostly,
                                     /* ignored argument */ 0));
            CUDA_CHECK(cudaMemAdvise(this, sizeof(*this),
                                     cudaMemAdviseSetPreferredLocation, deviceIndex));

            // Copy all of the scene data structures over to GPU memory.  This
            // ensures that there isn't a big performance hitch for the first batch
            // of rays as that stuff is copied over on demand.
            CUDATrackedMemoryResource *mr =
                dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
            CHECK(mr);
            mr->PrefetchToGPU();
        } else {
            // TODO: on systems with basic unified memory, just launching a
            // kernel should cause everything to be copied over. Is an empty
            // kernel sufficient?
        }
    }
#endif  // PBRT_BUILD_GPU_RENDERER

    // Launch thread to copy image for display server, if enabled
    RGB *displayRGB = nullptr, *displayRGBHost = nullptr;
    std::atomic<bool> exitCopyThread{false};
    std::thread copyThread;

    if (!Options->displayServer.empty()) {
#ifdef PBRT_BUILD_GPU_RENDERER
        if (Options->useGPU) {
            // Allocate staging memory on the GPU to store the current WIP
            // image.
            CUDA_CHECK(
                cudaMalloc(&displayRGB, resolution.x * resolution.y * sizeof(RGB)));
            CUDA_CHECK(
                cudaMemset(displayRGB, 0, resolution.x * resolution.y * sizeof(RGB)));

            // Host-side memory for the WIP Image.  We'll just let this leak so
            // that the lambda passed to DisplayDynamic below doesn't access
            // freed memory after Render() returns...
            displayRGBHost = new RGB[resolution.x * resolution.y];

            // The thread captures locals of Render() by reference; this is
            // safe because it is joined before Render() returns.
            copyThread = std::thread([&]() {
                GPURegisterThread("DISPLAY_SERVER_COPY_THREAD");

                // Copy back to the CPU using a separate stream so that we can
                // periodically but asynchronously pick up the latest results
                // from the GPU.
                cudaStream_t memcpyStream;
                CUDA_CHECK(cudaStreamCreate(&memcpyStream));
                GPUNameStream(memcpyStream, "DISPLAY_SERVER_COPY_STREAM");

                // Copy back to the host from the GPU buffer, without any
                // synchronization.
                while (!exitCopyThread) {
                    CUDA_CHECK(cudaMemcpyAsync(displayRGBHost, displayRGB,
                                               resolution.x * resolution.y * sizeof(RGB),
                                               cudaMemcpyDeviceToHost, memcpyStream));
                    std::this_thread::sleep_for(std::chrono::milliseconds(50));

                    CUDA_CHECK(cudaStreamSynchronize(memcpyStream));
                }

                // Copy one more time to get the final image before exiting.
                CUDA_CHECK(cudaMemcpy(displayRGBHost, displayRGB,
                                      resolution.x * resolution.y * sizeof(RGB),
                                      cudaMemcpyDeviceToHost));
                CUDA_CHECK(cudaDeviceSynchronize());

                // All copies issued on the stream have completed; release it.
                CUDA_CHECK(cudaStreamDestroy(memcpyStream));
            });

            // Now on the CPU side, give the display system a lambda that
            // copies values from |displayRGBHost| into its buffers used for
            // sending messages to the display program (i.e., tev).
            DisplayDynamic(film.GetFilename(), {resolution.x, resolution.y},
                           {"R", "G", "B"},
                           [resolution, displayRGBHost](
                               Bounds2i b, pstd::span<pstd::span<Float>> displayValue) {
                               int index = 0;
                               for (Point2i p : b) {
                                   RGB rgb = displayRGBHost[p.x + p.y * resolution.x];
                                   displayValue[0][index] = rgb.r;
                                   displayValue[1][index] = rgb.g;
                                   displayValue[2][index] = rgb.b;
                                   ++index;
                               }
                           });
        } else
#endif  // PBRT_BUILD_GPU_RENDERER
            // CPU rendering: the display callback reads pixel values
            // directly from the film.
            DisplayDynamic(
                film.GetFilename(), Point2i(pixelBounds.Diagonal()), {"R", "G", "B"},
                [pixelBounds, this](Bounds2i b,
                                    pstd::span<pstd::span<Float>> displayValue) {
                    int index = 0;
                    for (Point2i p : b) {
                        RGB rgb =
                            film.GetPixelRGB(pixelBounds.pMin + p, 1.f /* splat scale */);
                        for (int c = 0; c < 3; ++c)
                            displayValue[c][index] = rgb[c];
                        ++index;
                    }
                });
    }

    // Loop over sample indices and evaluate pixel samples
    int firstSampleIndex = 0, lastSampleIndex = samplesPerPixel;
    // Update sample index range based on debug start, if provided
    if (!Options->debugStart.empty()) {
        std::vector<int> values = SplitStringToInts(Options->debugStart, ',');
        if (values.size() != 1 && values.size() != 2)
            ErrorExit("Expected either one or two integer values for --debugstart.");

        // The first value is the starting sample index; the optional second
        // value is the number of samples to render (one if not given).
        firstSampleIndex = values[0];
        if (values.size() == 2)
            lastSampleIndex = firstSampleIndex + values[1];
        else
            lastSampleIndex = firstSampleIndex + 1;
    }

    ProgressReporter progress(lastSampleIndex - firstSampleIndex, "Rendering",
                              Options->quiet, Options->useGPU);
    for (int sampleIndex = firstSampleIndex; sampleIndex < lastSampleIndex;
         ++sampleIndex) {
        // Attempt to work around issue #145.
#if !(defined(PBRT_IS_WINDOWS) && defined(PBRT_BUILD_GPU_RENDERER) && \
      __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 1)
        CheckCallbackScope _([&]() {
            return StringPrintf("Wavefront rendering failed at sample %d. Debug with "
                                "\"--debugstart %d\"\n",
                                sampleIndex, sampleIndex);
        });
#endif

        // Render image for sample _sampleIndex_
        LOG_VERBOSE("Starting to submit work for sample %d", sampleIndex);
        for (int y0 = pixelBounds.pMin.y; y0 < pixelBounds.pMax.y;
             y0 += scanlinesPerPass) {
            // Generate camera rays for current scanline range
            RayQueue *cameraRayQueue = CurrentRayQueue(0);
            Do(
                "Reset ray queue", PBRT_CPU_GPU_LAMBDA() {
                    PBRT_DBG("Starting scanlines at y0 = %d, sample %d / %d\n", y0,
                             sampleIndex, samplesPerPixel);
                    cameraRayQueue->Reset();
                });
            GenerateCameraRays(y0, sampleIndex);
            Do(
                "Update camera ray stats",
                PBRT_CPU_GPU_LAMBDA() { stats->cameraRays += cameraRayQueue->Size(); });

            // Trace rays and estimate radiance up to maximum ray depth
            for (int wavefrontDepth = 0; true; ++wavefrontDepth) {
                // Reset queues before tracing rays
                RayQueue *nextQueue = NextRayQueue(wavefrontDepth);
                Do(
                    "Reset queues before tracing rays", PBRT_CPU_GPU_LAMBDA() {
                        nextQueue->Reset();
                        // Reset queues before tracing next batch of rays
                        if (mediumSampleQueue)
                            mediumSampleQueue->Reset();
                        if (mediumScatterQueue)
                            mediumScatterQueue->Reset();

                        if (escapedRayQueue)
                            escapedRayQueue->Reset();
                        hitAreaLightQueue->Reset();

                        basicEvalMaterialQueue->Reset();
                        universalEvalMaterialQueue->Reset();

                        if (bssrdfEvalQueue)
                            bssrdfEvalQueue->Reset();
                        if (subsurfaceScatterQueue)
                            subsurfaceScatterQueue->Reset();
                    });

                // Follow active ray paths and accumulate radiance estimates
                GenerateRaySamples(wavefrontDepth, sampleIndex);

                // Find closest intersections along active rays
                aggregate->IntersectClosest(
                    maxQueueSize, CurrentRayQueue(wavefrontDepth), escapedRayQueue,
                    hitAreaLightQueue, basicEvalMaterialQueue, universalEvalMaterialQueue,
                    mediumSampleQueue, NextRayQueue(wavefrontDepth));

                if (wavefrontDepth > 0) {
                    // Count the rays in the queue that was just traced; camera
                    // rays (depth 0) are accounted for separately above.
                    RayQueue *statsQueue = CurrentRayQueue(wavefrontDepth);
                    Do(
                        "Update indirect ray stats", PBRT_CPU_GPU_LAMBDA() {
                            stats->indirectRays[wavefrontDepth] += statsQueue->Size();
                        });
                }

                SampleMediumInteraction(wavefrontDepth);

                HandleEscapedRays();

                HandleEmissiveIntersection();

                // At the maximum depth only emission is accounted for; no
                // further scattering is evaluated.
                if (wavefrontDepth == maxDepth)
                    break;

                EvaluateMaterialsAndBSDFs(wavefrontDepth);

                // Do immediately so that we have space for shadow rays for subsurface..
                TraceShadowRays(wavefrontDepth);

                SampleSubsurface(wavefrontDepth);
            }

            UpdateFilm();
            // Copy updated film pixels to buffer for display
#ifdef PBRT_BUILD_GPU_RENDERER
            if (Options->useGPU && !Options->displayServer.empty())
                GPUParallelFor(
                    "Update Display RGB Buffer", maxQueueSize,
                    PBRT_CPU_GPU_LAMBDA(int pixelIndex) {
                        Point2i pPixel = pixelSampleState.pPixel[pixelIndex];
                        if (!InsideExclusive(pPixel, film.PixelBounds()))
                            return;

                        Point2i p(pPixel - film.PixelBounds().pMin);
                        displayRGB[p.x + p.y * resolution.x] = film.GetPixelRGB(pPixel);
                    });
#endif  //  PBRT_BUILD_GPU_RENDERER
        }

        progress.Update();
    }
    progress.Done();

#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU)
        GPUWait();
#endif  // PBRT_BUILD_GPU_RENDERER
    Float seconds = timer.ElapsedSeconds();
    // Shut down display server thread, if active
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        // Wait until rendering is all done before we start to shut down the
        // display stuff..
        if (!Options->displayServer.empty()) {
            exitCopyThread = true;
            copyThread.join();
        }

        // Another synchronization to make sure no kernels are running on the
        // GPU so that we can safely access unified memory from the CPU.
        GPUWait();

        // Neither a kernel nor the (now joined) copy thread uses the
        // device-side staging buffer any longer, so release it. The display
        // callback only reads the host-side copy.
        if (displayRGB) {
            CUDA_CHECK(cudaFree(displayRGB));
            displayRGB = nullptr;
        }
    }
#endif  // PBRT_BUILD_GPU_RENDERER

    return seconds;
}

void WavefrontPathIntegrator::HandleEscapedRays() {
566 567
    if (!escapedRayQueue)
        return;
568 569
    ForAllQueued(
        "Handle escaped rays", escapedRayQueue, maxQueueSize,
570
        PBRT_CPU_GPU_LAMBDA(const EscapedRayWorkItem w) {
571
            // Compute weighted radiance for escaped ray
572
            SampledSpectrum L(0.f);
573
            for (const auto &light : *infiniteLights) {
574 575
                if (SampledSpectrum Le = light.Le(Ray(w.rayo, w.rayd), w.lambda); Le) {
                    // Compute path radiance contribution from infinite light
576 577
                    PBRT_DBG("L %f %f %f %f beta %f %f %f %f Le %f %f %f %f", L[0], L[1],
                             L[2], L[3], w.beta[0], w.beta[1], w.beta[2], w.beta[3],
578
                             Le[0], Le[1], Le[2], Le[3]);
579
                    PBRT_DBG("pdf uni %f %f %f %f pdf nee %f %f %f %f", w.inv_w_u[0],
M
Matt Pharr 已提交
580 581
                             w.inv_w_u[1], w.inv_w_u[2], w.inv_w_u[3], w.inv_w_l[0],
                             w.inv_w_l[1], w.inv_w_l[2], w.inv_w_l[3]);
582

583
                    if (w.depth == 0 || w.specularBounce) {
584
                        L += w.beta * Le / w.inv_w_u.Average();
585 586 587
                    } else {
                        // Compute MIS-weighted radiance contribution from infinite light
                        LightSampleContext ctx = w.prevIntrCtx;
588
                        Float lightChoicePDF = lightSampler.PMF(ctx, light);
M
Matt Pharr 已提交
589 590
                        SampledSpectrum inv_w_l =
                            w.inv_w_l * lightChoicePDF * light.PDF_Li(ctx, w.rayd, true);
591
                        L += w.beta * Le / (w.inv_w_u + inv_w_l).Average();
592 593
                    }
                }
594
            }
595

596
            // Update pixel radiance if ray's radiance is nonzero
597
            if (L) {
598 599
                PBRT_DBG("Added L %f %f %f %f for escaped ray pixel index %d\n", L[0],
                         L[1], L[2], L[3], w.pixelIndex);
600

601 602 603
                L += pixelSampleState.L[w.pixelIndex];
                pixelSampleState.L[w.pixelIndex] = L;
            }
604
        });
M
Matt Pharr 已提交
605 606
}

607
void WavefrontPathIntegrator::HandleEmissiveIntersection() {
M
Matt Pharr 已提交
608 609
    ForAllQueued(
        "Handle emitters hit by indirect rays", hitAreaLightQueue, maxQueueSize,
610
        PBRT_CPU_GPU_LAMBDA(const HitAreaLightWorkItem w) {
611
            // Find emitted radiance from surface that ray hit
M
Matt Pharr 已提交
612
            SampledSpectrum Le = w.areaLight.L(w.p, w.n, w.uv, w.wo, w.lambda);
M
Matt Pharr 已提交
613 614
            if (!Le)
                return;
615
            PBRT_DBG("Got Le %f %f %f %f from hit area light at depth %d\n", Le[0], Le[1],
616
                     Le[2], Le[3], w.depth);
M
Matt Pharr 已提交
617

618
            // Compute area light's weighted radiance contribution to the path
619
            SampledSpectrum L(0.f);
620
            if (w.depth == 0 || w.specularBounce) {
621
                L = w.beta * Le / w.inv_w_u.Average();
M
Matt Pharr 已提交
622
            } else {
623
                // Compute MIS-weighted radiance contribution from area light
624 625
                Vector3f wi = -w.wo;
                LightSampleContext ctx = w.prevIntrCtx;
626
                Float lightChoicePDF = lightSampler.PMF(ctx, w.areaLight);
627
                Float lightPDF = lightChoicePDF * w.areaLight.PDF_Li(ctx, wi, true);
M
Matt Pharr 已提交
628

629 630 631
                SampledSpectrum inv_w_u = w.inv_w_u;
                SampledSpectrum inv_w_l = w.inv_w_l * lightPDF;
                L = w.beta * Le / (inv_w_u + inv_w_l).Average();
M
Matt Pharr 已提交
632 633
            }

634
            PBRT_DBG("Added L %f %f %f %f for pixel index %d\n", L[0], L[1], L[2], L[3],
635
                     w.pixelIndex);
636

637
            // Update _L_ in _PixelSampleState_ for area light's radiance
638 639
            L += pixelSampleState.L[w.pixelIndex];
            pixelSampleState.L[w.pixelIndex] = L;
M
Matt Pharr 已提交
640 641 642
        });
}

643
void WavefrontPathIntegrator::TraceShadowRays(int wavefrontDepth) {
644
    if (haveMedia)
645
        aggregate->IntersectShadowTr(maxQueueSize, shadowRayQueue, &pixelSampleState);
646
    else
647
        aggregate->IntersectShadow(maxQueueSize, shadowRayQueue, &pixelSampleState);
648
    // Reset shadow ray queue
649 650
    Do(
        "Reset shadowRayQueue", PBRT_CPU_GPU_LAMBDA() {
651
            stats->shadowRays[wavefrontDepth] += shadowRayQueue->Size();
652 653 654 655
            shadowRayQueue->Reset();
        });
}

656
// Allocates the per-depth ray counters from _alloc_: _maxDepth_ + 1 entries
// for indirect rays and _maxDepth_ entries for shadow rays.
WavefrontPathIntegrator::Stats::Stats(int maxDepth, Allocator alloc)
    : indirectRays(maxDepth + 1, alloc),
      shadowRays(maxDepth, alloc) {
}

// Returns a multi-line report of the ray counts: one row for camera rays,
// one per depth for indirect rays (starting at depth 1), and one per depth
// for shadow rays.
std::string WavefrontPathIntegrator::Stats::Print() const {
    std::string report;

    // Appends a single "<label> <count>" row to the report
    auto addRow = [&report](const std::string &label, const auto &count) {
        report += StringPrintf("    %-42s               %12" PRIu64 "\n", label, count);
    };

    addRow("Camera rays", cameraRays);
    // Entry 0 of _indirectRays_ is skipped: depth-0 rays are the camera rays
    for (int depth = 1; depth < indirectRays.size(); ++depth)
        addRow(StringPrintf("Indirect rays, depth %-3d", depth), indirectRays[depth]);
    for (int depth = 0; depth < shadowRays.size(); ++depth)
        addRow(StringPrintf("Shadow rays, depth %-3d", depth), shadowRays[depth]);

    return report;
}

}  // namespace pbrt