integrator.cpp 28.2 KB
Newer Older
M
Matt Pharr 已提交
1 2 3 4
// pbrt is Copyright(c) 1998-2020 Matt Pharr, Wenzel Jakob, and Greg Humphreys.
// The pbrt source code is licensed under the Apache License, Version 2.0.
// SPDX: Apache-2.0

5
#include <pbrt/wavefront/integrator.h>
M
Matt Pharr 已提交
6 7 8 9 10

#include <pbrt/base/medium.h>
#include <pbrt/cameras.h>
#include <pbrt/film.h>
#include <pbrt/filters.h>
11 12 13
#ifdef PBRT_BUILD_GPU_RENDERER
#include <pbrt/gpu/aggregate.h>
#include <pbrt/gpu/memory.h>
14
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
15 16 17 18 19 20 21 22 23 24 25
#include <pbrt/lights.h>
#include <pbrt/lightsamplers.h>
#include <pbrt/util/color.h>
#include <pbrt/util/colorspace.h>
#include <pbrt/util/display.h>
#include <pbrt/util/file.h>
#include <pbrt/util/image.h>
#include <pbrt/util/log.h>
#include <pbrt/util/print.h>
#include <pbrt/util/progressreporter.h>
#include <pbrt/util/pstd.h>
26
#include <pbrt/util/spectrum.h>
M
Matt Pharr 已提交
27
#include <pbrt/util/stats.h>
M
Matt Pharr 已提交
28
#include <pbrt/util/string.h>
M
Matt Pharr 已提交
29
#include <pbrt/util/taggedptr.h>
30
#include <pbrt/wavefront/aggregate.h>
M
Matt Pharr 已提交
31

32
#include <atomic>
M
Matt Pharr 已提交
33 34 35 36
#include <cstring>
#include <iostream>
#include <map>

37
#ifdef PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
38 39
#include <cuda.h>
#include <cuda_runtime.h>
40
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
41 42 43

namespace pbrt {

44
STAT_MEMORY_COUNTER("Memory/Wavefront integrator pixel state", pathIntegratorBytes);
M
Matt Pharr 已提交
45

46 47 48
static void updateMaterialNeeds(
    Material m, pstd::array<bool, Material::NumTags()> *haveBasicEvalMaterial,
    pstd::array<bool, Material::NumTags()> *haveUniversalEvalMaterial,
49
    bool *haveSubsurface, bool *haveMedia) {
50 51 52 53 54 55 56 57 58 59 60 61
    if (!m)
        return;

    if (MixMaterial *mix = m.CastOrNullptr<MixMaterial>(); mix) {
        // This is a somewhat odd place for this check, but it's convenient...
        if (!m.CanEvaluateTextures(BasicTextureEvaluator()))
            ErrorExit("\"mix\" material has a texture that can't be evaluated with the "
                      "BasicTextureEvaluator, which is all that is currently supported "
                      "int the wavefront renderer--sorry! %s",
                      *mix);

        updateMaterialNeeds(mix->GetMaterial(0), haveBasicEvalMaterial,
62
                            haveUniversalEvalMaterial, haveSubsurface, haveMedia);
63
        updateMaterialNeeds(mix->GetMaterial(1), haveBasicEvalMaterial,
64
                            haveUniversalEvalMaterial, haveSubsurface, haveMedia);
65 66 67 68
        return;
    }

    *haveSubsurface |= m.HasSubsurfaceScattering();
69
    *haveMedia |= (m == nullptr);  // interface material
70 71 72 73 74 75 76 77

    FloatTexture displace = m.GetDisplacement();
    if (m.CanEvaluateTextures(BasicTextureEvaluator()) &&
        (!displace || BasicTextureEvaluator().CanEvaluate({displace}, {})))
        (*haveBasicEvalMaterial)[m.Tag()] = true;
    else
        (*haveUniversalEvalMaterial)[m.Tag()] = true;
}
78

79
WavefrontPathIntegrator::WavefrontPathIntegrator(
M
Matt Pharr 已提交
80
    pstd::pmr::memory_resource *memoryResource, BasicScene &scene)
81
    : memoryResource(memoryResource) {
82 83
    ThreadLocal<Allocator> threadAllocators(
        [memoryResource]() { return Allocator(memoryResource); });
84

85
    Allocator alloc = threadAllocators.Get();
86

M
Matt Pharr 已提交
87
    // Allocate all of the data structures that represent the scene...
88
    std::map<std::string, Medium> media = scene.CreateMedia();
M
Matt Pharr 已提交
89

90 91 92 93 94
    // "haveMedia" is a bit of a misnomer in that determines both whether
    // queues are allocated for the medium sampling kernels and they are
    // launched as well as whether the ray marching shadow ray kernel is
    // launched... Thus, it will be true if there actually are no media,
    // but some "interface" materials are present in the scene.
M
Matt Pharr 已提交
95
    haveMedia = false;
96
    // Check the shapes and instance definitions...
M
Matt Pharr 已提交
97 98 99 100 101 102
    for (const auto &shape : scene.shapes)
        if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
            haveMedia = true;
    for (const auto &shape : scene.animatedShapes)
        if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
            haveMedia = true;
103 104 105 106 107 108 109 110
    for (const auto &instanceDefinition: scene.instanceDefinitions) {
        for (const auto &shape : instanceDefinition.second->shapes)
            if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
                haveMedia = true;
        for (const auto &shape : instanceDefinition.second->animatedShapes)
            if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
                haveMedia = true;
    }
M
Matt Pharr 已提交
111

112 113
    // Textures
    LOG_VERBOSE("Starting to create textures");
114
    NamedTextures textures = scene.CreateTextures();
115 116
    LOG_VERBOSE("Done creating textures");

117
    LOG_VERBOSE("Starting to create lights");
118
    pstd::vector<Light> allLights;
119
    std::map<int, pstd::vector<Light> *> shapeIndexToAreaLights;
120

121
    infiniteLights = alloc.new_object<pstd::vector<Light>>(alloc);
M
Matt Pharr 已提交
122

123
    for (Light l : scene.CreateLights(textures, &shapeIndexToAreaLights)) {
M
Matt Pharr 已提交
124
        if (l.Is<UniformInfiniteLight>() || l.Is<ImageInfiniteLight>() ||
125
            l.Is<PortalImageInfiniteLight>())
126
            infiniteLights->push_back(l);
M
Matt Pharr 已提交
127 128 129

        allLights.push_back(l);
    }
130
    LOG_VERBOSE("Done creating lights");
M
Matt Pharr 已提交
131

132 133 134
    LOG_VERBOSE("Starting to create materials");
    std::map<std::string, pbrt::Material> namedMaterials;
    std::vector<pbrt::Material> materials;
M
Matt Pharr 已提交
135
    scene.CreateMaterials(textures, &namedMaterials, &materials);
136

M
Matt Pharr 已提交
137 138 139
    haveBasicEvalMaterial.fill(false);
    haveUniversalEvalMaterial.fill(false);
    haveSubsurface = false;
140 141
    for (Material m : materials)
        updateMaterialNeeds(m, &haveBasicEvalMaterial, &haveUniversalEvalMaterial,
142
                            &haveSubsurface, &haveMedia);
143 144
    for (const auto &m : namedMaterials)
        updateMaterialNeeds(m.second, &haveBasicEvalMaterial, &haveUniversalEvalMaterial,
145
                            &haveSubsurface, &haveMedia);
146 147
    LOG_VERBOSE("Finished creating materials");

148 149
    if (Options->useGPU) {
#ifdef PBRT_BUILD_GPU_RENDERER
150 151 152
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
153 154
        aggregate = new OptiXAggregate(scene, mr, textures, shapeIndexToAreaLights, media,
                                       namedMaterials, materials);
155 156 157 158
#else
        LOG_FATAL("Options->useGPU was set without PBRT_BUILD_GPU_RENDERER enabled");
#endif
    } else
159 160
        aggregate = new CPUAggregate(scene, textures, shapeIndexToAreaLights, media,
                                     namedMaterials, materials);
M
Matt Pharr 已提交
161 162

    // Preprocess the light sources
163
    for (Light light : allLights)
164
        light.Preprocess(aggregate->Bounds());
M
Matt Pharr 已提交
165 166 167 168 169 170 171

    bool haveLights = !allLights.empty();
    for (const auto &m : media)
        haveLights |= m.second.IsEmissive();
    if (!haveLights)
        ErrorExit("No light sources specified");

172
    LOG_VERBOSE("Starting to create light sampler");
M
Matt Pharr 已提交
173 174 175 176
    std::string lightSamplerName =
        scene.integrator.parameters.GetOneString("lightsampler", "bvh");
    if (allLights.size() == 1)
        lightSamplerName = "uniform";
177
    lightSampler = LightSampler::Create(lightSamplerName, allLights, alloc);
178
    LOG_VERBOSE("Finished creating light sampler");
M
Matt Pharr 已提交
179

180
    if (scene.integrator.name != "path" && scene.integrator.name != "volpath")
181
        Warning(&scene.integrator.loc,
182
                "Ignoring specified integrator \"%s\": the wavefront integrator "
183 184
                "always uses a \"volpath\" integrator.",
                scene.integrator.name);
185

M
Matt Pharr 已提交
186 187 188 189
    // Integrator parameters
    regularize = scene.integrator.parameters.GetOneBool("regularize", false);
    maxDepth = scene.integrator.parameters.GetOneInt("maxdepth", 5);

190 191 192 193 194 195 196 197
    camera = scene.GetCamera();
    film = camera.GetFilm();
    filter = film.GetFilter();
    sampler = scene.GetSampler();

    initializeVisibleSurface = film.UsesVisibleSurface();
    samplesPerPixel = sampler.SamplesPerPixel();

198 199
    // Warn about unsupported stuff...
    if (Options->forceDiffuse)
200
        ErrorExit("The wavefront integrator does not support --force-diffuse.");
201
    if (Options->writePartialImages)
202
        Warning("The wavefront integrator does not support --write-partial-images.");
203
    if (Options->recordPixelStatistics)
204
        ErrorExit("The wavefront integrator does not support --pixelstats.");
205
    if (!Options->mseReferenceImage.empty())
206
        ErrorExit("The wavefront integrator does not support --mse-reference-image.");
207
    if (!Options->mseReferenceOutput.empty())
208
        ErrorExit("The wavefront integrator does not support --mse-reference-out.");
209

210 211
        ///////////////////////////////////////////////////////////////////////////
        // Allocate storage for all of the queues/buffers...
M
Matt Pharr 已提交
212

213
#ifdef PBRT_BUILD_GPU_RENDERER
214 215 216 217 218 219 220
    size_t startSize = 0;
    if (Options->useGPU) {
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        startSize = mr->BytesAllocated();
    }
221
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
222

223
    // Compute number of scanlines to render per pass
M
Matt Pharr 已提交
224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
    Vector2i resolution = film.PixelBounds().Diagonal();
    // TODO: make this configurable. Base it on the amount of GPU memory?
    int maxSamples = 1024 * 1024;
    scanlinesPerPass = std::max(1, maxSamples / resolution.x);
    int nPasses = (resolution.y + scanlinesPerPass - 1) / scanlinesPerPass;
    scanlinesPerPass = (resolution.y + nPasses - 1) / nPasses;
    maxQueueSize = resolution.x * scanlinesPerPass;
    LOG_VERBOSE("Will render in %d passes %d scanlines per pass\n", nPasses,
                scanlinesPerPass);

    pixelSampleState = SOA<PixelSampleState>(maxQueueSize, alloc);

    rayQueues[0] = alloc.new_object<RayQueue>(maxQueueSize, alloc);
    rayQueues[1] = alloc.new_object<RayQueue>(maxQueueSize, alloc);

    shadowRayQueue = alloc.new_object<ShadowRayQueue>(maxQueueSize, alloc);

    if (haveSubsurface) {
        bssrdfEvalQueue =
            alloc.new_object<GetBSSRDFAndProbeRayQueue>(maxQueueSize, alloc);
        subsurfaceScatterQueue =
            alloc.new_object<SubsurfaceScatterQueue>(maxQueueSize, alloc);
    }

248
    if (infiniteLights->size())
M
Matt Pharr 已提交
249 250 251 252 253 254 255 256 257 258 259 260 261
        escapedRayQueue = alloc.new_object<EscapedRayQueue>(maxQueueSize, alloc);
    hitAreaLightQueue = alloc.new_object<HitAreaLightQueue>(maxQueueSize, alloc);

    basicEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveBasicEvalMaterial[1], haveBasicEvalMaterial.size() - 1));
    universalEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveUniversalEvalMaterial[1],
                            haveUniversalEvalMaterial.size() - 1));

    if (haveMedia) {
        mediumSampleQueue = alloc.new_object<MediumSampleQueue>(maxQueueSize, alloc);
262 263 264 265 266 267 268 269

        // TODO: in the presence of multiple PhaseFunction implementations,
        // it could be worthwhile to see which are present in the scene and
        // then initialize havePhase accordingly...
        pstd::array<bool, PhaseFunction::NumTags()> havePhase;
        havePhase.fill(true);
        mediumScatterQueue =
            alloc.new_object<MediumScatterQueue>(maxQueueSize, alloc, havePhase);
M
Matt Pharr 已提交
270 271 272 273
    }

    stats = alloc.new_object<Stats>(maxDepth, alloc);

274
#ifdef PBRT_BUILD_GPU_RENDERER
275 276 277 278 279 280 281
    if (Options->useGPU) {
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        size_t endSize = mr->BytesAllocated();
        pathIntegratorBytes += endSize - startSize;
    }
282
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
283 284
}

285 286
// WavefrontPathIntegrator Method Definitions
Float WavefrontPathIntegrator::Render() {
287 288 289
    Bounds2i pixelBounds = film.PixelBounds();
    Vector2i resolution = pixelBounds.Diagonal();
    Timer timer;
290
    // Prefetch allocations to GPU memory
291 292 293 294 295 296
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        int deviceIndex;
        CUDA_CHECK(cudaGetDevice(&deviceIndex));
        int hasConcurrentManagedAccess;
        CUDA_CHECK(cudaDeviceGetAttribute(&hasConcurrentManagedAccess,
297 298
                                          cudaDevAttrConcurrentManagedAccess,
                                          deviceIndex));
299 300 301 302 303 304 305 306 307 308

        // Copy all of the scene data structures over to GPU memory.  This
        // ensures that there isn't a big performance hitch for the first batch
        // of rays as that stuff is copied over on demand.
        if (hasConcurrentManagedAccess) {
            // Set things up so that we can still have read from the
            // WavefrontPathIntegrator struct on the CPU without hurting
            // performance. (This makes it possible to use the values of things
            // like WavefrontPathIntegrator::haveSubsurface to conditionally launch
            // kernels according to what's in the scene...)
309 310
            CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetReadMostly,
                                     /* ignored argument */ 0));
311 312 313 314 315 316 317
            CUDA_CHECK(cudaMemAdvise(this, sizeof(*this),
                                     cudaMemAdviseSetPreferredLocation, deviceIndex));

            // Copy all of the scene data structures over to GPU memory.  This
            // ensures that there isn't a big performance hitch for the first batch
            // of rays as that stuff is copied over on demand.
            CUDATrackedMemoryResource *mr =
318
                dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
M
Matt Pharr 已提交
319
            CHECK(mr);
320 321 322 323 324 325 326
            mr->PrefetchToGPU();
        } else {
            // TODO: on systems with basic unified memory, just launching a
            // kernel should cause everything to be copied over. Is an empty
            // kernel sufficient?
        }
    }
327
#endif  // PBRT_BUILD_GPU_RENDERER
328

329
    // Launch thread to copy image for display server, if enabled
M
Matt Pharr 已提交
330 331 332 333 334
    RGB *displayRGB = nullptr, *displayRGBHost = nullptr;
    std::atomic<bool> exitCopyThread{false};
    std::thread copyThread;

    if (!Options->displayServer.empty()) {
335 336 337 338
#ifdef PBRT_BUILD_GPU_RENDERER
        if (Options->useGPU) {
            // Allocate staging memory on the GPU to store the current WIP
            // image.
339 340 341 342
            CUDA_CHECK(
                cudaMalloc(&displayRGB, resolution.x * resolution.y * sizeof(RGB)));
            CUDA_CHECK(
                cudaMemset(displayRGB, 0, resolution.x * resolution.y * sizeof(RGB)));
343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368

            // Host-side memory for the WIP Image.  We'll just let this leak so
            // that the lambda passed to DisplayDynamic below doesn't access
            // freed memory after Render() returns...
            displayRGBHost = new RGB[resolution.x * resolution.y];

            copyThread = std::thread([&]() {
                GPURegisterThread("DISPLAY_SERVER_COPY_THREAD");

                // Copy back to the CPU using a separate stream so that we can
                // periodically but asynchronously pick up the latest results
                // from the GPU.
                cudaStream_t memcpyStream;
                CUDA_CHECK(cudaStreamCreate(&memcpyStream));
                GPUNameStream(memcpyStream, "DISPLAY_SERVER_COPY_STREAM");

                // Copy back to the host from the GPU buffer, without any
                // synthronization.
                while (!exitCopyThread) {
                    CUDA_CHECK(cudaMemcpyAsync(displayRGBHost, displayRGB,
                                               resolution.x * resolution.y * sizeof(RGB),
                                               cudaMemcpyDeviceToHost, memcpyStream));
                    std::this_thread::sleep_for(std::chrono::milliseconds(50));

                    CUDA_CHECK(cudaStreamSynchronize(memcpyStream));
                }
M
Matt Pharr 已提交
369

370 371 372 373 374 375 376 377 378 379
                // Copy one more time to get the final image before exiting.
                CUDA_CHECK(cudaMemcpy(displayRGBHost, displayRGB,
                                      resolution.x * resolution.y * sizeof(RGB),
                                      cudaMemcpyDeviceToHost));
                CUDA_CHECK(cudaDeviceSynchronize());
            });

            // Now on the CPU side, give the display system a lambda that
            // copies values from |displayRGBHost| into its buffers used for
            // sending messages to the display program (i.e., tev).
380 381
            DisplayDynamic(film.GetFilename(), {resolution.x, resolution.y},
                           {"R", "G", "B"},
382
                           [resolution, displayRGBHost](
383
                               Bounds2i b, pstd::span<pstd::span<Float>> displayValue) {
384 385 386 387 388 389 390 391 392 393
                               int index = 0;
                               for (Point2i p : b) {
                                   RGB rgb = displayRGBHost[p.x + p.y * resolution.x];
                                   displayValue[0][index] = rgb.r;
                                   displayValue[1][index] = rgb.g;
                                   displayValue[2][index] = rgb.b;
                                   ++index;
                               }
                           });
        } else
394 395 396 397 398 399 400 401 402 403 404 405 406 407
#endif  // PBRT_BUILD_GPU_RENDERER
            DisplayDynamic(
                film.GetFilename(), Point2i(pixelBounds.Diagonal()), {"R", "G", "B"},
                [pixelBounds, this](Bounds2i b,
                                    pstd::span<pstd::span<Float>> displayValue) {
                    int index = 0;
                    for (Point2i p : b) {
                        RGB rgb =
                            film.GetPixelRGB(pixelBounds.pMin + p, 1.f /* splat scale */);
                        for (int c = 0; c < 3; ++c)
                            displayValue[c][index] = rgb[c];
                        ++index;
                    }
                });
M
Matt Pharr 已提交
408 409
    }

410
    // Loop over sample indices and evaluate pixel samples
411
    int firstSampleIndex = 0, lastSampleIndex = samplesPerPixel;
412
    // Update sample index range based on debug start, if provided
M
Matt Pharr 已提交
413
    if (!Options->debugStart.empty()) {
414
        std::vector<int> values = SplitStringToInts(Options->debugStart, ',');
415 416
        if (values.size() != 1 && values.size() != 2)
            ErrorExit("Expected either one or two integer values for --debugstart.");
417

418
        firstSampleIndex = values[0];
419 420 421 422
        if (values.size() == 2)
            lastSampleIndex = firstSampleIndex + values[1];
        else
            lastSampleIndex = firstSampleIndex + 1;
M
Matt Pharr 已提交
423 424
    }

425
    ProgressReporter progress(lastSampleIndex - firstSampleIndex, "Rendering",
426
                              Options->quiet, Options->useGPU);
427 428
    for (int sampleIndex = firstSampleIndex; sampleIndex < lastSampleIndex;
         ++sampleIndex) {
M
Matt Pharr 已提交
429 430
        // Attempt to work around issue #145.
#if !(defined(PBRT_IS_WINDOWS) && defined(PBRT_BUILD_GPU_RENDERER) && \
M
Matt Pharr 已提交
431
      __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 1)
432
        CheckCallbackScope _([&]() {
433
            return StringPrintf("Wavefront rendering failed at sample %d. Debug with "
434 435 436
                                "\"--debugstart %d\"\n",
                                sampleIndex, sampleIndex);
        });
M
Matt Pharr 已提交
437
#endif
438

439
        // Render image for sample _sampleIndex_
M
Matt Pharr 已提交
440
        LOG_VERBOSE("Starting to submit work for sample %d", sampleIndex);
M
Matt Pharr 已提交
441 442
        for (int y0 = pixelBounds.pMin.y; y0 < pixelBounds.pMax.y;
             y0 += scanlinesPerPass) {
443
            // Generate camera rays for current scanline range
444
            RayQueue *cameraRayQueue = CurrentRayQueue(0);
445 446
            Do(
                "Reset ray queue", PBRT_CPU_GPU_LAMBDA() {
447
                    PBRT_DBG("Starting scanlines at y0 = %d, sample %d / %d\n", y0,
448
                             sampleIndex, samplesPerPixel);
449 450
                    cameraRayQueue->Reset();
                });
M
Matt Pharr 已提交
451
            GenerateCameraRays(y0, sampleIndex);
452
            Do(
453
                "Update camera ray stats",
454
                PBRT_CPU_GPU_LAMBDA() { stats->cameraRays += cameraRayQueue->Size(); });
M
Matt Pharr 已提交
455

M
Matt Pharr 已提交
456
            // Trace rays and estimate radiance up to maximum ray depth
457
            for (int wavefrontDepth = 0; true; ++wavefrontDepth) {
458
                // Reset queues before tracing rays
459
                RayQueue *nextQueue = NextRayQueue(wavefrontDepth);
460 461
                Do(
                    "Reset queues before tracing rays", PBRT_CPU_GPU_LAMBDA() {
M
Matt Pharr 已提交
462
                        nextQueue->Reset();
463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482
                        // Reset queues before tracing next batch of rays
                        if (mediumSampleQueue)
                            mediumSampleQueue->Reset();
                        if (mediumScatterQueue)
                            mediumScatterQueue->Reset();

                        if (escapedRayQueue)
                            escapedRayQueue->Reset();
                        hitAreaLightQueue->Reset();

                        basicEvalMaterialQueue->Reset();
                        universalEvalMaterialQueue->Reset();

                        if (bssrdfEvalQueue)
                            bssrdfEvalQueue->Reset();
                        if (subsurfaceScatterQueue)
                            subsurfaceScatterQueue->Reset();
                    });

                // Follow active ray paths and accumulate radiance estimates
483
                GenerateRaySamples(wavefrontDepth, sampleIndex);
484

485
                // Find closest intersections along active rays
486
                aggregate->IntersectClosest(
487 488 489
                    maxQueueSize, CurrentRayQueue(wavefrontDepth), escapedRayQueue,
                    hitAreaLightQueue, basicEvalMaterialQueue, universalEvalMaterialQueue,
                    mediumSampleQueue, NextRayQueue(wavefrontDepth));
M
Matt Pharr 已提交
490

491
                if (wavefrontDepth > 0) {
492
                    // As above, with the indexing...
493
                    RayQueue *statsQueue = CurrentRayQueue(wavefrontDepth);
494 495
                    Do(
                        "Update indirect ray stats", PBRT_CPU_GPU_LAMBDA() {
496
                            stats->indirectRays[wavefrontDepth] += statsQueue->Size();
497
                        });
498
                }
499 500 501 502 503 504 505

                SampleMediumInteraction(wavefrontDepth);

                HandleEscapedRays();

                HandleEmissiveIntersection();

506
                if (wavefrontDepth == maxDepth)
M
Matt Pharr 已提交
507
                    break;
508

509
                EvaluateMaterialsAndBSDFs(wavefrontDepth);
510

511
                // Do immediately so that we have space for shadow rays for subsurface..
512
                TraceShadowRays(wavefrontDepth);
513 514

                SampleSubsurface(wavefrontDepth);
M
Matt Pharr 已提交
515
            }
M
Matt Pharr 已提交
516

M
Matt Pharr 已提交
517
            UpdateFilm();
518
            // Copy updated film pixels to buffer for display
519
#ifdef PBRT_BUILD_GPU_RENDERER
520
            if (Options->useGPU && !Options->displayServer.empty())
521 522
                GPUParallelFor(
                    "Update Display RGB Buffer", maxQueueSize,
523
                    PBRT_CPU_GPU_LAMBDA(int pixelIndex) {
524 525 526 527 528 529 530
                        Point2i pPixel = pixelSampleState.pPixel[pixelIndex];
                        if (!InsideExclusive(pPixel, film.PixelBounds()))
                            return;

                        Point2i p(pPixel - film.PixelBounds().pMin);
                        displayRGB[p.x + p.y * resolution.x] = film.GetPixelRGB(pPixel);
                    });
531
#endif  //  PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
532 533 534 535 536
        }

        progress.Update();
    }
    progress.Done();
537

538 539 540
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU)
        GPUWait();
541
#endif  // PBRT_BUILD_GPU_RENDERER
542
    Float seconds = timer.ElapsedSeconds();
543
    // Shut down display server thread, if active
544 545 546 547 548 549 550 551
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        // Wait until rendering is all done before we start to shut down the
        // display stuff..
        if (!Options->displayServer.empty()) {
            exitCopyThread = true;
            copyThread.join();
        }
552

553 554 555
        // Another synchronization to make sure no kernels are running on the
        // GPU so that we can safely access unified memory from the CPU.
        GPUWait();
M
Matt Pharr 已提交
556
    }
557
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
558

559
    return seconds;
M
Matt Pharr 已提交
560 561
}

562
void WavefrontPathIntegrator::HandleEscapedRays() {
563 564
    if (!escapedRayQueue)
        return;
565 566
    ForAllQueued(
        "Handle escaped rays", escapedRayQueue, maxQueueSize,
567
        PBRT_CPU_GPU_LAMBDA(const EscapedRayWorkItem w) {
568
            // Compute weighted radiance for escaped ray
569
            SampledSpectrum L(0.f);
570
            for (const auto &light : *infiniteLights) {
571 572
                if (SampledSpectrum Le = light.Le(Ray(w.rayo, w.rayd), w.lambda); Le) {
                    // Compute path radiance contribution from infinite light
573 574
                    PBRT_DBG("L %f %f %f %f beta %f %f %f %f Le %f %f %f %f", L[0], L[1],
                             L[2], L[3], w.beta[0], w.beta[1], w.beta[2], w.beta[3],
575
                             Le[0], Le[1], Le[2], Le[3]);
576
                    PBRT_DBG("pdf uni %f %f %f %f pdf nee %f %f %f %f", w.inv_w_u[0],
M
Matt Pharr 已提交
577 578
                             w.inv_w_u[1], w.inv_w_u[2], w.inv_w_u[3], w.inv_w_l[0],
                             w.inv_w_l[1], w.inv_w_l[2], w.inv_w_l[3]);
579

580
                    if (w.depth == 0 || w.specularBounce) {
581
                        L += w.beta * Le / w.inv_w_u.Average();
582 583 584
                    } else {
                        // Compute MIS-weighted radiance contribution from infinite light
                        LightSampleContext ctx = w.prevIntrCtx;
585
                        Float lightChoicePDF = lightSampler.PMF(ctx, light);
M
Matt Pharr 已提交
586 587
                        SampledSpectrum inv_w_l =
                            w.inv_w_l * lightChoicePDF * light.PDF_Li(ctx, w.rayd, true);
588
                        L += w.beta * Le / (w.inv_w_u + inv_w_l).Average();
589 590
                    }
                }
591
            }
592

593
            // Update pixel radiance if ray's radiance is nonzero
594
            if (L) {
595 596
                PBRT_DBG("Added L %f %f %f %f for escaped ray pixel index %d\n", L[0],
                         L[1], L[2], L[3], w.pixelIndex);
597

598 599 600
                L += pixelSampleState.L[w.pixelIndex];
                pixelSampleState.L[w.pixelIndex] = L;
            }
601
        });
M
Matt Pharr 已提交
602 603
}

604
void WavefrontPathIntegrator::HandleEmissiveIntersection() {
M
Matt Pharr 已提交
605 606
    ForAllQueued(
        "Handle emitters hit by indirect rays", hitAreaLightQueue, maxQueueSize,
607
        PBRT_CPU_GPU_LAMBDA(const HitAreaLightWorkItem w) {
608
            // Find emitted radiance from surface that ray hit
M
Matt Pharr 已提交
609
            SampledSpectrum Le = w.areaLight.L(w.p, w.n, w.uv, w.wo, w.lambda);
M
Matt Pharr 已提交
610 611
            if (!Le)
                return;
612
            PBRT_DBG("Got Le %f %f %f %f from hit area light at depth %d\n", Le[0], Le[1],
613
                     Le[2], Le[3], w.depth);
M
Matt Pharr 已提交
614

615
            // Compute area light's weighted radiance contribution to the path
616
            SampledSpectrum L(0.f);
617
            if (w.depth == 0 || w.specularBounce) {
618
                L = w.beta * Le / w.inv_w_u.Average();
M
Matt Pharr 已提交
619
            } else {
620
                // Compute MIS-weighted radiance contribution from area light
621 622
                Vector3f wi = -w.wo;
                LightSampleContext ctx = w.prevIntrCtx;
623
                Float lightChoicePDF = lightSampler.PMF(ctx, w.areaLight);
624
                Float lightPDF = lightChoicePDF * w.areaLight.PDF_Li(ctx, wi, true);
M
Matt Pharr 已提交
625

626 627 628
                SampledSpectrum inv_w_u = w.inv_w_u;
                SampledSpectrum inv_w_l = w.inv_w_l * lightPDF;
                L = w.beta * Le / (inv_w_u + inv_w_l).Average();
M
Matt Pharr 已提交
629 630
            }

631
            PBRT_DBG("Added L %f %f %f %f for pixel index %d\n", L[0], L[1], L[2], L[3],
632
                     w.pixelIndex);
633

634
            // Update _L_ in _PixelSampleState_ for area light's radiance
635 636
            L += pixelSampleState.L[w.pixelIndex];
            pixelSampleState.L[w.pixelIndex] = L;
M
Matt Pharr 已提交
637 638 639
        });
}

640
void WavefrontPathIntegrator::TraceShadowRays(int wavefrontDepth) {
641
    if (haveMedia)
642
        aggregate->IntersectShadowTr(maxQueueSize, shadowRayQueue, &pixelSampleState);
643
    else
644
        aggregate->IntersectShadow(maxQueueSize, shadowRayQueue, &pixelSampleState);
645
    // Reset shadow ray queue
646 647
    Do(
        "Reset shadowRayQueue", PBRT_CPU_GPU_LAMBDA() {
648
            stats->shadowRays[wavefrontDepth] += shadowRayQueue->Size();
649 650 651 652
            shadowRayQueue->Reset();
        });
}

653
WavefrontPathIntegrator::Stats::Stats(int maxDepth, Allocator alloc)
M
Matt Pharr 已提交
654 655
    : indirectRays(maxDepth + 1, alloc), shadowRays(maxDepth, alloc) {}

656
std::string WavefrontPathIntegrator::Stats::Print() const {
M
Matt Pharr 已提交
657 658 659 660 661 662 663 664 665 666 667 668 669
    std::string s;
    s += StringPrintf("    %-42s               %12" PRIu64 "\n", "Camera rays",
                      cameraRays);
    for (int i = 1; i < indirectRays.size(); ++i)
        s += StringPrintf("    %-42s               %12" PRIu64 "\n",
                          StringPrintf("Indirect rays, depth %-3d", i), indirectRays[i]);
    for (int i = 0; i < shadowRays.size(); ++i)
        s += StringPrintf("    %-42s               %12" PRIu64 "\n",
                          StringPrintf("Shadow rays, depth %-3d", i), shadowRays[i]);
    return s;
}

}  // namespace pbrt