integrator.cpp 28.0 KB
Newer Older
M
Matt Pharr 已提交
1 2 3 4
// pbrt is Copyright(c) 1998-2020 Matt Pharr, Wenzel Jakob, and Greg Humphreys.
// The pbrt source code is licensed under the Apache License, Version 2.0.
// SPDX: Apache-2.0

5
#include <pbrt/wavefront/integrator.h>
M
Matt Pharr 已提交
6 7 8 9 10

#include <pbrt/base/medium.h>
#include <pbrt/cameras.h>
#include <pbrt/film.h>
#include <pbrt/filters.h>
11 12 13
#ifdef PBRT_BUILD_GPU_RENDERER
#include <pbrt/gpu/aggregate.h>
#include <pbrt/gpu/memory.h>
14
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
15 16 17 18 19 20 21 22 23 24 25
#include <pbrt/lights.h>
#include <pbrt/lightsamplers.h>
#include <pbrt/util/color.h>
#include <pbrt/util/colorspace.h>
#include <pbrt/util/display.h>
#include <pbrt/util/file.h>
#include <pbrt/util/image.h>
#include <pbrt/util/log.h>
#include <pbrt/util/print.h>
#include <pbrt/util/progressreporter.h>
#include <pbrt/util/pstd.h>
26
#include <pbrt/util/spectrum.h>
M
Matt Pharr 已提交
27
#include <pbrt/util/stats.h>
M
Matt Pharr 已提交
28
#include <pbrt/util/string.h>
M
Matt Pharr 已提交
29
#include <pbrt/util/taggedptr.h>
30
#include <pbrt/wavefront/aggregate.h>
M
Matt Pharr 已提交
31

32
#include <atomic>
M
Matt Pharr 已提交
33 34 35 36
#include <cstring>
#include <iostream>
#include <map>

37
#ifdef PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
38 39
#include <cuda.h>
#include <cuda_runtime.h>
40
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
41 42 43

namespace pbrt {

44
// Memory statistic for the wavefront integrator's queues and per-pixel state.
// In this file it is only incremented in the constructor, on the GPU path, by
// the growth in bytes reported by the CUDA tracked memory resource.
STAT_MEMORY_COUNTER("Memory/Wavefront integrator pixel state", pathIntegratorBytes);
M
Matt Pharr 已提交
45

46 47 48
// Records which rendering features the material |m| requires so that the
// integrator only allocates queues and launches kernels that are needed.
//
//   m                          Material to inspect; may be null.
//   haveBasicEvalMaterial      Per-material-tag flags, set when the material
//                              (and its displacement texture, if any) can be
//                              evaluated with the BasicTextureEvaluator.
//   haveUniversalEvalMaterial  Per-material-tag flags, set otherwise.
//   haveSubsurface             OR-ed with m.HasSubsurfaceScattering().
//   haveMedia                  Set to true when |m| is null, i.e. an
//                              "interface" material.
//
// "mix" materials are not recorded themselves; both of their constituent
// materials are visited recursively instead.
static void updateMaterialNeeds(
    Material m, pstd::array<bool, Material::NumTags()> *haveBasicEvalMaterial,
    pstd::array<bool, Material::NumTags()> *haveUniversalEvalMaterial,
    bool *haveSubsurface, bool *haveMedia) {
    // A null material is an interface material. This must be recorded
    // *before* the early return: testing (m == nullptr) after the null check
    // can never be true, which would leave haveMedia unset for scenes that
    // have interface materials but no named media.
    if (!m) {
        *haveMedia = true;
        return;
    }

    if (MixMaterial *mix = m.CastOrNullptr<MixMaterial>(); mix) {
        // This is a somewhat odd place for this check, but it's convenient...
        if (!m.CanEvaluateTextures(BasicTextureEvaluator()))
            ErrorExit("\"mix\" material has a texture that can't be evaluated with the "
                      "BasicTextureEvaluator, which is all that is currently supported "
                      "int the wavefront renderer--sorry! %s",
                      *mix);

        // Account for both materials that the mix may select between.
        updateMaterialNeeds(mix->GetMaterial(0), haveBasicEvalMaterial,
                            haveUniversalEvalMaterial, haveSubsurface, haveMedia);
        updateMaterialNeeds(mix->GetMaterial(1), haveBasicEvalMaterial,
                            haveUniversalEvalMaterial, haveSubsurface, haveMedia);
        return;
    }

    *haveSubsurface |= m.HasSubsurfaceScattering();

    // The basic evaluator is only usable if it can handle both the material's
    // own textures and its displacement texture (when one is present).
    FloatTexture displace = m.GetDisplacement();
    if (m.CanEvaluateTextures(BasicTextureEvaluator()) &&
        (!displace || BasicTextureEvaluator().CanEvaluate({displace}, {})))
        (*haveBasicEvalMaterial)[m.Tag()] = true;
    else
        (*haveUniversalEvalMaterial)[m.Tag()] = true;
}
78

79
// Constructs the integrator from |scene|: creates media, textures, lights and
// materials, builds the ray-intersection aggregate (OptiX or CPU, depending on
// Options->useGPU), reads the integrator parameters, and finally allocates the
// work queues and per-pixel state from |memoryResource|.
WavefrontPathIntegrator::WavefrontPathIntegrator(
    pstd::pmr::memory_resource *memoryResource, BasicScene &scene)
    : memoryResource(memoryResource) {
    ThreadLocal<Allocator> threadAllocators(
        [memoryResource]() { return Allocator(memoryResource); });

    Allocator alloc = threadAllocators.Get();

    // ---- Scene data structures -------------------------------------------
    std::map<std::string, Medium> media = scene.CreateMedia();

    // Despite its name, "haveMedia" controls both whether the medium
    // sampling queues are allocated (and their kernels launched) and whether
    // the ray-marching shadow ray kernel is used. It is therefore also set
    // when the scene has no actual media but does use "interface" materials.
    const auto namesAMedium = [](const auto &shape) {
        return !shape.insideMedium.empty() || !shape.outsideMedium.empty();
    };
    haveMedia = false;
    for (const auto &shape : scene.shapes) {
        if (namesAMedium(shape))
            haveMedia = true;
    }
    for (const auto &shape : scene.animatedShapes) {
        if (namesAMedium(shape))
            haveMedia = true;
    }

    // Looks up a medium by name; an empty name maps to "no medium". A
    // successful lookup also marks the scene as having media.
    // NOTE(review): nothing in this constructor, as visible here, calls
    // this lambda -- confirm whether it is still needed.
    auto findMedium = [&](const std::string &s, const FileLoc *loc) -> Medium {
        if (s.empty())
            return nullptr;

        auto found = media.find(s);
        if (found == media.end())
            ErrorExit(loc, "%s: medium not defined", s);
        haveMedia = true;
        return found->second;
    };

    // Textures
    LOG_VERBOSE("Starting to create textures");
    NamedTextures textures = scene.CreateTextures();
    LOG_VERBOSE("Done creating textures");

    // Lights; infinite lights are additionally tracked in their own list.
    LOG_VERBOSE("Starting to create lights");
    pstd::vector<Light> allLights;
    std::map<int, pstd::vector<Light> *> shapeIndexToAreaLights;

    infiniteLights = alloc.new_object<pstd::vector<Light>>(alloc);

    for (Light light : scene.CreateLights(textures, &shapeIndexToAreaLights)) {
        const bool isInfinite = light.Is<UniformInfiniteLight>() ||
                                light.Is<ImageInfiniteLight>() ||
                                light.Is<PortalImageInfiniteLight>();
        if (isInfinite)
            infiniteLights->push_back(light);

        allLights.push_back(light);
    }
    LOG_VERBOSE("Done creating lights");

    // Materials, plus the bookkeeping of which evaluation paths they need.
    LOG_VERBOSE("Starting to create materials");
    std::map<std::string, pbrt::Material> namedMaterials;
    std::vector<pbrt::Material> materials;
    scene.CreateMaterials(textures, &namedMaterials, &materials);

    haveBasicEvalMaterial.fill(false);
    haveUniversalEvalMaterial.fill(false);
    haveSubsurface = false;
    for (Material material : materials) {
        updateMaterialNeeds(material, &haveBasicEvalMaterial,
                            &haveUniversalEvalMaterial, &haveSubsurface, &haveMedia);
    }
    for (const auto &named : namedMaterials) {
        updateMaterialNeeds(named.second, &haveBasicEvalMaterial,
                            &haveUniversalEvalMaterial, &haveSubsurface, &haveMedia);
    }
    LOG_VERBOSE("Finished creating materials");

    // Ray-intersection aggregate: CPU implementation unless the GPU was
    // requested (which requires a GPU-enabled build).
    if (!Options->useGPU) {
        aggregate = new CPUAggregate(scene, textures, shapeIndexToAreaLights, media,
                                     namedMaterials, materials);
    } else {
#ifdef PBRT_BUILD_GPU_RENDERER
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        aggregate = new OptiXAggregate(scene, mr, textures, shapeIndexToAreaLights, media,
                                       namedMaterials, materials);
#else
        LOG_FATAL("Options->useGPU was set without PBRT_BUILD_GPU_RENDERER enabled");
#endif
    }

    // Preprocess the light sources
    for (Light light : allLights)
        light.Preprocess(aggregate->Bounds());

    // Emissive media count as light sources, too.
    bool haveLights = !allLights.empty();
    for (const auto &namedMedium : media)
        haveLights |= namedMedium.second.IsEmissive();
    if (!haveLights)
        ErrorExit("No light sources specified");

    // With a single light, the light sampler is always "uniform", whatever
    // the "lightsampler" parameter requested.
    std::string lightSamplerName =
        scene.integrator.parameters.GetOneString("lightsampler", "bvh");
    if (allLights.size() == 1)
        lightSamplerName = "uniform";
    lightSampler = LightSampler::Create(lightSamplerName, allLights, alloc);

    const auto &integratorName = scene.integrator.name;
    if (integratorName != "path" && integratorName != "volpath") {
        Warning(&scene.integrator.loc,
                "Ignoring specified integrator \"%s\": the wavefront integrator "
                "always uses a \"volpath\" integrator.",
                integratorName);
    }

    // ---- Integrator parameters and rendering components --------------------
    regularize = scene.integrator.parameters.GetOneBool("regularize", false);
    maxDepth = scene.integrator.parameters.GetOneInt("maxdepth", 5);

    camera = scene.GetCamera();
    film = camera.GetFilm();
    filter = film.GetFilter();
    sampler = scene.GetSampler();

    initializeVisibleSurface = film.UsesVisibleSurface();
    samplesPerPixel = sampler.SamplesPerPixel();

    // Command-line options that have no effect with this integrator.
    if (Options->forceDiffuse)
        Warning("The wavefront integrator does not support --force-diffuse.");
    if (Options->writePartialImages)
        Warning("The wavefront integrator does not support --write-partial-images.");
    if (Options->recordPixelStatistics)
        Warning("The wavefront integrator does not support --pixelstats.");
    if (!Options->mseReferenceImage.empty())
        Warning("The wavefront integrator does not support --mse-reference-image.");
    if (!Options->mseReferenceOutput.empty())
        Warning("The wavefront integrator does not support --mse-reference-out.");

    ///////////////////////////////////////////////////////////////////////////
    // Allocate storage for all of the queues/buffers...

#ifdef PBRT_BUILD_GPU_RENDERER
    // Remember the allocation level now so that the memory used by the
    // queues can be reported once they have all been allocated.
    size_t startSize = 0;
    if (Options->useGPU) {
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        startSize = mr->BytesAllocated();
    }
#endif  // PBRT_BUILD_GPU_RENDERER

    // Compute number of scanlines to render per pass: first bound the pass
    // size by a sample budget, then spread the scanlines evenly over the
    // resulting number of passes.
    Vector2i resolution = film.PixelBounds().Diagonal();
    // TODO: make this configurable. Base it on the amount of GPU memory?
    int maxSamples = 1024 * 1024;
    const auto divideRoundingUp = [](auto numerator, auto denominator) {
        return (numerator + denominator - 1) / denominator;
    };
    scanlinesPerPass = std::max(1, maxSamples / resolution.x);
    int nPasses = divideRoundingUp(resolution.y, scanlinesPerPass);
    scanlinesPerPass = divideRoundingUp(resolution.y, nPasses);
    maxQueueSize = resolution.x * scanlinesPerPass;
    LOG_VERBOSE("Will render in %d passes %d scanlines per pass\n", nPasses,
                scanlinesPerPass);

    pixelSampleState = SOA<PixelSampleState>(maxQueueSize, alloc);

    // Two ray queues: CurrentRayQueue()/NextRayQueue() select between them.
    rayQueues[0] = alloc.new_object<RayQueue>(maxQueueSize, alloc);
    rayQueues[1] = alloc.new_object<RayQueue>(maxQueueSize, alloc);

    shadowRayQueue = alloc.new_object<ShadowRayQueue>(maxQueueSize, alloc);

    // Subsurface queues exist only if some material needs them.
    if (haveSubsurface) {
        bssrdfEvalQueue =
            alloc.new_object<GetBSSRDFAndProbeRayQueue>(maxQueueSize, alloc);
        subsurfaceScatterQueue =
            alloc.new_object<SubsurfaceScatterQueue>(maxQueueSize, alloc);
    }

    // Escaped rays only need handling when there are infinite lights.
    if (!infiniteLights->empty())
        escapedRayQueue = alloc.new_object<EscapedRayQueue>(maxQueueSize, alloc);
    hitAreaLightQueue = alloc.new_object<HitAreaLightQueue>(maxQueueSize, alloc);

    // The material queues receive the per-tag flags starting at index 1,
    // i.e. without the entry for tag 0.
    basicEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveBasicEvalMaterial[1], haveBasicEvalMaterial.size() - 1));
    universalEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveUniversalEvalMaterial[1],
                            haveUniversalEvalMaterial.size() - 1));

    if (haveMedia) {
        mediumSampleQueue = alloc.new_object<MediumSampleQueue>(maxQueueSize, alloc);

        // TODO: in the presence of multiple PhaseFunction implementations,
        // it could be worthwhile to see which are present in the scene and
        // then initialize havePhase accordingly...
        pstd::array<bool, PhaseFunction::NumTags()> havePhase;
        havePhase.fill(true);
        mediumScatterQueue =
            alloc.new_object<MediumScatterQueue>(maxQueueSize, alloc, havePhase);
    }

    stats = alloc.new_object<Stats>(maxDepth, alloc);

#ifdef PBRT_BUILD_GPU_RENDERER
    // Report how much memory the queues and pixel state consumed.
    if (Options->useGPU) {
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        size_t endSize = mr->BytesAllocated();
        pathIntegratorBytes += endSize - startSize;
    }
#endif  // PBRT_BUILD_GPU_RENDERER
}

286 287
// WavefrontPathIntegrator Method Definitions
// Renders the image: for every sample index and every band of scanlines,
// generates camera rays and then repeatedly intersects, shades and spawns
// rays until the maximum depth is reached, updating the film after each band.
// Optionally streams the work-in-progress image to a display server.
// Returns the elapsed rendering time in seconds.
Float WavefrontPathIntegrator::Render() {
    Bounds2i pixelBounds = film.PixelBounds();
    Vector2i resolution = pixelBounds.Diagonal();
    Timer timer;

    // Prefetch allocations to GPU memory
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        int deviceIndex;
        CUDA_CHECK(cudaGetDevice(&deviceIndex));
        int hasConcurrentManagedAccess;
        CUDA_CHECK(cudaDeviceGetAttribute(&hasConcurrentManagedAccess,
                                          cudaDevAttrConcurrentManagedAccess,
                                          deviceIndex));

        if (!hasConcurrentManagedAccess) {
            // TODO: on systems with basic unified memory, just launching a
            // kernel should cause everything to be copied over. Is an empty
            // kernel sufficient?
        } else {
            // Advise the driver so that the CPU can keep reading the
            // WavefrontPathIntegrator struct without hurting performance.
            // (This is what makes it possible to use values such as
            // WavefrontPathIntegrator::haveSubsurface on the CPU to decide
            // which kernels to launch for the scene at hand.)
            CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetReadMostly,
                                     /* ignored argument */ 0));
            CUDA_CHECK(cudaMemAdvise(this, sizeof(*this),
                                     cudaMemAdviseSetPreferredLocation, deviceIndex));

            // Move all of the scene data structures to GPU memory up front so
            // that the first batch of rays doesn't stall while the data is
            // copied over on demand.
            CUDATrackedMemoryResource *mr =
                dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
            CHECK(mr);
            mr->PrefetchToGPU();
        }
    }
#endif  // PBRT_BUILD_GPU_RENDERER

    // Launch thread to copy image for display server, if enabled
    RGB *displayRGB = nullptr, *displayRGBHost = nullptr;
    std::atomic<bool> exitCopyThread{false};
    std::thread copyThread;

    if (!Options->displayServer.empty()) {
#ifdef PBRT_BUILD_GPU_RENDERER
        if (Options->useGPU) {
            // Zero-initialized staging buffer on the GPU that holds the
            // current work-in-progress image.
            CUDA_CHECK(
                cudaMalloc(&displayRGB, resolution.x * resolution.y * sizeof(RGB)));
            CUDA_CHECK(
                cudaMemset(displayRGB, 0, resolution.x * resolution.y * sizeof(RGB)));

            // Host-side copy of the WIP image. It is deliberately leaked so
            // that the lambda handed to DisplayDynamic below never reads
            // freed memory after Render() has returned.
            displayRGBHost = new RGB[resolution.x * resolution.y];

            // The thread captures locals by reference; it is joined before
            // Render() returns (see the shutdown code at the end).
            copyThread = std::thread([&]() {
                GPURegisterThread("DISPLAY_SERVER_COPY_THREAD");

                // A dedicated stream lets us periodically and asynchronously
                // pull the latest results back from the GPU.
                cudaStream_t memcpyStream;
                CUDA_CHECK(cudaStreamCreate(&memcpyStream));
                GPUNameStream(memcpyStream, "DISPLAY_SERVER_COPY_STREAM");

                // Keep copying the GPU buffer back to the host, without any
                // synchronization with the rendering kernels.
                while (!exitCopyThread) {
                    CUDA_CHECK(cudaMemcpyAsync(displayRGBHost, displayRGB,
                                               resolution.x * resolution.y * sizeof(RGB),
                                               cudaMemcpyDeviceToHost, memcpyStream));
                    std::this_thread::sleep_for(std::chrono::milliseconds(50));

                    CUDA_CHECK(cudaStreamSynchronize(memcpyStream));
                }

                // One last copy picks up the final image before exiting.
                CUDA_CHECK(cudaMemcpy(displayRGBHost, displayRGB,
                                      resolution.x * resolution.y * sizeof(RGB),
                                      cudaMemcpyDeviceToHost));
                CUDA_CHECK(cudaDeviceSynchronize());
            });

            // On the CPU side, the display system gets a callback that
            // copies values from |displayRGBHost| into the buffers it uses
            // for sending messages to the display program (i.e., tev).
            DisplayDynamic(film.GetFilename(), {resolution.x, resolution.y},
                           {"R", "G", "B"},
                           [resolution, displayRGBHost](
                               Bounds2i b, pstd::span<pstd::span<Float>> displayValue) {
                               int offset = 0;
                               for (Point2i p : b) {
                                   RGB rgb = displayRGBHost[p.x + p.y * resolution.x];
                                   displayValue[0][offset] = rgb.r;
                                   displayValue[1][offset] = rgb.g;
                                   displayValue[2][offset] = rgb.b;
                                   ++offset;
                               }
                           });
        } else
#endif  // PBRT_BUILD_GPU_RENDERER
            // CPU rendering: the callback reads pixel values straight from
            // the film.
            DisplayDynamic(
                film.GetFilename(), Point2i(pixelBounds.Diagonal()), {"R", "G", "B"},
                [pixelBounds, this](Bounds2i b,
                                    pstd::span<pstd::span<Float>> displayValue) {
                    int offset = 0;
                    for (Point2i p : b) {
                        RGB rgb =
                            film.GetPixelRGB(pixelBounds.pMin + p, 1.f /* splat scale */);
                        for (int channel = 0; channel < 3; ++channel)
                            displayValue[channel][offset] = rgb[channel];
                        ++offset;
                    }
                });
    }

    // Loop over sample indices and evaluate pixel samples
    int firstSampleIndex = 0, lastSampleIndex = samplesPerPixel;
    // --debugstart takes "first" or "first,count" and restricts rendering to
    // that range of sample indices (a single sample if no count is given).
    if (!Options->debugStart.empty()) {
        std::vector<int> debugRange = SplitStringToInts(Options->debugStart, ',');
        if (debugRange.size() != 1 && debugRange.size() != 2)
            ErrorExit("Expected either one or two integer values for --debugstart.");

        firstSampleIndex = debugRange[0];
        const int sampleCount = (debugRange.size() == 2) ? debugRange[1] : 1;
        lastSampleIndex = firstSampleIndex + sampleCount;
    }

    ProgressReporter progress(lastSampleIndex - firstSampleIndex, "Rendering",
                              Options->quiet, Options->useGPU);
    for (int sampleIndex = firstSampleIndex; sampleIndex < lastSampleIndex;
         ++sampleIndex) {
        // Attempt to work around issue #145.
#if !(defined(PBRT_IS_WINDOWS) && defined(PBRT_BUILD_GPU_RENDERER) && \
      __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 1)
        CheckCallbackScope _([&]() {
            return StringPrintf("Wavefront rendering failed at sample %d. Debug with "
                                "\"--debugstart %d\"\n",
                                sampleIndex, sampleIndex);
        });
#endif

        // Render image for sample _sampleIndex_, one band of scanlines at a
        // time.
        LOG_VERBOSE("Starting to submit work for sample %d", sampleIndex);
        for (int y0 = pixelBounds.pMin.y; y0 < pixelBounds.pMax.y;
             y0 += scanlinesPerPass) {
            // Generate camera rays for current scanline range
            RayQueue *cameraRayQueue = CurrentRayQueue(0);
            Do(
                "Reset ray queue", PBRT_CPU_GPU_LAMBDA() {
                    PBRT_DBG("Starting scanlines at y0 = %d, sample %d / %d\n", y0,
                             sampleIndex, samplesPerPixel);
                    cameraRayQueue->Reset();
                });
            GenerateCameraRays(y0, sampleIndex);
            Do(
                "Update camera ray stats",
                PBRT_CPU_GPU_LAMBDA() { stats->cameraRays += cameraRayQueue->Size(); });

            // Trace rays and estimate radiance up to maximum ray depth; the
            // loop is left from inside once _maxDepth_ has been reached.
            for (int wavefrontDepth = 0;; ++wavefrontDepth) {
                // Clear the queue that receives the next bounce's rays along
                // with every per-bounce work queue that exists.
                RayQueue *nextQueue = NextRayQueue(wavefrontDepth);
                Do(
                    "Reset queues before tracing rays", PBRT_CPU_GPU_LAMBDA() {
                        nextQueue->Reset();

                        // Optional queues are only reset if allocated.
                        if (mediumSampleQueue)
                            mediumSampleQueue->Reset();
                        if (mediumScatterQueue)
                            mediumScatterQueue->Reset();

                        if (escapedRayQueue)
                            escapedRayQueue->Reset();
                        hitAreaLightQueue->Reset();

                        basicEvalMaterialQueue->Reset();
                        universalEvalMaterialQueue->Reset();

                        if (bssrdfEvalQueue)
                            bssrdfEvalQueue->Reset();
                        if (subsurfaceScatterQueue)
                            subsurfaceScatterQueue->Reset();
                    });

                // Follow active ray paths and accumulate radiance estimates
                GenerateRaySamples(wavefrontDepth, sampleIndex);

                // Find closest intersections along active rays
                aggregate->IntersectClosest(
                    maxQueueSize, CurrentRayQueue(wavefrontDepth), escapedRayQueue,
                    hitAreaLightQueue, basicEvalMaterialQueue, universalEvalMaterialQueue,
                    mediumSampleQueue, NextRayQueue(wavefrontDepth));

                // Rays at depth 0 are camera rays, which were already counted.
                if (wavefrontDepth > 0) {
                    RayQueue *statsQueue = CurrentRayQueue(wavefrontDepth);
                    Do(
                        "Update indirect ray stats", PBRT_CPU_GPU_LAMBDA() {
                            stats->indirectRays[wavefrontDepth] += statsQueue->Size();
                        });
                }

                SampleMediumInteraction(wavefrontDepth);

                HandleEscapedRays();

                HandleEmissiveIntersection();

                // At the maximum depth no further rays are spawned.
                if (wavefrontDepth == maxDepth)
                    break;

                EvaluateMaterialsAndBSDFs(wavefrontDepth);

                // Do immediately so that we have space for shadow rays for subsurface..
                TraceShadowRays(wavefrontDepth);

                SampleSubsurface(wavefrontDepth);
            }

            UpdateFilm();
            // Copy updated film pixels to buffer for display
#ifdef PBRT_BUILD_GPU_RENDERER
            if (Options->useGPU && !Options->displayServer.empty())
                GPUParallelFor(
                    "Update Display RGB Buffer", maxQueueSize,
                    PBRT_CPU_GPU_LAMBDA(int pixelIndex) {
                        // Skip entries whose pixel lies outside the film.
                        Point2i pPixel = pixelSampleState.pPixel[pixelIndex];
                        if (!InsideExclusive(pPixel, film.PixelBounds()))
                            return;

                        Point2i p(pPixel - film.PixelBounds().pMin);
                        displayRGB[p.x + p.y * resolution.x] = film.GetPixelRGB(pPixel);
                    });
#endif  //  PBRT_BUILD_GPU_RENDERER
        }

        progress.Update();
    }
    progress.Done();

#ifdef PBRT_BUILD_GPU_RENDERER
    // Wait for outstanding GPU work so that the reported time covers it.
    if (Options->useGPU)
        GPUWait();
#endif  // PBRT_BUILD_GPU_RENDERER
    Float seconds = timer.ElapsedSeconds();
    // Shut down display server thread, if active
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        // Rendering is done at this point, so the display copy thread can be
        // told to finish and then joined.
        if (!Options->displayServer.empty()) {
            exitCopyThread = true;
            copyThread.join();
        }

        // Another synchronization to make sure no kernels are running on the
        // GPU so that we can safely access unified memory from the CPU.
        GPUWait();
    }
#endif  // PBRT_BUILD_GPU_RENDERER

    return seconds;
}

563
void WavefrontPathIntegrator::HandleEscapedRays() {
564 565
    if (!escapedRayQueue)
        return;
566 567
    ForAllQueued(
        "Handle escaped rays", escapedRayQueue, maxQueueSize,
568
        PBRT_CPU_GPU_LAMBDA(const EscapedRayWorkItem w) {
569
            // Compute weighted radiance for escaped ray
570
            SampledSpectrum L(0.f);
571
            for (const auto &light : *infiniteLights) {
572 573
                if (SampledSpectrum Le = light.Le(Ray(w.rayo, w.rayd), w.lambda); Le) {
                    // Compute path radiance contribution from infinite light
574 575
                    PBRT_DBG("L %f %f %f %f beta %f %f %f %f Le %f %f %f %f", L[0], L[1],
                             L[2], L[3], w.beta[0], w.beta[1], w.beta[2], w.beta[3],
576
                             Le[0], Le[1], Le[2], Le[3]);
577
                    PBRT_DBG("pdf uni %f %f %f %f pdf nee %f %f %f %f", w.inv_w_u[0],
M
Matt Pharr 已提交
578 579
                             w.inv_w_u[1], w.inv_w_u[2], w.inv_w_u[3], w.inv_w_l[0],
                             w.inv_w_l[1], w.inv_w_l[2], w.inv_w_l[3]);
580

581
                    if (w.depth == 0 || w.specularBounce) {
582
                        L += w.beta * Le / w.inv_w_u.Average();
583 584 585
                    } else {
                        // Compute MIS-weighted radiance contribution from infinite light
                        LightSampleContext ctx = w.prevIntrCtx;
586
                        Float lightChoicePDF = lightSampler.PMF(ctx, light);
M
Matt Pharr 已提交
587 588
                        SampledSpectrum inv_w_l =
                            w.inv_w_l * lightChoicePDF * light.PDF_Li(ctx, w.rayd, true);
589
                        L += w.beta * Le / (w.inv_w_u + inv_w_l).Average();
590 591
                    }
                }
592
            }
593

594
            // Update pixel radiance if ray's radiance is nonzero
595
            if (L) {
596 597
                PBRT_DBG("Added L %f %f %f %f for escaped ray pixel index %d\n", L[0],
                         L[1], L[2], L[3], w.pixelIndex);
598

599 600 601
                L += pixelSampleState.L[w.pixelIndex];
                pixelSampleState.L[w.pixelIndex] = L;
            }
602
        });
M
Matt Pharr 已提交
603 604
}

605
void WavefrontPathIntegrator::HandleEmissiveIntersection() {
M
Matt Pharr 已提交
606 607
    ForAllQueued(
        "Handle emitters hit by indirect rays", hitAreaLightQueue, maxQueueSize,
608
        PBRT_CPU_GPU_LAMBDA(const HitAreaLightWorkItem w) {
609
            // Find emitted radiance from surface that ray hit
M
Matt Pharr 已提交
610
            SampledSpectrum Le = w.areaLight.L(w.p, w.n, w.uv, w.wo, w.lambda);
M
Matt Pharr 已提交
611 612
            if (!Le)
                return;
613
            PBRT_DBG("Got Le %f %f %f %f from hit area light at depth %d\n", Le[0], Le[1],
614
                     Le[2], Le[3], w.depth);
M
Matt Pharr 已提交
615

616
            // Compute area light's weighted radiance contribution to the path
617
            SampledSpectrum L(0.f);
618
            if (w.depth == 0 || w.specularBounce) {
619
                L = w.beta * Le / w.inv_w_u.Average();
M
Matt Pharr 已提交
620
            } else {
621
                // Compute MIS-weighted radiance contribution from area light
622 623
                Vector3f wi = -w.wo;
                LightSampleContext ctx = w.prevIntrCtx;
624
                Float lightChoicePDF = lightSampler.PMF(ctx, w.areaLight);
625
                Float lightPDF = lightChoicePDF * w.areaLight.PDF_Li(ctx, wi, true);
M
Matt Pharr 已提交
626

627 628 629
                SampledSpectrum inv_w_u = w.inv_w_u;
                SampledSpectrum inv_w_l = w.inv_w_l * lightPDF;
                L = w.beta * Le / (inv_w_u + inv_w_l).Average();
M
Matt Pharr 已提交
630 631
            }

632
            PBRT_DBG("Added L %f %f %f %f for pixel index %d\n", L[0], L[1], L[2], L[3],
633
                     w.pixelIndex);
634

635
            // Update _L_ in _PixelSampleState_ for area light's radiance
636 637
            L += pixelSampleState.L[w.pixelIndex];
            pixelSampleState.L[w.pixelIndex] = L;
M
Matt Pharr 已提交
638 639 640
        });
}

641
void WavefrontPathIntegrator::TraceShadowRays(int wavefrontDepth) {
642
    if (haveMedia)
643
        aggregate->IntersectShadowTr(maxQueueSize, shadowRayQueue, &pixelSampleState);
644
    else
645
        aggregate->IntersectShadow(maxQueueSize, shadowRayQueue, &pixelSampleState);
646
    // Reset shadow ray queue
647 648
    Do(
        "Reset shadowRayQueue", PBRT_CPU_GPU_LAMBDA() {
649
            stats->shadowRays[wavefrontDepth] += shadowRayQueue->Size();
650 651 652 653
            shadowRayQueue->Reset();
        });
}

654
// Sizes the per-depth ray counters: indirect rays get maxDepth + 1 entries
// and shadow rays get maxDepth entries, both allocated with |alloc|.
WavefrontPathIntegrator::Stats::Stats(int maxDepth, Allocator alloc)
    : indirectRays(maxDepth + 1, alloc),
      shadowRays(maxDepth, alloc) {
}

657
// Formats the ray counters as a table with one line per counter: camera
// rays, then indirect rays for each depth starting at 1, then shadow rays for
// each depth starting at 0.
std::string WavefrontPathIntegrator::Stats::Print() const {
    // Every line uses the same layout: a left-aligned label followed by a
    // right-aligned count.
    const auto formatRow = [](const auto &label, const auto &count) {
        return StringPrintf("    %-42s               %12" PRIu64 "\n", label, count);
    };

    std::string report;
    report += formatRow("Camera rays", cameraRays);

    // Entry 0 of indirectRays is not printed.
    for (int depth = 1; depth < indirectRays.size(); ++depth) {
        report += formatRow(StringPrintf("Indirect rays, depth %-3d", depth),
                            indirectRays[depth]);
    }
    for (int depth = 0; depth < shadowRays.size(); ++depth) {
        report += formatRow(StringPrintf("Shadow rays, depth %-3d", depth),
                            shadowRays[depth]);
    }
    return report;
}

}  // namespace pbrt