integrator.cpp 28.3 KB
Newer Older
M
Matt Pharr 已提交
1 2 3 4
// pbrt is Copyright(c) 1998-2020 Matt Pharr, Wenzel Jakob, and Greg Humphreys.
// The pbrt source code is licensed under the Apache License, Version 2.0.
// SPDX: Apache-2.0

5
#include <pbrt/wavefront/integrator.h>
M
Matt Pharr 已提交
6 7 8 9 10

#include <pbrt/base/medium.h>
#include <pbrt/cameras.h>
#include <pbrt/film.h>
#include <pbrt/filters.h>
11 12 13
#ifdef PBRT_BUILD_GPU_RENDERER
#include <pbrt/gpu/aggregate.h>
#include <pbrt/gpu/memory.h>
14
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
15 16 17 18 19 20 21 22 23 24 25
#include <pbrt/lights.h>
#include <pbrt/lightsamplers.h>
#include <pbrt/util/color.h>
#include <pbrt/util/colorspace.h>
#include <pbrt/util/display.h>
#include <pbrt/util/file.h>
#include <pbrt/util/image.h>
#include <pbrt/util/log.h>
#include <pbrt/util/print.h>
#include <pbrt/util/progressreporter.h>
#include <pbrt/util/pstd.h>
26
#include <pbrt/util/spectrum.h>
M
Matt Pharr 已提交
27
#include <pbrt/util/stats.h>
M
Matt Pharr 已提交
28
#include <pbrt/util/string.h>
M
Matt Pharr 已提交
29
#include <pbrt/util/taggedptr.h>
30
#include <pbrt/wavefront/aggregate.h>
M
Matt Pharr 已提交
31

32
#include <atomic>
M
Matt Pharr 已提交
33 34 35 36
#include <cstring>
#include <iostream>
#include <map>

37
#ifdef PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
38 39
#include <cuda.h>
#include <cuda_runtime.h>
40
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
41 42 43

namespace pbrt {

44
STAT_MEMORY_COUNTER("Memory/Wavefront integrator pixel state", pathIntegratorBytes);
M
Matt Pharr 已提交
45

46 47 48
static void updateMaterialNeeds(
    Material m, pstd::array<bool, Material::NumTags()> *haveBasicEvalMaterial,
    pstd::array<bool, Material::NumTags()> *haveUniversalEvalMaterial,
49
    bool *haveSubsurface, bool *haveMedia) {
50 51 52 53 54 55 56 57 58 59 60 61
    if (!m)
        return;

    if (MixMaterial *mix = m.CastOrNullptr<MixMaterial>(); mix) {
        // This is a somewhat odd place for this check, but it's convenient...
        if (!m.CanEvaluateTextures(BasicTextureEvaluator()))
            ErrorExit("\"mix\" material has a texture that can't be evaluated with the "
                      "BasicTextureEvaluator, which is all that is currently supported "
                      "int the wavefront renderer--sorry! %s",
                      *mix);

        updateMaterialNeeds(mix->GetMaterial(0), haveBasicEvalMaterial,
62
                            haveUniversalEvalMaterial, haveSubsurface, haveMedia);
63
        updateMaterialNeeds(mix->GetMaterial(1), haveBasicEvalMaterial,
64
                            haveUniversalEvalMaterial, haveSubsurface, haveMedia);
65 66 67 68
        return;
    }

    *haveSubsurface |= m.HasSubsurfaceScattering();
69
    *haveMedia |= (m == nullptr);  // interface material
70 71 72 73 74 75 76 77

    FloatTexture displace = m.GetDisplacement();
    if (m.CanEvaluateTextures(BasicTextureEvaluator()) &&
        (!displace || BasicTextureEvaluator().CanEvaluate({displace}, {})))
        (*haveBasicEvalMaterial)[m.Tag()] = true;
    else
        (*haveUniversalEvalMaterial)[m.Tag()] = true;
}
78

79
WavefrontPathIntegrator::WavefrontPathIntegrator(
M
Matt Pharr 已提交
80
    pstd::pmr::memory_resource *memoryResource, BasicScene &scene)
81
    : memoryResource(memoryResource) {
82 83
    ThreadLocal<Allocator> threadAllocators(
        [memoryResource]() { return Allocator(memoryResource); });
84

85
    Allocator alloc = threadAllocators.Get();
86

M
Matt Pharr 已提交
87
    // Allocate all of the data structures that represent the scene...
88
    std::map<std::string, Medium> media = scene.CreateMedia();
M
Matt Pharr 已提交
89

90 91 92 93 94
    // "haveMedia" is a bit of a misnomer in that determines both whether
    // queues are allocated for the medium sampling kernels and they are
    // launched as well as whether the ray marching shadow ray kernel is
    // launched... Thus, it will be true if there actually are no media,
    // but some "interface" materials are present in the scene.
M
Matt Pharr 已提交
95
    haveMedia = false;
96
    // Check the shapes and instance definitions...
M
Matt Pharr 已提交
97 98 99 100 101 102
    for (const auto &shape : scene.shapes)
        if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
            haveMedia = true;
    for (const auto &shape : scene.animatedShapes)
        if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
            haveMedia = true;
103
    for (const auto &instanceDefinition : scene.instanceDefinitions) {
104 105 106 107 108 109 110
        for (const auto &shape : instanceDefinition.second->shapes)
            if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
                haveMedia = true;
        for (const auto &shape : instanceDefinition.second->animatedShapes)
            if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
                haveMedia = true;
    }
M
Matt Pharr 已提交
111

112 113
    // Textures
    LOG_VERBOSE("Starting to create textures");
114
    NamedTextures textures = scene.CreateTextures();
115 116
    LOG_VERBOSE("Done creating textures");

117
    LOG_VERBOSE("Starting to create lights");
118
    pstd::vector<Light> allLights;
119
    std::map<int, pstd::vector<Light> *> shapeIndexToAreaLights;
120

121
    infiniteLights = alloc.new_object<pstd::vector<Light>>(alloc);
M
Matt Pharr 已提交
122

123
    for (Light l : scene.CreateLights(textures, &shapeIndexToAreaLights)) {
M
Matt Pharr 已提交
124
        if (l.Is<UniformInfiniteLight>() || l.Is<ImageInfiniteLight>() ||
125
            l.Is<PortalImageInfiniteLight>())
126
            infiniteLights->push_back(l);
M
Matt Pharr 已提交
127 128 129

        allLights.push_back(l);
    }
130
    LOG_VERBOSE("Done creating lights");
M
Matt Pharr 已提交
131

132 133 134
    LOG_VERBOSE("Starting to create materials");
    std::map<std::string, pbrt::Material> namedMaterials;
    std::vector<pbrt::Material> materials;
M
Matt Pharr 已提交
135
    scene.CreateMaterials(textures, &namedMaterials, &materials);
136

M
Matt Pharr 已提交
137 138 139
    haveBasicEvalMaterial.fill(false);
    haveUniversalEvalMaterial.fill(false);
    haveSubsurface = false;
140 141
    for (Material m : materials)
        updateMaterialNeeds(m, &haveBasicEvalMaterial, &haveUniversalEvalMaterial,
142
                            &haveSubsurface, &haveMedia);
143 144
    for (const auto &m : namedMaterials)
        updateMaterialNeeds(m.second, &haveBasicEvalMaterial, &haveUniversalEvalMaterial,
145
                            &haveSubsurface, &haveMedia);
146 147
    LOG_VERBOSE("Finished creating materials");

148 149
    if (Options->useGPU) {
#ifdef PBRT_BUILD_GPU_RENDERER
150 151 152
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
153 154
        aggregate = new OptiXAggregate(scene, mr, textures, shapeIndexToAreaLights, media,
                                       namedMaterials, materials);
155 156 157 158
#else
        LOG_FATAL("Options->useGPU was set without PBRT_BUILD_GPU_RENDERER enabled");
#endif
    } else
159 160
        aggregate = new CPUAggregate(scene, textures, shapeIndexToAreaLights, media,
                                     namedMaterials, materials);
M
Matt Pharr 已提交
161 162

    // Preprocess the light sources
163
    for (Light light : allLights)
164
        light.Preprocess(aggregate->Bounds());
M
Matt Pharr 已提交
165 166 167 168 169 170 171

    bool haveLights = !allLights.empty();
    for (const auto &m : media)
        haveLights |= m.second.IsEmissive();
    if (!haveLights)
        ErrorExit("No light sources specified");

172
    LOG_VERBOSE("Starting to create light sampler");
M
Matt Pharr 已提交
173 174 175 176
    std::string lightSamplerName =
        scene.integrator.parameters.GetOneString("lightsampler", "bvh");
    if (allLights.size() == 1)
        lightSamplerName = "uniform";
177
    lightSampler = LightSampler::Create(lightSamplerName, allLights, alloc);
178
    LOG_VERBOSE("Finished creating light sampler");
M
Matt Pharr 已提交
179

180
    if (scene.integrator.name != "path" && scene.integrator.name != "volpath")
181
        Warning(&scene.integrator.loc,
182
                "Ignoring specified integrator \"%s\": the wavefront integrator "
183 184
                "always uses a \"volpath\" integrator.",
                scene.integrator.name);
185

M
Matt Pharr 已提交
186 187 188 189
    // Integrator parameters
    regularize = scene.integrator.parameters.GetOneBool("regularize", false);
    maxDepth = scene.integrator.parameters.GetOneInt("maxdepth", 5);

190 191 192 193 194 195 196 197
    camera = scene.GetCamera();
    film = camera.GetFilm();
    filter = film.GetFilter();
    sampler = scene.GetSampler();

    initializeVisibleSurface = film.UsesVisibleSurface();
    samplesPerPixel = sampler.SamplesPerPixel();

198 199
    // Warn about unsupported stuff...
    if (Options->forceDiffuse)
200
        ErrorExit("The wavefront integrator does not support --force-diffuse.");
201
    if (Options->writePartialImages)
202
        Warning("The wavefront integrator does not support --write-partial-images.");
203
    if (Options->recordPixelStatistics)
204
        ErrorExit("The wavefront integrator does not support --pixelstats.");
205
    if (!Options->mseReferenceImage.empty())
206
        ErrorExit("The wavefront integrator does not support --mse-reference-image.");
207
    if (!Options->mseReferenceOutput.empty())
208
        ErrorExit("The wavefront integrator does not support --mse-reference-out.");
209

210 211
        ///////////////////////////////////////////////////////////////////////////
        // Allocate storage for all of the queues/buffers...
M
Matt Pharr 已提交
212

213
#ifdef PBRT_BUILD_GPU_RENDERER
214 215 216 217 218 219 220
    size_t startSize = 0;
    if (Options->useGPU) {
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        startSize = mr->BytesAllocated();
    }
221
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
222

223
    // Compute number of scanlines to render per pass
M
Matt Pharr 已提交
224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
    Vector2i resolution = film.PixelBounds().Diagonal();
    // TODO: make this configurable. Base it on the amount of GPU memory?
    int maxSamples = 1024 * 1024;
    scanlinesPerPass = std::max(1, maxSamples / resolution.x);
    int nPasses = (resolution.y + scanlinesPerPass - 1) / scanlinesPerPass;
    scanlinesPerPass = (resolution.y + nPasses - 1) / nPasses;
    maxQueueSize = resolution.x * scanlinesPerPass;
    LOG_VERBOSE("Will render in %d passes %d scanlines per pass\n", nPasses,
                scanlinesPerPass);

    pixelSampleState = SOA<PixelSampleState>(maxQueueSize, alloc);

    rayQueues[0] = alloc.new_object<RayQueue>(maxQueueSize, alloc);
    rayQueues[1] = alloc.new_object<RayQueue>(maxQueueSize, alloc);

    shadowRayQueue = alloc.new_object<ShadowRayQueue>(maxQueueSize, alloc);

    if (haveSubsurface) {
        bssrdfEvalQueue =
            alloc.new_object<GetBSSRDFAndProbeRayQueue>(maxQueueSize, alloc);
        subsurfaceScatterQueue =
            alloc.new_object<SubsurfaceScatterQueue>(maxQueueSize, alloc);
    }

248
    if (infiniteLights->size())
M
Matt Pharr 已提交
249 250 251 252 253 254 255 256 257 258 259 260 261
        escapedRayQueue = alloc.new_object<EscapedRayQueue>(maxQueueSize, alloc);
    hitAreaLightQueue = alloc.new_object<HitAreaLightQueue>(maxQueueSize, alloc);

    basicEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveBasicEvalMaterial[1], haveBasicEvalMaterial.size() - 1));
    universalEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveUniversalEvalMaterial[1],
                            haveUniversalEvalMaterial.size() - 1));

    if (haveMedia) {
        mediumSampleQueue = alloc.new_object<MediumSampleQueue>(maxQueueSize, alloc);
262 263 264 265 266 267 268 269

        // TODO: in the presence of multiple PhaseFunction implementations,
        // it could be worthwhile to see which are present in the scene and
        // then initialize havePhase accordingly...
        pstd::array<bool, PhaseFunction::NumTags()> havePhase;
        havePhase.fill(true);
        mediumScatterQueue =
            alloc.new_object<MediumScatterQueue>(maxQueueSize, alloc, havePhase);
M
Matt Pharr 已提交
270 271 272 273
    }

    stats = alloc.new_object<Stats>(maxDepth, alloc);

274
#ifdef PBRT_BUILD_GPU_RENDERER
275 276 277 278 279 280 281
    if (Options->useGPU) {
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        size_t endSize = mr->BytesAllocated();
        pathIntegratorBytes += endSize - startSize;
    }
282
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
283 284
}

285 286
// WavefrontPathIntegrator Method Definitions
Float WavefrontPathIntegrator::Render() {
287 288 289
    Bounds2i pixelBounds = film.PixelBounds();
    Vector2i resolution = pixelBounds.Diagonal();
    Timer timer;
290
    // Prefetch allocations to GPU memory
291
#ifdef PBRT_BUILD_GPU_RENDERER
292 293
    if (Options->useGPU)
        PrefetchGPUAllocations();
294
#endif  // PBRT_BUILD_GPU_RENDERER
295

296
    // Launch thread to copy image for display server, if enabled
M
Matt Pharr 已提交
297 298 299 300 301
    RGB *displayRGB = nullptr, *displayRGBHost = nullptr;
    std::atomic<bool> exitCopyThread{false};
    std::thread copyThread;

    if (!Options->displayServer.empty()) {
302 303 304 305
#ifdef PBRT_BUILD_GPU_RENDERER
        if (Options->useGPU) {
            // Allocate staging memory on the GPU to store the current WIP
            // image.
306 307 308 309
            CUDA_CHECK(
                cudaMalloc(&displayRGB, resolution.x * resolution.y * sizeof(RGB)));
            CUDA_CHECK(
                cudaMemset(displayRGB, 0, resolution.x * resolution.y * sizeof(RGB)));
310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335

            // Host-side memory for the WIP Image.  We'll just let this leak so
            // that the lambda passed to DisplayDynamic below doesn't access
            // freed memory after Render() returns...
            displayRGBHost = new RGB[resolution.x * resolution.y];

            copyThread = std::thread([&]() {
                GPURegisterThread("DISPLAY_SERVER_COPY_THREAD");

                // Copy back to the CPU using a separate stream so that we can
                // periodically but asynchronously pick up the latest results
                // from the GPU.
                cudaStream_t memcpyStream;
                CUDA_CHECK(cudaStreamCreate(&memcpyStream));
                GPUNameStream(memcpyStream, "DISPLAY_SERVER_COPY_STREAM");

                // Copy back to the host from the GPU buffer, without any
                // synthronization.
                while (!exitCopyThread) {
                    CUDA_CHECK(cudaMemcpyAsync(displayRGBHost, displayRGB,
                                               resolution.x * resolution.y * sizeof(RGB),
                                               cudaMemcpyDeviceToHost, memcpyStream));
                    std::this_thread::sleep_for(std::chrono::milliseconds(50));

                    CUDA_CHECK(cudaStreamSynchronize(memcpyStream));
                }
M
Matt Pharr 已提交
336

337 338 339 340 341 342 343 344 345 346
                // Copy one more time to get the final image before exiting.
                CUDA_CHECK(cudaMemcpy(displayRGBHost, displayRGB,
                                      resolution.x * resolution.y * sizeof(RGB),
                                      cudaMemcpyDeviceToHost));
                CUDA_CHECK(cudaDeviceSynchronize());
            });

            // Now on the CPU side, give the display system a lambda that
            // copies values from |displayRGBHost| into its buffers used for
            // sending messages to the display program (i.e., tev).
347 348
            DisplayDynamic(film.GetFilename(), {resolution.x, resolution.y},
                           {"R", "G", "B"},
349
                           [resolution, displayRGBHost](
350
                               Bounds2i b, pstd::span<pstd::span<Float>> displayValue) {
351 352 353 354 355 356 357 358 359 360
                               int index = 0;
                               for (Point2i p : b) {
                                   RGB rgb = displayRGBHost[p.x + p.y * resolution.x];
                                   displayValue[0][index] = rgb.r;
                                   displayValue[1][index] = rgb.g;
                                   displayValue[2][index] = rgb.b;
                                   ++index;
                               }
                           });
        } else
361 362 363 364 365 366 367 368 369 370 371 372 373 374
#endif  // PBRT_BUILD_GPU_RENDERER
            DisplayDynamic(
                film.GetFilename(), Point2i(pixelBounds.Diagonal()), {"R", "G", "B"},
                [pixelBounds, this](Bounds2i b,
                                    pstd::span<pstd::span<Float>> displayValue) {
                    int index = 0;
                    for (Point2i p : b) {
                        RGB rgb =
                            film.GetPixelRGB(pixelBounds.pMin + p, 1.f /* splat scale */);
                        for (int c = 0; c < 3; ++c)
                            displayValue[c][index] = rgb[c];
                        ++index;
                    }
                });
M
Matt Pharr 已提交
375 376
    }

377
    // Loop over sample indices and evaluate pixel samples
378
    int firstSampleIndex = 0, lastSampleIndex = samplesPerPixel;
379
    // Update sample index range based on debug start, if provided
M
Matt Pharr 已提交
380
    if (!Options->debugStart.empty()) {
381
        std::vector<int> values = SplitStringToInts(Options->debugStart, ',');
382 383
        if (values.size() != 1 && values.size() != 2)
            ErrorExit("Expected either one or two integer values for --debugstart.");
384

385
        firstSampleIndex = values[0];
386 387 388 389
        if (values.size() == 2)
            lastSampleIndex = firstSampleIndex + values[1];
        else
            lastSampleIndex = firstSampleIndex + 1;
M
Matt Pharr 已提交
390 391
    }

392
    ProgressReporter progress(lastSampleIndex - firstSampleIndex, "Rendering",
393
                              Options->quiet, Options->useGPU);
394 395
    for (int sampleIndex = firstSampleIndex; sampleIndex < lastSampleIndex;
         ++sampleIndex) {
M
Matt Pharr 已提交
396 397
        // Attempt to work around issue #145.
#if !(defined(PBRT_IS_WINDOWS) && defined(PBRT_BUILD_GPU_RENDERER) && \
M
Matt Pharr 已提交
398
      __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 1)
399
        CheckCallbackScope _([&]() {
400
            return StringPrintf("Wavefront rendering failed at sample %d. Debug with "
401 402 403
                                "\"--debugstart %d\"\n",
                                sampleIndex, sampleIndex);
        });
M
Matt Pharr 已提交
404
#endif
405

406
        // Render image for sample _sampleIndex_
M
Matt Pharr 已提交
407
        LOG_VERBOSE("Starting to submit work for sample %d", sampleIndex);
M
Matt Pharr 已提交
408 409
        for (int y0 = pixelBounds.pMin.y; y0 < pixelBounds.pMax.y;
             y0 += scanlinesPerPass) {
410
            // Generate camera rays for current scanline range
411
            RayQueue *cameraRayQueue = CurrentRayQueue(0);
412 413
            Do(
                "Reset ray queue", PBRT_CPU_GPU_LAMBDA() {
414
                    PBRT_DBG("Starting scanlines at y0 = %d, sample %d / %d\n", y0,
415
                             sampleIndex, samplesPerPixel);
416 417
                    cameraRayQueue->Reset();
                });
M
Matt Pharr 已提交
418
            GenerateCameraRays(y0, sampleIndex);
419
            Do(
420
                "Update camera ray stats",
421
                PBRT_CPU_GPU_LAMBDA() { stats->cameraRays += cameraRayQueue->Size(); });
M
Matt Pharr 已提交
422

M
Matt Pharr 已提交
423
            // Trace rays and estimate radiance up to maximum ray depth
424
            for (int wavefrontDepth = 0; true; ++wavefrontDepth) {
425
                // Reset queues before tracing rays
426
                RayQueue *nextQueue = NextRayQueue(wavefrontDepth);
427 428
                Do(
                    "Reset queues before tracing rays", PBRT_CPU_GPU_LAMBDA() {
M
Matt Pharr 已提交
429
                        nextQueue->Reset();
430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449
                        // Reset queues before tracing next batch of rays
                        if (mediumSampleQueue)
                            mediumSampleQueue->Reset();
                        if (mediumScatterQueue)
                            mediumScatterQueue->Reset();

                        if (escapedRayQueue)
                            escapedRayQueue->Reset();
                        hitAreaLightQueue->Reset();

                        basicEvalMaterialQueue->Reset();
                        universalEvalMaterialQueue->Reset();

                        if (bssrdfEvalQueue)
                            bssrdfEvalQueue->Reset();
                        if (subsurfaceScatterQueue)
                            subsurfaceScatterQueue->Reset();
                    });

                // Follow active ray paths and accumulate radiance estimates
450
                GenerateRaySamples(wavefrontDepth, sampleIndex);
451

452
                // Find closest intersections along active rays
453
                aggregate->IntersectClosest(
454 455 456
                    maxQueueSize, CurrentRayQueue(wavefrontDepth), escapedRayQueue,
                    hitAreaLightQueue, basicEvalMaterialQueue, universalEvalMaterialQueue,
                    mediumSampleQueue, NextRayQueue(wavefrontDepth));
M
Matt Pharr 已提交
457

458
                if (wavefrontDepth > 0) {
459
                    // As above, with the indexing...
460
                    RayQueue *statsQueue = CurrentRayQueue(wavefrontDepth);
461 462
                    Do(
                        "Update indirect ray stats", PBRT_CPU_GPU_LAMBDA() {
463
                            stats->indirectRays[wavefrontDepth] += statsQueue->Size();
464
                        });
465
                }
466 467 468 469 470 471 472

                SampleMediumInteraction(wavefrontDepth);

                HandleEscapedRays();

                HandleEmissiveIntersection();

473
                if (wavefrontDepth == maxDepth)
M
Matt Pharr 已提交
474
                    break;
475

476
                EvaluateMaterialsAndBSDFs(wavefrontDepth);
477

478
                // Do immediately so that we have space for shadow rays for subsurface..
479
                TraceShadowRays(wavefrontDepth);
480 481

                SampleSubsurface(wavefrontDepth);
M
Matt Pharr 已提交
482
            }
M
Matt Pharr 已提交
483

M
Matt Pharr 已提交
484
            UpdateFilm();
485
            // Copy updated film pixels to buffer for display
486
#ifdef PBRT_BUILD_GPU_RENDERER
487
            if (Options->useGPU && !Options->displayServer.empty())
488 489
                GPUParallelFor(
                    "Update Display RGB Buffer", maxQueueSize,
490
                    PBRT_CPU_GPU_LAMBDA(int pixelIndex) {
491 492 493 494 495 496 497
                        Point2i pPixel = pixelSampleState.pPixel[pixelIndex];
                        if (!InsideExclusive(pPixel, film.PixelBounds()))
                            return;

                        Point2i p(pPixel - film.PixelBounds().pMin);
                        displayRGB[p.x + p.y * resolution.x] = film.GetPixelRGB(pPixel);
                    });
498
#endif  //  PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
499 500 501 502 503
        }

        progress.Update();
    }
    progress.Done();
504

505 506 507
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU)
        GPUWait();
508
#endif  // PBRT_BUILD_GPU_RENDERER
509
    Float seconds = timer.ElapsedSeconds();
510
    // Shut down display server thread, if active
511 512 513 514 515 516 517 518
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        // Wait until rendering is all done before we start to shut down the
        // display stuff..
        if (!Options->displayServer.empty()) {
            exitCopyThread = true;
            copyThread.join();
        }
519

520 521 522
        // Another synchronization to make sure no kernels are running on the
        // GPU so that we can safely access unified memory from the CPU.
        GPUWait();
M
Matt Pharr 已提交
523
    }
524
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
525

526
    return seconds;
M
Matt Pharr 已提交
527 528
}

529
void WavefrontPathIntegrator::HandleEscapedRays() {
530 531
    if (!escapedRayQueue)
        return;
532 533
    ForAllQueued(
        "Handle escaped rays", escapedRayQueue, maxQueueSize,
534
        PBRT_CPU_GPU_LAMBDA(const EscapedRayWorkItem w) {
535
            // Compute weighted radiance for escaped ray
536
            SampledSpectrum L(0.f);
537
            for (const auto &light : *infiniteLights) {
538 539
                if (SampledSpectrum Le = light.Le(Ray(w.rayo, w.rayd), w.lambda); Le) {
                    // Compute path radiance contribution from infinite light
540 541
                    PBRT_DBG("L %f %f %f %f beta %f %f %f %f Le %f %f %f %f", L[0], L[1],
                             L[2], L[3], w.beta[0], w.beta[1], w.beta[2], w.beta[3],
542
                             Le[0], Le[1], Le[2], Le[3]);
543
                    PBRT_DBG("pdf uni %f %f %f %f pdf nee %f %f %f %f", w.inv_w_u[0],
M
Matt Pharr 已提交
544 545
                             w.inv_w_u[1], w.inv_w_u[2], w.inv_w_u[3], w.inv_w_l[0],
                             w.inv_w_l[1], w.inv_w_l[2], w.inv_w_l[3]);
546

547
                    if (w.depth == 0 || w.specularBounce) {
548
                        L += w.beta * Le / w.inv_w_u.Average();
549 550 551
                    } else {
                        // Compute MIS-weighted radiance contribution from infinite light
                        LightSampleContext ctx = w.prevIntrCtx;
552
                        Float lightChoicePDF = lightSampler.PMF(ctx, light);
M
Matt Pharr 已提交
553 554
                        SampledSpectrum inv_w_l =
                            w.inv_w_l * lightChoicePDF * light.PDF_Li(ctx, w.rayd, true);
555
                        L += w.beta * Le / (w.inv_w_u + inv_w_l).Average();
556 557
                    }
                }
558
            }
559

560
            // Update pixel radiance if ray's radiance is nonzero
561
            if (L) {
562 563
                PBRT_DBG("Added L %f %f %f %f for escaped ray pixel index %d\n", L[0],
                         L[1], L[2], L[3], w.pixelIndex);
564

565 566 567
                L += pixelSampleState.L[w.pixelIndex];
                pixelSampleState.L[w.pixelIndex] = L;
            }
568
        });
M
Matt Pharr 已提交
569 570
}

571
void WavefrontPathIntegrator::HandleEmissiveIntersection() {
M
Matt Pharr 已提交
572 573
    ForAllQueued(
        "Handle emitters hit by indirect rays", hitAreaLightQueue, maxQueueSize,
574
        PBRT_CPU_GPU_LAMBDA(const HitAreaLightWorkItem w) {
575
            // Find emitted radiance from surface that ray hit
M
Matt Pharr 已提交
576
            SampledSpectrum Le = w.areaLight.L(w.p, w.n, w.uv, w.wo, w.lambda);
M
Matt Pharr 已提交
577 578
            if (!Le)
                return;
579
            PBRT_DBG("Got Le %f %f %f %f from hit area light at depth %d\n", Le[0], Le[1],
580
                     Le[2], Le[3], w.depth);
M
Matt Pharr 已提交
581

582
            // Compute area light's weighted radiance contribution to the path
583
            SampledSpectrum L(0.f);
584
            if (w.depth == 0 || w.specularBounce) {
585
                L = w.beta * Le / w.inv_w_u.Average();
M
Matt Pharr 已提交
586
            } else {
587
                // Compute MIS-weighted radiance contribution from area light
588 589
                Vector3f wi = -w.wo;
                LightSampleContext ctx = w.prevIntrCtx;
590
                Float lightChoicePDF = lightSampler.PMF(ctx, w.areaLight);
591
                Float lightPDF = lightChoicePDF * w.areaLight.PDF_Li(ctx, wi, true);
M
Matt Pharr 已提交
592

593 594 595
                SampledSpectrum inv_w_u = w.inv_w_u;
                SampledSpectrum inv_w_l = w.inv_w_l * lightPDF;
                L = w.beta * Le / (inv_w_u + inv_w_l).Average();
M
Matt Pharr 已提交
596 597
            }

598
            PBRT_DBG("Added L %f %f %f %f for pixel index %d\n", L[0], L[1], L[2], L[3],
599
                     w.pixelIndex);
600

601
            // Update _L_ in _PixelSampleState_ for area light's radiance
602 603
            L += pixelSampleState.L[w.pixelIndex];
            pixelSampleState.L[w.pixelIndex] = L;
M
Matt Pharr 已提交
604 605 606
        });
}

607
void WavefrontPathIntegrator::TraceShadowRays(int wavefrontDepth) {
608
    if (haveMedia)
609
        aggregate->IntersectShadowTr(maxQueueSize, shadowRayQueue, &pixelSampleState);
610
    else
611
        aggregate->IntersectShadow(maxQueueSize, shadowRayQueue, &pixelSampleState);
612
    // Reset shadow ray queue
613 614
    Do(
        "Reset shadowRayQueue", PBRT_CPU_GPU_LAMBDA() {
615
            stats->shadowRays[wavefrontDepth] += shadowRayQueue->Size();
616 617 618 619
            shadowRayQueue->Reset();
        });
}

620
WavefrontPathIntegrator::Stats::Stats(int maxDepth, Allocator alloc)
M
Matt Pharr 已提交
621 622
    : indirectRays(maxDepth + 1, alloc), shadowRays(maxDepth, alloc) {}

623
std::string WavefrontPathIntegrator::Stats::Print() const {
M
Matt Pharr 已提交
624 625 626 627 628 629 630 631 632 633 634 635
    std::string s;
    s += StringPrintf("    %-42s               %12" PRIu64 "\n", "Camera rays",
                      cameraRays);
    for (int i = 1; i < indirectRays.size(); ++i)
        s += StringPrintf("    %-42s               %12" PRIu64 "\n",
                          StringPrintf("Indirect rays, depth %-3d", i), indirectRays[i]);
    for (int i = 0; i < shadowRays.size(); ++i)
        s += StringPrintf("    %-42s               %12" PRIu64 "\n",
                          StringPrintf("Shadow rays, depth %-3d", i), shadowRays[i]);
    return s;
}

636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673
#ifdef PBRT_BUILD_GPU_RENDERER
void WavefrontPathIntegrator::PrefetchGPUAllocations() {
    int deviceIndex;
    CUDA_CHECK(cudaGetDevice(&deviceIndex));
    int hasConcurrentManagedAccess;
    CUDA_CHECK(cudaDeviceGetAttribute(&hasConcurrentManagedAccess,
                                      cudaDevAttrConcurrentManagedAccess,
                                      deviceIndex));

    // Copy all of the scene data structures over to GPU memory.  This
    // ensures that there isn't a big performance hitch for the first batch
    // of rays as that stuff is copied over on demand.
    if (hasConcurrentManagedAccess) {
        // Set things up so that we can still have read from the
        // WavefrontPathIntegrator struct on the CPU without hurting
        // performance. (This makes it possible to use the values of things
        // like WavefrontPathIntegrator::haveSubsurface to conditionally launch
        // kernels according to what's in the scene...)
        CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetReadMostly,
                                 /* ignored argument */ 0));
        CUDA_CHECK(cudaMemAdvise(this, sizeof(*this),
                                 cudaMemAdviseSetPreferredLocation, deviceIndex));

        // Copy all of the scene data structures over to GPU memory.  This
        // ensures that there isn't a big performance hitch for the first batch
        // of rays as that stuff is copied over on demand.
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        mr->PrefetchToGPU();
    } else {
        // TODO: on systems with basic unified memory, just launching a
        // kernel should cause everything to be copied over. Is an empty
        // kernel sufficient?
    }
}
#endif // PBRT_BUILD_GPU_RENDERER

M
Matt Pharr 已提交
674
}  // namespace pbrt