integrator.cpp 31.4 KB
Newer Older
M
Matt Pharr 已提交
1 2 3 4
// pbrt is Copyright(c) 1998-2020 Matt Pharr, Wenzel Jakob, and Greg Humphreys.
// The pbrt source code is licensed under the Apache License, Version 2.0.
// SPDX: Apache-2.0

5
#include <pbrt/wavefront/integrator.h>
M
Matt Pharr 已提交
6 7 8 9 10

#include <pbrt/base/medium.h>
#include <pbrt/cameras.h>
#include <pbrt/film.h>
#include <pbrt/filters.h>
11 12 13
#ifdef PBRT_BUILD_GPU_RENDERER
#include <pbrt/gpu/aggregate.h>
#include <pbrt/gpu/memory.h>
14
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
15 16 17 18 19 20
#include <pbrt/lights.h>
#include <pbrt/lightsamplers.h>
#include <pbrt/util/color.h>
#include <pbrt/util/colorspace.h>
#include <pbrt/util/display.h>
#include <pbrt/util/file.h>
M
Matt Pharr 已提交
21
#include <pbrt/util/gui.h>
M
Matt Pharr 已提交
22 23 24 25 26
#include <pbrt/util/image.h>
#include <pbrt/util/log.h>
#include <pbrt/util/print.h>
#include <pbrt/util/progressreporter.h>
#include <pbrt/util/pstd.h>
27
#include <pbrt/util/spectrum.h>
M
Matt Pharr 已提交
28
#include <pbrt/util/stats.h>
M
Matt Pharr 已提交
29
#include <pbrt/util/string.h>
M
Matt Pharr 已提交
30
#include <pbrt/util/taggedptr.h>
31
#include <pbrt/wavefront/aggregate.h>
M
Matt Pharr 已提交
32

33
#include <atomic>
M
Matt Pharr 已提交
34 35 36 37
#include <cstring>
#include <iostream>
#include <map>

38
#ifdef PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
39 40
#include <cuda.h>
#include <cuda_runtime.h>
41
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
42 43 44

namespace pbrt {

45
STAT_MEMORY_COUNTER("Memory/Wavefront integrator pixel state", pathIntegratorBytes);
M
Matt Pharr 已提交
46

47 48 49
static void updateMaterialNeeds(
    Material m, pstd::array<bool, Material::NumTags()> *haveBasicEvalMaterial,
    pstd::array<bool, Material::NumTags()> *haveUniversalEvalMaterial,
50
    bool *haveSubsurface, bool *haveMedia) {
51 52 53 54 55 56 57 58 59 60 61 62
    if (!m)
        return;

    if (MixMaterial *mix = m.CastOrNullptr<MixMaterial>(); mix) {
        // This is a somewhat odd place for this check, but it's convenient...
        if (!m.CanEvaluateTextures(BasicTextureEvaluator()))
            ErrorExit("\"mix\" material has a texture that can't be evaluated with the "
                      "BasicTextureEvaluator, which is all that is currently supported "
                      "int the wavefront renderer--sorry! %s",
                      *mix);

        updateMaterialNeeds(mix->GetMaterial(0), haveBasicEvalMaterial,
63
                            haveUniversalEvalMaterial, haveSubsurface, haveMedia);
64
        updateMaterialNeeds(mix->GetMaterial(1), haveBasicEvalMaterial,
65
                            haveUniversalEvalMaterial, haveSubsurface, haveMedia);
66 67 68 69
        return;
    }

    *haveSubsurface |= m.HasSubsurfaceScattering();
70
    *haveMedia |= (m == nullptr);  // interface material
71 72 73 74 75 76 77 78

    FloatTexture displace = m.GetDisplacement();
    if (m.CanEvaluateTextures(BasicTextureEvaluator()) &&
        (!displace || BasicTextureEvaluator().CanEvaluate({displace}, {})))
        (*haveBasicEvalMaterial)[m.Tag()] = true;
    else
        (*haveUniversalEvalMaterial)[m.Tag()] = true;
}
79

80
WavefrontPathIntegrator::WavefrontPathIntegrator(
M
Matt Pharr 已提交
81
    pstd::pmr::memory_resource *memoryResource, BasicScene &scene)
M
Matt Pharr 已提交
82
    : memoryResource(memoryResource), exitCopyThread(new std::atomic<bool>(false)) {
83 84
    ThreadLocal<Allocator> threadAllocators(
        [memoryResource]() { return Allocator(memoryResource); });
85

86
    Allocator alloc = threadAllocators.Get();
87

M
Matt Pharr 已提交
88
    // Allocate all of the data structures that represent the scene...
89
    std::map<std::string, Medium> media = scene.CreateMedia();
M
Matt Pharr 已提交
90

91 92 93 94 95
    // "haveMedia" is a bit of a misnomer in that determines both whether
    // queues are allocated for the medium sampling kernels and they are
    // launched as well as whether the ray marching shadow ray kernel is
    // launched... Thus, it will be true if there actually are no media,
    // but some "interface" materials are present in the scene.
M
Matt Pharr 已提交
96
    haveMedia = false;
97
    // Check the shapes and instance definitions...
M
Matt Pharr 已提交
98 99 100 101 102 103
    for (const auto &shape : scene.shapes)
        if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
            haveMedia = true;
    for (const auto &shape : scene.animatedShapes)
        if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
            haveMedia = true;
104
    for (const auto &instanceDefinition : scene.instanceDefinitions) {
105 106 107 108 109 110 111
        for (const auto &shape : instanceDefinition.second->shapes)
            if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
                haveMedia = true;
        for (const auto &shape : instanceDefinition.second->animatedShapes)
            if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
                haveMedia = true;
    }
M
Matt Pharr 已提交
112

113 114
    // Textures
    LOG_VERBOSE("Starting to create textures");
115
    NamedTextures textures = scene.CreateTextures();
116 117
    LOG_VERBOSE("Done creating textures");

118
    LOG_VERBOSE("Starting to create lights");
119
    pstd::vector<Light> allLights;
120
    std::map<int, pstd::vector<Light> *> shapeIndexToAreaLights;
121

122
    infiniteLights = alloc.new_object<pstd::vector<Light>>(alloc);
M
Matt Pharr 已提交
123

124
    for (Light l : scene.CreateLights(textures, &shapeIndexToAreaLights)) {
M
Matt Pharr 已提交
125
        if (l.Is<UniformInfiniteLight>() || l.Is<ImageInfiniteLight>() ||
126
            l.Is<PortalImageInfiniteLight>())
127
            infiniteLights->push_back(l);
M
Matt Pharr 已提交
128 129 130

        allLights.push_back(l);
    }
131
    LOG_VERBOSE("Done creating lights");
M
Matt Pharr 已提交
132

133 134 135
    LOG_VERBOSE("Starting to create materials");
    std::map<std::string, pbrt::Material> namedMaterials;
    std::vector<pbrt::Material> materials;
M
Matt Pharr 已提交
136
    scene.CreateMaterials(textures, &namedMaterials, &materials);
137

M
Matt Pharr 已提交
138 139 140
    haveBasicEvalMaterial.fill(false);
    haveUniversalEvalMaterial.fill(false);
    haveSubsurface = false;
141 142
    for (Material m : materials)
        updateMaterialNeeds(m, &haveBasicEvalMaterial, &haveUniversalEvalMaterial,
143
                            &haveSubsurface, &haveMedia);
144 145
    for (const auto &m : namedMaterials)
        updateMaterialNeeds(m.second, &haveBasicEvalMaterial, &haveUniversalEvalMaterial,
146
                            &haveSubsurface, &haveMedia);
147 148
    LOG_VERBOSE("Finished creating materials");

149 150 151 152 153 154 155 156
    // Retrieve these here so that the CPU isn't writing to managed memory
    // concurrently with the OptiX acceleration-structure construction work
    // that follows. (Verbotten on Windows.)
    camera = scene.GetCamera();
    film = camera.GetFilm();
    filter = film.GetFilter();
    sampler = scene.GetSampler();

157 158
    if (Options->useGPU) {
#ifdef PBRT_BUILD_GPU_RENDERER
159 160 161
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
162 163
        aggregate = new OptiXAggregate(scene, mr, textures, shapeIndexToAreaLights, media,
                                       namedMaterials, materials);
164 165 166 167
#else
        LOG_FATAL("Options->useGPU was set without PBRT_BUILD_GPU_RENDERER enabled");
#endif
    } else
168 169
        aggregate = new CPUAggregate(scene, textures, shapeIndexToAreaLights, media,
                                     namedMaterials, materials);
M
Matt Pharr 已提交
170 171

    // Preprocess the light sources
172
    for (Light light : allLights)
173
        light.Preprocess(aggregate->Bounds());
M
Matt Pharr 已提交
174 175 176 177 178 179 180

    bool haveLights = !allLights.empty();
    for (const auto &m : media)
        haveLights |= m.second.IsEmissive();
    if (!haveLights)
        ErrorExit("No light sources specified");

181
    LOG_VERBOSE("Starting to create light sampler");
M
Matt Pharr 已提交
182 183 184 185
    std::string lightSamplerName =
        scene.integrator.parameters.GetOneString("lightsampler", "bvh");
    if (allLights.size() == 1)
        lightSamplerName = "uniform";
186
    lightSampler = LightSampler::Create(lightSamplerName, allLights, alloc);
187
    LOG_VERBOSE("Finished creating light sampler");
M
Matt Pharr 已提交
188

189
    if (scene.integrator.name != "path" && scene.integrator.name != "volpath")
190
        Warning(&scene.integrator.loc,
191
                "Ignoring specified integrator \"%s\": the wavefront integrator "
192 193
                "always uses a \"volpath\" integrator.",
                scene.integrator.name);
194

M
Matt Pharr 已提交
195 196 197 198
    // Integrator parameters
    regularize = scene.integrator.parameters.GetOneBool("regularize", false);
    maxDepth = scene.integrator.parameters.GetOneInt("maxdepth", 5);

199 200 201
    initializeVisibleSurface = film.UsesVisibleSurface();
    samplesPerPixel = sampler.SamplesPerPixel();

202 203
    // Warn about unsupported stuff...
    if (Options->forceDiffuse)
204
        ErrorExit("The wavefront integrator does not support --force-diffuse.");
205
    if (Options->writePartialImages)
206
        Warning("The wavefront integrator does not support --write-partial-images.");
207
    if (Options->recordPixelStatistics)
208
        ErrorExit("The wavefront integrator does not support --pixelstats.");
209
    if (!Options->mseReferenceImage.empty())
210
        ErrorExit("The wavefront integrator does not support --mse-reference-image.");
211
    if (!Options->mseReferenceOutput.empty())
212
        ErrorExit("The wavefront integrator does not support --mse-reference-out.");
213

214 215
        ///////////////////////////////////////////////////////////////////////////
        // Allocate storage for all of the queues/buffers...
M
Matt Pharr 已提交
216

217
#ifdef PBRT_BUILD_GPU_RENDERER
218 219 220 221 222 223 224
    size_t startSize = 0;
    if (Options->useGPU) {
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        startSize = mr->BytesAllocated();
    }
225
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
226

227
    // Compute number of scanlines to render per pass
M
Matt Pharr 已提交
228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
    Vector2i resolution = film.PixelBounds().Diagonal();
    // TODO: make this configurable. Base it on the amount of GPU memory?
    int maxSamples = 1024 * 1024;
    scanlinesPerPass = std::max(1, maxSamples / resolution.x);
    int nPasses = (resolution.y + scanlinesPerPass - 1) / scanlinesPerPass;
    scanlinesPerPass = (resolution.y + nPasses - 1) / nPasses;
    maxQueueSize = resolution.x * scanlinesPerPass;
    LOG_VERBOSE("Will render in %d passes %d scanlines per pass\n", nPasses,
                scanlinesPerPass);

    pixelSampleState = SOA<PixelSampleState>(maxQueueSize, alloc);

    rayQueues[0] = alloc.new_object<RayQueue>(maxQueueSize, alloc);
    rayQueues[1] = alloc.new_object<RayQueue>(maxQueueSize, alloc);

    shadowRayQueue = alloc.new_object<ShadowRayQueue>(maxQueueSize, alloc);

    if (haveSubsurface) {
        bssrdfEvalQueue =
            alloc.new_object<GetBSSRDFAndProbeRayQueue>(maxQueueSize, alloc);
        subsurfaceScatterQueue =
            alloc.new_object<SubsurfaceScatterQueue>(maxQueueSize, alloc);
    }

252
    if (infiniteLights->size())
M
Matt Pharr 已提交
253 254 255 256 257 258 259 260 261 262 263 264 265
        escapedRayQueue = alloc.new_object<EscapedRayQueue>(maxQueueSize, alloc);
    hitAreaLightQueue = alloc.new_object<HitAreaLightQueue>(maxQueueSize, alloc);

    basicEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveBasicEvalMaterial[1], haveBasicEvalMaterial.size() - 1));
    universalEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveUniversalEvalMaterial[1],
                            haveUniversalEvalMaterial.size() - 1));

    if (haveMedia) {
        mediumSampleQueue = alloc.new_object<MediumSampleQueue>(maxQueueSize, alloc);
266 267 268 269 270 271 272 273

        // TODO: in the presence of multiple PhaseFunction implementations,
        // it could be worthwhile to see which are present in the scene and
        // then initialize havePhase accordingly...
        pstd::array<bool, PhaseFunction::NumTags()> havePhase;
        havePhase.fill(true);
        mediumScatterQueue =
            alloc.new_object<MediumScatterQueue>(maxQueueSize, alloc, havePhase);
M
Matt Pharr 已提交
274 275 276 277
    }

    stats = alloc.new_object<Stats>(maxDepth, alloc);

278
#ifdef PBRT_BUILD_GPU_RENDERER
279 280 281 282 283 284 285
    if (Options->useGPU) {
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        size_t endSize = mr->BytesAllocated();
        pathIntegratorBytes += endSize - startSize;
    }
286
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
287 288
}

289 290
// WavefrontPathIntegrator Method Definitions
Float WavefrontPathIntegrator::Render() {
291 292
    Bounds2i pixelBounds = film.PixelBounds();
    Vector2i resolution = pixelBounds.Diagonal();
M
Matt Pharr 已提交
293 294 295 296 297 298 299 300 301 302 303 304

    GUI *gui = nullptr;
    // FIXME: camera animation; whatever...
    Transform renderFromCamera = camera.GetCameraTransform().RenderFromCamera().startTransform;
    Transform cameraFromRender = Inverse(renderFromCamera);
    Transform cameraFromWorld = camera.GetCameraTransform().CameraFromWorld(camera.SampleTime(0.f));
    if (Options->interactive) {
        if (!Options->displayServer.empty())
            ErrorExit("--interactive and --display-server cannot be used at the same time.");
        gui = new GUI(film.GetFilename(), resolution, aggregate->Bounds());
    }

305
    Timer timer;
306
    // Prefetch allocations to GPU memory
307
#ifdef PBRT_BUILD_GPU_RENDERER
308 309
    if (Options->useGPU)
        PrefetchGPUAllocations();
310
#endif  // PBRT_BUILD_GPU_RENDERER
311

312
    // Launch thread to copy image for display server, if enabled
313 314
    if (!Options->displayServer.empty())
        StartDisplayThread();
M
Matt Pharr 已提交
315

316
    // Loop over sample indices and evaluate pixel samples
317
    int firstSampleIndex = 0, lastSampleIndex = samplesPerPixel;
318
    // Update sample index range based on debug start, if provided
M
Matt Pharr 已提交
319
    if (!Options->debugStart.empty()) {
320
        std::vector<int> values = SplitStringToInts(Options->debugStart, ',');
321 322
        if (values.size() != 1 && values.size() != 2)
            ErrorExit("Expected either one or two integer values for --debugstart.");
323

324
        firstSampleIndex = values[0];
325 326 327 328
        if (values.size() == 2)
            lastSampleIndex = firstSampleIndex + values[1];
        else
            lastSampleIndex = firstSampleIndex + 1;
M
Matt Pharr 已提交
329 330
    }

331
    ProgressReporter progress(lastSampleIndex - firstSampleIndex, "Rendering",
M
Matt Pharr 已提交
332 333
                              Options->quiet || Options->interactive, Options->useGPU);
    for (int sampleIndex = firstSampleIndex; sampleIndex < lastSampleIndex; ++sampleIndex) {
M
Matt Pharr 已提交
334 335
        // Attempt to work around issue #145.
#if !(defined(PBRT_IS_WINDOWS) && defined(PBRT_BUILD_GPU_RENDERER) && \
M
Matt Pharr 已提交
336
      __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 1)
337
        CheckCallbackScope _([&]() {
338
            return StringPrintf("Wavefront rendering failed at sample %d. Debug with "
339 340 341
                                "\"--debugstart %d\"\n",
                                sampleIndex, sampleIndex);
        });
M
Matt Pharr 已提交
342
#endif
343

344
        // Render image for sample _sampleIndex_
M
Matt Pharr 已提交
345
        LOG_VERBOSE("Starting to submit work for sample %d", sampleIndex);
M
Matt Pharr 已提交
346 347
        for (int y0 = pixelBounds.pMin.y; y0 < pixelBounds.pMax.y;
             y0 += scanlinesPerPass) {
348
            // Generate camera rays for current scanline range
349
            RayQueue *cameraRayQueue = CurrentRayQueue(0);
350 351
            Do(
                "Reset ray queue", PBRT_CPU_GPU_LAMBDA() {
352
                    PBRT_DBG("Starting scanlines at y0 = %d, sample %d / %d\n", y0,
353
                             sampleIndex, samplesPerPixel);
354 355
                    cameraRayQueue->Reset();
                });
M
Matt Pharr 已提交
356 357 358 359 360

            Transform cameraMotion;
            if (gui)
                cameraMotion = renderFromCamera * gui->GetCameraTransform() * cameraFromRender;
            GenerateCameraRays(y0, cameraMotion, sampleIndex);
361
            Do(
362
                "Update camera ray stats",
363
                PBRT_CPU_GPU_LAMBDA() { stats->cameraRays += cameraRayQueue->Size(); });
M
Matt Pharr 已提交
364

M
Matt Pharr 已提交
365
            // Trace rays and estimate radiance up to maximum ray depth
366
            for (int wavefrontDepth = 0; true; ++wavefrontDepth) {
367
                // Reset queues before tracing rays
368
                RayQueue *nextQueue = NextRayQueue(wavefrontDepth);
369 370
                Do(
                    "Reset queues before tracing rays", PBRT_CPU_GPU_LAMBDA() {
M
Matt Pharr 已提交
371
                        nextQueue->Reset();
372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391
                        // Reset queues before tracing next batch of rays
                        if (mediumSampleQueue)
                            mediumSampleQueue->Reset();
                        if (mediumScatterQueue)
                            mediumScatterQueue->Reset();

                        if (escapedRayQueue)
                            escapedRayQueue->Reset();
                        hitAreaLightQueue->Reset();

                        basicEvalMaterialQueue->Reset();
                        universalEvalMaterialQueue->Reset();

                        if (bssrdfEvalQueue)
                            bssrdfEvalQueue->Reset();
                        if (subsurfaceScatterQueue)
                            subsurfaceScatterQueue->Reset();
                    });

                // Follow active ray paths and accumulate radiance estimates
392
                GenerateRaySamples(wavefrontDepth, sampleIndex);
393

394
                // Find closest intersections along active rays
395
                aggregate->IntersectClosest(
396 397 398
                    maxQueueSize, CurrentRayQueue(wavefrontDepth), escapedRayQueue,
                    hitAreaLightQueue, basicEvalMaterialQueue, universalEvalMaterialQueue,
                    mediumSampleQueue, NextRayQueue(wavefrontDepth));
M
Matt Pharr 已提交
399

400
                if (wavefrontDepth > 0) {
401
                    // As above, with the indexing...
402
                    RayQueue *statsQueue = CurrentRayQueue(wavefrontDepth);
403 404
                    Do(
                        "Update indirect ray stats", PBRT_CPU_GPU_LAMBDA() {
405
                            stats->indirectRays[wavefrontDepth] += statsQueue->Size();
406
                        });
407
                }
408 409 410 411 412 413 414

                SampleMediumInteraction(wavefrontDepth);

                HandleEscapedRays();

                HandleEmissiveIntersection();

415
                if (wavefrontDepth == maxDepth)
M
Matt Pharr 已提交
416
                    break;
417

M
Matt Pharr 已提交
418
                EvaluateMaterialsAndBSDFs(wavefrontDepth, cameraMotion);
419

420
                // Do immediately so that we have space for shadow rays for subsurface..
421
                TraceShadowRays(wavefrontDepth);
422 423

                SampleSubsurface(wavefrontDepth);
M
Matt Pharr 已提交
424
            }
M
Matt Pharr 已提交
425

M
Matt Pharr 已提交
426
            UpdateFilm();
M
Matt Pharr 已提交
427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458
        }

        // Copy updated film pixels to buffer for the display server.
        if (Options->useGPU && !Options->displayServer.empty())
            UpdateDisplayRGBFromFilm(pixelBounds);

        if (gui) {
            RGB *rgb = gui->MapFramebuffer();
            UpdateFramebufferFromFilm(pixelBounds, gui->exposure, rgb);
            gui->UnmapFramebuffer();

            if (gui->printCameraTransform) {
                SquareMatrix<4> cfw = (Inverse(gui->GetCameraTransform()) * cameraFromWorld).GetMatrix();
                Printf("Current camera transform:\nTransform [ ");
                for (int i = 0; i < 16; ++i)
                    Printf("%f ", cfw[i % 4][i / 4]);
                Printf("]\n");
                std::fflush(stdout);
                gui->printCameraTransform = false;
            }

            DisplayState state = gui->RefreshDisplay();
            if (state == DisplayState::EXIT)
                break;
            else if (state == DisplayState::RESET) {
                sampleIndex = firstSampleIndex - 1;
                ParallelFor("Reset pixels", resolution.x * resolution.y,
                            PBRT_CPU_GPU_LAMBDA(int i) {
                                int x = i % resolution.x, y = i / resolution.x;
                                film.ResetPixel(pixelBounds.pMin + Vector2i(x, y));
                            });
            }
M
Matt Pharr 已提交
459 460 461 462
        }

        progress.Update();
    }
M
Matt Pharr 已提交
463 464 465 466 467 468

    if (gui) {
        delete gui;
        gui = nullptr;
    }

M
Matt Pharr 已提交
469
    progress.Done();
470

471 472 473
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU)
        GPUWait();
474
#endif  // PBRT_BUILD_GPU_RENDERER
475
    Float seconds = timer.ElapsedSeconds();
476

477 478
    // Shut down display server thread, if active
    StopDisplayThread();
M
Matt Pharr 已提交
479

480
    return seconds;
M
Matt Pharr 已提交
481 482
}

483
void WavefrontPathIntegrator::HandleEscapedRays() {
484 485
    if (!escapedRayQueue)
        return;
486 487
    ForAllQueued(
        "Handle escaped rays", escapedRayQueue, maxQueueSize,
488
        PBRT_CPU_GPU_LAMBDA(const EscapedRayWorkItem w) {
489
            // Compute weighted radiance for escaped ray
490
            SampledSpectrum L(0.f);
491
            for (const auto &light : *infiniteLights) {
492 493
                if (SampledSpectrum Le = light.Le(Ray(w.rayo, w.rayd), w.lambda); Le) {
                    // Compute path radiance contribution from infinite light
494 495
                    PBRT_DBG("L %f %f %f %f beta %f %f %f %f Le %f %f %f %f", L[0], L[1],
                             L[2], L[3], w.beta[0], w.beta[1], w.beta[2], w.beta[3],
496
                             Le[0], Le[1], Le[2], Le[3]);
497
                    PBRT_DBG("pdf uni %f %f %f %f pdf nee %f %f %f %f", w.inv_w_u[0],
M
Matt Pharr 已提交
498 499
                             w.inv_w_u[1], w.inv_w_u[2], w.inv_w_u[3], w.inv_w_l[0],
                             w.inv_w_l[1], w.inv_w_l[2], w.inv_w_l[3]);
500

501
                    if (w.depth == 0 || w.specularBounce) {
502
                        L += w.beta * Le / w.inv_w_u.Average();
503 504 505
                    } else {
                        // Compute MIS-weighted radiance contribution from infinite light
                        LightSampleContext ctx = w.prevIntrCtx;
506
                        Float lightChoicePDF = lightSampler.PMF(ctx, light);
M
Matt Pharr 已提交
507 508
                        SampledSpectrum inv_w_l =
                            w.inv_w_l * lightChoicePDF * light.PDF_Li(ctx, w.rayd, true);
509
                        L += w.beta * Le / (w.inv_w_u + inv_w_l).Average();
510 511
                    }
                }
512
            }
513

514
            // Update pixel radiance if ray's radiance is nonzero
515
            if (L) {
516 517
                PBRT_DBG("Added L %f %f %f %f for escaped ray pixel index %d\n", L[0],
                         L[1], L[2], L[3], w.pixelIndex);
518

519 520 521
                L += pixelSampleState.L[w.pixelIndex];
                pixelSampleState.L[w.pixelIndex] = L;
            }
522
        });
M
Matt Pharr 已提交
523 524
}

525
void WavefrontPathIntegrator::HandleEmissiveIntersection() {
M
Matt Pharr 已提交
526 527
    ForAllQueued(
        "Handle emitters hit by indirect rays", hitAreaLightQueue, maxQueueSize,
528
        PBRT_CPU_GPU_LAMBDA(const HitAreaLightWorkItem w) {
529
            // Find emitted radiance from surface that ray hit
M
Matt Pharr 已提交
530
            SampledSpectrum Le = w.areaLight.L(w.p, w.n, w.uv, w.wo, w.lambda);
M
Matt Pharr 已提交
531 532
            if (!Le)
                return;
533
            PBRT_DBG("Got Le %f %f %f %f from hit area light at depth %d\n", Le[0], Le[1],
534
                     Le[2], Le[3], w.depth);
M
Matt Pharr 已提交
535

536
            // Compute area light's weighted radiance contribution to the path
537
            SampledSpectrum L(0.f);
538
            if (w.depth == 0 || w.specularBounce) {
539
                L = w.beta * Le / w.inv_w_u.Average();
M
Matt Pharr 已提交
540
            } else {
541
                // Compute MIS-weighted radiance contribution from area light
542 543
                Vector3f wi = -w.wo;
                LightSampleContext ctx = w.prevIntrCtx;
544
                Float lightChoicePDF = lightSampler.PMF(ctx, w.areaLight);
545
                Float lightPDF = lightChoicePDF * w.areaLight.PDF_Li(ctx, wi, true);
M
Matt Pharr 已提交
546

547 548 549
                SampledSpectrum inv_w_u = w.inv_w_u;
                SampledSpectrum inv_w_l = w.inv_w_l * lightPDF;
                L = w.beta * Le / (inv_w_u + inv_w_l).Average();
M
Matt Pharr 已提交
550 551
            }

552
            PBRT_DBG("Added L %f %f %f %f for pixel index %d\n", L[0], L[1], L[2], L[3],
553
                     w.pixelIndex);
554

555
            // Update _L_ in _PixelSampleState_ for area light's radiance
556 557
            L += pixelSampleState.L[w.pixelIndex];
            pixelSampleState.L[w.pixelIndex] = L;
M
Matt Pharr 已提交
558 559 560
        });
}

561
void WavefrontPathIntegrator::TraceShadowRays(int wavefrontDepth) {
562
    if (haveMedia)
563
        aggregate->IntersectShadowTr(maxQueueSize, shadowRayQueue, &pixelSampleState);
564
    else
565
        aggregate->IntersectShadow(maxQueueSize, shadowRayQueue, &pixelSampleState);
566
    // Reset shadow ray queue
567 568
    Do(
        "Reset shadowRayQueue", PBRT_CPU_GPU_LAMBDA() {
569
            stats->shadowRays[wavefrontDepth] += shadowRayQueue->Size();
570 571 572 573
            shadowRayQueue->Reset();
        });
}

574
WavefrontPathIntegrator::Stats::Stats(int maxDepth, Allocator alloc)
M
Matt Pharr 已提交
575 576
    : indirectRays(maxDepth + 1, alloc), shadowRays(maxDepth, alloc) {}

577
std::string WavefrontPathIntegrator::Stats::Print() const {
M
Matt Pharr 已提交
578 579 580 581 582 583 584 585 586 587 588 589
    std::string s;
    s += StringPrintf("    %-42s               %12" PRIu64 "\n", "Camera rays",
                      cameraRays);
    for (int i = 1; i < indirectRays.size(); ++i)
        s += StringPrintf("    %-42s               %12" PRIu64 "\n",
                          StringPrintf("Indirect rays, depth %-3d", i), indirectRays[i]);
    for (int i = 0; i < shadowRays.size(); ++i)
        s += StringPrintf("    %-42s               %12" PRIu64 "\n",
                          StringPrintf("Shadow rays, depth %-3d", i), shadowRays[i]);
    return s;
}

590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627
#ifdef PBRT_BUILD_GPU_RENDERER
void WavefrontPathIntegrator::PrefetchGPUAllocations() {
    int deviceIndex;
    CUDA_CHECK(cudaGetDevice(&deviceIndex));
    int hasConcurrentManagedAccess;
    CUDA_CHECK(cudaDeviceGetAttribute(&hasConcurrentManagedAccess,
                                      cudaDevAttrConcurrentManagedAccess,
                                      deviceIndex));

    // Copy all of the scene data structures over to GPU memory.  This
    // ensures that there isn't a big performance hitch for the first batch
    // of rays as that stuff is copied over on demand.
    if (hasConcurrentManagedAccess) {
        // Set things up so that we can still have read from the
        // WavefrontPathIntegrator struct on the CPU without hurting
        // performance. (This makes it possible to use the values of things
        // like WavefrontPathIntegrator::haveSubsurface to conditionally launch
        // kernels according to what's in the scene...)
        CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetReadMostly,
                                 /* ignored argument */ 0));
        CUDA_CHECK(cudaMemAdvise(this, sizeof(*this),
                                 cudaMemAdviseSetPreferredLocation, deviceIndex));

        // Copy all of the scene data structures over to GPU memory.  This
        // ensures that there isn't a big performance hitch for the first batch
        // of rays as that stuff is copied over on demand.
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        mr->PrefetchToGPU();
    } else {
        // TODO: on systems with basic unified memory, just launching a
        // kernel should cause everything to be copied over. Is an empty
        // kernel sufficient?
    }
}
#endif // PBRT_BUILD_GPU_RENDERER

628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646
void WavefrontPathIntegrator::StartDisplayThread() {
    Bounds2i pixelBounds = film.PixelBounds();
    Vector2i resolution = pixelBounds.Diagonal();

#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        // Allocate staging memory on the GPU to store the current WIP
        // image.
        CUDA_CHECK(cudaMalloc(&displayRGB, resolution.x * resolution.y * sizeof(RGB)));
        CUDA_CHECK(cudaMemset(displayRGB, 0, resolution.x * resolution.y * sizeof(RGB)));

        // Host-side memory for the WIP Image.  We'll just let this leak so
        // that the lambda passed to DisplayDynamic below doesn't access
        // freed memory after Render() returns...
        displayRGBHost = new RGB[resolution.x * resolution.y];

        // Note that we can't just capture |this| for the member variables
        // below because with managed memory on Windows, the CPU and GPU
        // can't be accessing the same memory concurrently...
M
Matt Pharr 已提交
647 648 649
        copyThread = new std::thread([exitCopyThread = this->exitCopyThread,
                                      displayRGBHost = this->displayRGBHost,
                                      displayRGB = this->displayRGB, resolution]() {
650 651 652 653 654 655 656 657 658 659 660
            GPURegisterThread("DISPLAY_SERVER_COPY_THREAD");

            // Copy back to the CPU using a separate stream so that we can
            // periodically but asynchronously pick up the latest results
            // from the GPU.
            cudaStream_t memcpyStream;
            CUDA_CHECK(cudaStreamCreate(&memcpyStream));
            GPUNameStream(memcpyStream, "DISPLAY_SERVER_COPY_STREAM");

            // Copy back to the host from the GPU buffer, without any
            // synthronization.
M
Matt Pharr 已提交
661
            while (!*exitCopyThread) {
662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681
                CUDA_CHECK(cudaMemcpyAsync(displayRGBHost, displayRGB,
                                           resolution.x * resolution.y * sizeof(RGB),
                                           cudaMemcpyDeviceToHost, memcpyStream));
                std::this_thread::sleep_for(std::chrono::milliseconds(50));

                CUDA_CHECK(cudaStreamSynchronize(memcpyStream));
            }

            // Copy one more time to get the final image before exiting.
            CUDA_CHECK(cudaMemcpy(displayRGBHost, displayRGB,
                                  resolution.x * resolution.y * sizeof(RGB),
                                  cudaMemcpyDeviceToHost));
            CUDA_CHECK(cudaDeviceSynchronize());
        });

        // Now on the CPU side, give the display system a lambda that
        // copies values from |displayRGBHost| into its buffers used for
        // sending messages to the display program (i.e., tev).
        DisplayDynamic(film.GetFilename(), {resolution.x, resolution.y},
                       {"R", "G", "B"},
682
                       [resolution, this](Bounds2i b, pstd::span<pstd::span<float>> displayValue) {
683 684 685 686 687 688 689 690 691 692 693 694 695
                           int index = 0;
                           for (Point2i p : b) {
                               RGB rgb = displayRGBHost[p.x + p.y * resolution.x];
                               displayValue[0][index] = rgb.r;
                               displayValue[1][index] = rgb.g;
                               displayValue[2][index] = rgb.b;
                               ++index;
                           }
                       });
    } else
#endif  // PBRT_BUILD_GPU_RENDERER
        DisplayDynamic(film.GetFilename(), Point2i(pixelBounds.Diagonal()), {"R", "G", "B"},
                       [pixelBounds, this](Bounds2i b,
696
                                           pstd::span<pstd::span<float>> displayValue) {
697 698 699 700 701 702 703 704 705 706 707
                           int index = 0;
                           for (Point2i p : b) {
                               RGB rgb =
                                   film.GetPixelRGB(pixelBounds.pMin + p, 1.f /* splat scale */);
                               for (int c = 0; c < 3; ++c)
                                   displayValue[c][index] = rgb[c];
                               ++index;
                           }
                       });
}

M
Matt Pharr 已提交
708
void WavefrontPathIntegrator::UpdateDisplayRGBFromFilm(Bounds2i pixelBounds) {
709
#ifdef PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
710 711 712 713 714 715 716
    Vector2i resolution = pixelBounds.Diagonal();
    GPUParallelFor(
                   "Update Display RGB Buffer", resolution.x * resolution.y,
                   PBRT_CPU_GPU_LAMBDA(int index) {
                       Point2i p(index % resolution.x, index / resolution.x);
                       displayRGB[index] = film.GetPixelRGB(p + pixelBounds.pMin);
                   });
717 718 719 720 721 722 723 724 725
#endif  //  PBRT_BUILD_GPU_RENDERER
}

void WavefrontPathIntegrator::StopDisplayThread() {
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        // Wait until rendering is all done before we start to shut down the
        // display stuff..
        if (!Options->displayServer.empty()) {
M
Matt Pharr 已提交
726 727 728 729
            *exitCopyThread = true;
            copyThread->join();
            delete copyThread;
            copyThread = nullptr;
730 731 732 733 734 735 736 737 738
        }

        // Another synchronization to make sure no kernels are running on the
        // GPU so that we can safely access unified memory from the CPU.
        GPUWait();
    }
#endif  // PBRT_BUILD_GPU_RENDERER
}

M
Matt Pharr 已提交
739 740 741 742 743 744 745 746 747 748
void WavefrontPathIntegrator::UpdateFramebufferFromFilm(Bounds2i pixelBounds, Float exposure,
                                                        RGB *rgb) {
    Vector2i resolution = pixelBounds.Diagonal();
    ParallelFor("Update framebuffer", resolution.x * resolution.y,
                PBRT_CPU_GPU_LAMBDA(int index) {
                    Point2i p(index % resolution.x, index / resolution.x);
                    rgb[index] = exposure * film.GetPixelRGB(p + film.PixelBounds().pMin);
                });
}

M
Matt Pharr 已提交
749
}  // namespace pbrt