integrator.cpp 31.2 KB
Newer Older
M
Matt Pharr 已提交
1 2 3 4
// pbrt is Copyright(c) 1998-2020 Matt Pharr, Wenzel Jakob, and Greg Humphreys.
// The pbrt source code is licensed under the Apache License, Version 2.0.
// SPDX: Apache-2.0

5
#include <pbrt/wavefront/integrator.h>
M
Matt Pharr 已提交
6 7 8 9 10

#include <pbrt/base/medium.h>
#include <pbrt/cameras.h>
#include <pbrt/film.h>
#include <pbrt/filters.h>
11 12 13
#ifdef PBRT_BUILD_GPU_RENDERER
#include <pbrt/gpu/aggregate.h>
#include <pbrt/gpu/memory.h>
14
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
15 16 17 18 19 20
#include <pbrt/lights.h>
#include <pbrt/lightsamplers.h>
#include <pbrt/util/color.h>
#include <pbrt/util/colorspace.h>
#include <pbrt/util/display.h>
#include <pbrt/util/file.h>
M
Matt Pharr 已提交
21
#include <pbrt/util/gui.h>
M
Matt Pharr 已提交
22 23 24 25 26
#include <pbrt/util/image.h>
#include <pbrt/util/log.h>
#include <pbrt/util/print.h>
#include <pbrt/util/progressreporter.h>
#include <pbrt/util/pstd.h>
27
#include <pbrt/util/spectrum.h>
M
Matt Pharr 已提交
28
#include <pbrt/util/stats.h>
M
Matt Pharr 已提交
29
#include <pbrt/util/string.h>
M
Matt Pharr 已提交
30
#include <pbrt/util/taggedptr.h>
31
#include <pbrt/wavefront/aggregate.h>
M
Matt Pharr 已提交
32

33
#include <atomic>
M
Matt Pharr 已提交
34 35 36 37
#include <cstring>
#include <iostream>
#include <map>

38
#ifdef PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
39 40
#include <cuda.h>
#include <cuda_runtime.h>
41
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
42 43 44

namespace pbrt {

45
STAT_MEMORY_COUNTER("Memory/Wavefront integrator pixel state", pathIntegratorBytes);
M
Matt Pharr 已提交
46

47 48 49
// Records which queues and kernels the wavefront integrator needs in order
// to handle material _m_.
//
//  - A null material (an "interface" material) sets _*haveMedia_ and is
//    otherwise ignored.
//  - A MixMaterial is not recorded itself; its two constituent materials are
//    visited recursively. Rendering is aborted with ErrorExit() if the mix
//    material has a texture the BasicTextureEvaluator cannot evaluate.
//  - Any other material sets _*haveSubsurface_ if it has subsurface
//    scattering, and sets the entry for its tag in _*haveBasicEvalMaterial_
//    when all of its textures (including a displacement texture, if any) can
//    be evaluated by the BasicTextureEvaluator, or in
//    _*haveUniversalEvalMaterial_ otherwise.
//
// The output flags are only ever set, never cleared, so the function can be
// called repeatedly to accumulate the needs of a whole scene.
static void updateMaterialNeeds(
    Material m, pstd::array<bool, Material::NumTags()> *haveBasicEvalMaterial,
    pstd::array<bool, Material::NumTags()> *haveUniversalEvalMaterial,
    bool *haveSubsurface, bool *haveMedia) {
    if (!m) {
        // Interface material. This must be recorded *before* returning:
        // testing for a null material after the early-out can never succeed,
        // which would leave the medium-related queues and kernels disabled
        // for scenes that only use interface materials.
        *haveMedia = true;
        return;
    }

    if (MixMaterial *mix = m.CastOrNullptr<MixMaterial>(); mix) {
        // This is a somewhat odd place for this check, but it's convenient...
        if (!m.CanEvaluateTextures(BasicTextureEvaluator()))
            ErrorExit("\"mix\" material has a texture that can't be evaluated with the "
                      "BasicTextureEvaluator, which is all that is currently supported "
                      "int the wavefront renderer--sorry! %s",
                      *mix);

        // Only the materials being mixed contribute evaluation needs.
        updateMaterialNeeds(mix->GetMaterial(0), haveBasicEvalMaterial,
                            haveUniversalEvalMaterial, haveSubsurface, haveMedia);
        updateMaterialNeeds(mix->GetMaterial(1), haveBasicEvalMaterial,
                            haveUniversalEvalMaterial, haveSubsurface, haveMedia);
        return;
    }

    *haveSubsurface |= m.HasSubsurfaceScattering();

    // Choose between the basic and the universal evaluation queue depending
    // on whether every texture involved can be handled by the basic evaluator.
    FloatTexture displace = m.GetDisplacement();
    if (m.CanEvaluateTextures(BasicTextureEvaluator()) &&
        (!displace || BasicTextureEvaluator().CanEvaluate({displace}, {})))
        (*haveBasicEvalMaterial)[m.Tag()] = true;
    else
        (*haveUniversalEvalMaterial)[m.Tag()] = true;
}
79

80
// Builds everything the wavefront integrator needs to render _scene_: media,
// textures, lights, materials, the ray intersection aggregate (OptiX when
// Options->useGPU is set, CPU otherwise), the light sampler, and finally all
// of the work queues, which are sized for one pass of scanlines. All
// integrator-owned storage is allocated through _memoryResource_.
WavefrontPathIntegrator::WavefrontPathIntegrator(
    pstd::pmr::memory_resource *memoryResource, BasicScene &scene)
    : memoryResource(memoryResource), exitCopyThread(new std::atomic<bool>(false)) {
    ThreadLocal<Allocator> threadAllocators(
        [memoryResource]() { return Allocator(memoryResource); });
    Allocator alloc = threadAllocators.Get();

    // Allocate all of the data structures that represent the scene...
    std::map<std::string, Medium> media = scene.CreateMedia();

    // "haveMedia" is a bit of a misnomer: it determines whether queues are
    // allocated for the medium sampling kernels, whether those kernels are
    // launched, and whether the ray marching shadow ray kernel is used.
    // Thus it may be true even when there are no actual media, if some
    // "interface" materials are present in the scene.
    auto touchesMedium = [](const auto &shape) {
        return !shape.insideMedium.empty() || !shape.outsideMedium.empty();
    };

    haveMedia = false;
    // Check the shapes and instance definitions...
    for (const auto &shape : scene.shapes)
        if (touchesMedium(shape))
            haveMedia = true;
    for (const auto &shape : scene.animatedShapes)
        if (touchesMedium(shape))
            haveMedia = true;
    for (const auto &instanceDefinition : scene.instanceDefinitions) {
        const auto &definition = *instanceDefinition.second;
        for (const auto &shape : definition.shapes)
            if (touchesMedium(shape))
                haveMedia = true;
        for (const auto &shape : definition.animatedShapes)
            if (touchesMedium(shape))
                haveMedia = true;
    }

    // Textures
    LOG_VERBOSE("Starting to create textures");
    NamedTextures textures = scene.CreateTextures();
    LOG_VERBOSE("Done creating textures");

    // Lights: every light goes into allLights; infinite lights are also
    // recorded separately so that escaped rays can be shaded with them.
    LOG_VERBOSE("Starting to create lights");
    pstd::vector<Light> allLights;
    std::map<int, pstd::vector<Light> *> shapeIndexToAreaLights;

    infiniteLights = alloc.new_object<pstd::vector<Light>>(alloc);

    for (Light light : scene.CreateLights(textures, &shapeIndexToAreaLights)) {
        const bool isInfinite = light.Is<UniformInfiniteLight>() ||
                                light.Is<ImageInfiniteLight>() ||
                                light.Is<PortalImageInfiniteLight>();
        if (isInfinite)
            infiniteLights->push_back(light);

        allLights.push_back(light);
    }
    LOG_VERBOSE("Done creating lights");

    // Materials: create them, then work out which evaluation queues and
    // optional kernels they require.
    LOG_VERBOSE("Starting to create materials");
    std::map<std::string, pbrt::Material> namedMaterials;
    std::vector<pbrt::Material> materials;
    scene.CreateMaterials(textures, &namedMaterials, &materials);

    haveBasicEvalMaterial.fill(false);
    haveUniversalEvalMaterial.fill(false);
    haveSubsurface = false;
    for (Material material : materials)
        updateMaterialNeeds(material, &haveBasicEvalMaterial,
                            &haveUniversalEvalMaterial, &haveSubsurface, &haveMedia);
    for (const auto &entry : namedMaterials)
        updateMaterialNeeds(entry.second, &haveBasicEvalMaterial,
                            &haveUniversalEvalMaterial, &haveSubsurface, &haveMedia);
    LOG_VERBOSE("Finished creating materials");

    // Ray intersection aggregate
    if (Options->useGPU) {
#ifdef PBRT_BUILD_GPU_RENDERER
        CUDATrackedMemoryResource *trackedResource =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(trackedResource);
        aggregate = new OptiXAggregate(scene, trackedResource, textures,
                                       shapeIndexToAreaLights, media, namedMaterials,
                                       materials);
#else
        LOG_FATAL("Options->useGPU was set without PBRT_BUILD_GPU_RENDERER enabled");
#endif
    } else {
        aggregate = new CPUAggregate(scene, textures, shapeIndexToAreaLights, media,
                                     namedMaterials, materials);
    }

    // Preprocess the light sources
    for (Light light : allLights)
        light.Preprocess(aggregate->Bounds());

    // Emissive media count as light sources too.
    bool haveLights = !allLights.empty();
    for (const auto &entry : media)
        haveLights |= entry.second.IsEmissive();
    if (!haveLights)
        ErrorExit("No light sources specified");

    // Light sampler; with a single light the uniform sampler is always used.
    LOG_VERBOSE("Starting to create light sampler");
    std::string lightSamplerName =
        scene.integrator.parameters.GetOneString("lightsampler", "bvh");
    if (allLights.size() == 1)
        lightSamplerName = "uniform";
    lightSampler = LightSampler::Create(lightSamplerName, allLights, alloc);
    LOG_VERBOSE("Finished creating light sampler");

    const bool isSupportedIntegrator =
        scene.integrator.name == "path" || scene.integrator.name == "volpath";
    if (!isSupportedIntegrator)
        Warning(&scene.integrator.loc,
                "Ignoring specified integrator \"%s\": the wavefront integrator "
                "always uses a \"volpath\" integrator.",
                scene.integrator.name);

    // Integrator parameters
    regularize = scene.integrator.parameters.GetOneBool("regularize", false);
    maxDepth = scene.integrator.parameters.GetOneInt("maxdepth", 5);

    camera = scene.GetCamera();
    film = camera.GetFilm();
    filter = film.GetFilter();
    sampler = scene.GetSampler();

    initializeVisibleSurface = film.UsesVisibleSurface();
    samplesPerPixel = sampler.SamplesPerPixel();

    // Warn about unsupported stuff...
    if (Options->forceDiffuse)
        ErrorExit("The wavefront integrator does not support --force-diffuse.");
    if (Options->writePartialImages)
        Warning("The wavefront integrator does not support --write-partial-images.");
    if (Options->recordPixelStatistics)
        ErrorExit("The wavefront integrator does not support --pixelstats.");
    if (!Options->mseReferenceImage.empty())
        ErrorExit("The wavefront integrator does not support --mse-reference-image.");
    if (!Options->mseReferenceOutput.empty())
        ErrorExit("The wavefront integrator does not support --mse-reference-out.");

    ///////////////////////////////////////////////////////////////////////////
    // Allocate storage for all of the queues/buffers...

#ifdef PBRT_BUILD_GPU_RENDERER
    // Remember how much memory was in use beforehand so that the amount used
    // by the queues can be reported in the statistics.
    size_t startSize = 0;
    if (Options->useGPU) {
        CUDATrackedMemoryResource *trackedResource =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(trackedResource);
        startSize = trackedResource->BytesAllocated();
    }
#endif  // PBRT_BUILD_GPU_RENDERER

    // Compute number of scanlines to render per pass
    Vector2i resolution = film.PixelBounds().Diagonal();
    // TODO: make this configurable. Base it on the amount of GPU memory?
    int maxSamples = 1024 * 1024;
    scanlinesPerPass = std::max(1, maxSamples / resolution.x);
    // Even out the passes so that the last one is not much smaller.
    int nPasses = (resolution.y + scanlinesPerPass - 1) / scanlinesPerPass;
    scanlinesPerPass = (resolution.y + nPasses - 1) / nPasses;
    maxQueueSize = resolution.x * scanlinesPerPass;
    LOG_VERBOSE("Will render in %d passes %d scanlines per pass\n", nPasses,
                scanlinesPerPass);

    pixelSampleState = SOA<PixelSampleState>(maxQueueSize, alloc);

    for (int i = 0; i < 2; ++i)
        rayQueues[i] = alloc.new_object<RayQueue>(maxQueueSize, alloc);

    shadowRayQueue = alloc.new_object<ShadowRayQueue>(maxQueueSize, alloc);

    // Subsurface queues are only needed if some material uses subsurface
    // scattering.
    if (haveSubsurface) {
        bssrdfEvalQueue =
            alloc.new_object<GetBSSRDFAndProbeRayQueue>(maxQueueSize, alloc);
        subsurfaceScatterQueue =
            alloc.new_object<SubsurfaceScatterQueue>(maxQueueSize, alloc);
    }

    // Escaped rays only need handling if there are infinite lights.
    if (infiniteLights->size())
        escapedRayQueue = alloc.new_object<EscapedRayQueue>(maxQueueSize, alloc);
    hitAreaLightQueue = alloc.new_object<HitAreaLightQueue>(maxQueueSize, alloc);

    // The spans passed to the material queues skip entry 0 of the "have"
    // arrays.
    basicEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveBasicEvalMaterial[1], haveBasicEvalMaterial.size() - 1));
    universalEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveUniversalEvalMaterial[1],
                            haveUniversalEvalMaterial.size() - 1));

    if (haveMedia) {
        mediumSampleQueue = alloc.new_object<MediumSampleQueue>(maxQueueSize, alloc);

        // TODO: in the presence of multiple PhaseFunction implementations,
        // it could be worthwhile to see which are present in the scene and
        // then initialize havePhase accordingly...
        pstd::array<bool, PhaseFunction::NumTags()> havePhase;
        havePhase.fill(true);
        mediumScatterQueue =
            alloc.new_object<MediumScatterQueue>(maxQueueSize, alloc, havePhase);
    }

    stats = alloc.new_object<Stats>(maxDepth, alloc);

#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        CUDATrackedMemoryResource *trackedResource =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(trackedResource);
        size_t endSize = trackedResource->BytesAllocated();
        pathIntegratorBytes += endSize - startSize;
    }
#endif  // PBRT_BUILD_GPU_RENDERER
}

286 287
// WavefrontPathIntegrator Method Definitions
// Renders the image. For each sample index, the image is processed in
// batches of scanlinesPerPass scanlines: camera rays are generated, then the
// wavefront loop traces and shades them until maxDepth is reached, and the
// results are added to the film. When enabled, the interactive GUI and the
// display server are updated after each sample. Returns the elapsed time in
// seconds as reported by the Timer created before rendering starts.
Float WavefrontPathIntegrator::Render() {
    Bounds2i pixelBounds = film.PixelBounds();
    Vector2i resolution = pixelBounds.Diagonal();

    GUI *gui = nullptr;
    // FIXME: camera animation; whatever...
    Transform renderFromCamera =
        camera.GetCameraTransform().RenderFromCamera().startTransform;
    Transform cameraFromRender = Inverse(renderFromCamera);
    Transform cameraFromWorld =
        camera.GetCameraTransform().CameraFromWorld(camera.SampleTime(0.f));
    if (Options->interactive) {
        if (!Options->displayServer.empty())
            ErrorExit("--interactive and --display-server cannot be used at the same time.");
        gui = new GUI(film.GetFilename(), resolution, aggregate->Bounds());
    }

    Timer timer;
    // Prefetch allocations to GPU memory
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU)
        PrefetchGPUAllocations();
#endif  // PBRT_BUILD_GPU_RENDERER

    // Launch thread to copy image for display server, if enabled
    if (!Options->displayServer.empty())
        StartDisplayThread();

    // Loop over sample indices and evaluate pixel samples
    int firstSampleIndex = 0, lastSampleIndex = samplesPerPixel;
    // Update sample index range based on debug start, if provided:
    // "--debugstart first[,count]" renders _count_ samples (one by default)
    // starting at sample _first_.
    if (!Options->debugStart.empty()) {
        std::vector<int> debugValues = SplitStringToInts(Options->debugStart, ',');
        const bool haveCount = (debugValues.size() == 2);
        if (debugValues.size() != 1 && !haveCount)
            ErrorExit("Expected either one or two integer values for --debugstart.");

        firstSampleIndex = debugValues[0];
        lastSampleIndex = firstSampleIndex + (haveCount ? debugValues[1] : 1);
    }

    ProgressReporter progress(lastSampleIndex - firstSampleIndex, "Rendering",
                              Options->quiet || Options->interactive, Options->useGPU);
    for (int sampleIndex = firstSampleIndex; sampleIndex < lastSampleIndex;
         ++sampleIndex) {
        // Attempt to work around issue #145.
#if !(defined(PBRT_IS_WINDOWS) && defined(PBRT_BUILD_GPU_RENDERER) && \
      __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 1)
        CheckCallbackScope checkScope([&]() {
            return StringPrintf("Wavefront rendering failed at sample %d. Debug with "
                                "\"--debugstart %d\"\n",
                                sampleIndex, sampleIndex);
        });
#endif

        // Render image for sample _sampleIndex_
        LOG_VERBOSE("Starting to submit work for sample %d", sampleIndex);
        for (int y0 = pixelBounds.pMin.y; y0 < pixelBounds.pMax.y;
             y0 += scanlinesPerPass) {
            // Generate camera rays for current scanline range
            RayQueue *cameraRayQueue = CurrentRayQueue(0);
            Do(
                "Reset ray queue", PBRT_CPU_GPU_LAMBDA() {
                    PBRT_DBG("Starting scanlines at y0 = %d, sample %d / %d\n", y0,
                             sampleIndex, samplesPerPixel);
                    cameraRayQueue->Reset();
                });

            // In interactive mode, apply the GUI's current camera movement;
            // otherwise cameraMotion stays default-constructed.
            Transform cameraMotion;
            if (gui)
                cameraMotion =
                    renderFromCamera * gui->GetCameraTransform() * cameraFromRender;
            GenerateCameraRays(y0, cameraMotion, sampleIndex);
            Do(
                "Update camera ray stats",
                PBRT_CPU_GPU_LAMBDA() { stats->cameraRays += cameraRayQueue->Size(); });

            // Trace rays and estimate radiance up to maximum ray depth. The
            // loop is left from its middle once wavefrontDepth == maxDepth.
            for (int wavefrontDepth = 0;; ++wavefrontDepth) {
                // Reset queues before tracing rays
                RayQueue *nextQueue = NextRayQueue(wavefrontDepth);
                Do(
                    "Reset queues before tracing rays", PBRT_CPU_GPU_LAMBDA() {
                        nextQueue->Reset();
                        // Reset queues before tracing next batch of rays;
                        // the optional ones may not have been allocated.
                        if (mediumSampleQueue)
                            mediumSampleQueue->Reset();
                        if (mediumScatterQueue)
                            mediumScatterQueue->Reset();

                        if (escapedRayQueue)
                            escapedRayQueue->Reset();
                        hitAreaLightQueue->Reset();

                        basicEvalMaterialQueue->Reset();
                        universalEvalMaterialQueue->Reset();

                        if (bssrdfEvalQueue)
                            bssrdfEvalQueue->Reset();
                        if (subsurfaceScatterQueue)
                            subsurfaceScatterQueue->Reset();
                    });

                // Follow active ray paths and accumulate radiance estimates
                GenerateRaySamples(wavefrontDepth, sampleIndex);

                // Find closest intersections along active rays
                aggregate->IntersectClosest(
                    maxQueueSize, CurrentRayQueue(wavefrontDepth), escapedRayQueue,
                    hitAreaLightQueue, basicEvalMaterialQueue, universalEvalMaterialQueue,
                    mediumSampleQueue, NextRayQueue(wavefrontDepth));

                if (wavefrontDepth > 0) {
                    // As above, with the indexing...
                    RayQueue *statsQueue = CurrentRayQueue(wavefrontDepth);
                    Do(
                        "Update indirect ray stats", PBRT_CPU_GPU_LAMBDA() {
                            stats->indirectRays[wavefrontDepth] += statsQueue->Size();
                        });
                }

                SampleMediumInteraction(wavefrontDepth);

                HandleEscapedRays();

                HandleEmissiveIntersection();

                // At the maximum depth emission has been accounted for above
                // and no further rays are spawned.
                if (wavefrontDepth == maxDepth)
                    break;

                EvaluateMaterialsAndBSDFs(wavefrontDepth, cameraMotion);

                // Do immediately so that we have space for shadow rays for subsurface..
                TraceShadowRays(wavefrontDepth);

                SampleSubsurface(wavefrontDepth);
            }

            UpdateFilm();
        }

        // Copy updated film pixels to buffer for the display server.
        if (Options->useGPU && !Options->displayServer.empty())
            UpdateDisplayRGBFromFilm(pixelBounds);

        if (gui) {
            // Show the current state of the film in the GUI window.
            RGB *rgb = gui->MapFramebuffer();
            UpdateFramebufferFromFilm(pixelBounds, gui->exposure, rgb);
            gui->UnmapFramebuffer();

            if (gui->printCameraTransform) {
                SquareMatrix<4> cfw =
                    (Inverse(gui->GetCameraTransform()) * cameraFromWorld).GetMatrix();
                Printf("Current camera transform:\nTransform [ ");
                // Entries are printed column by column.
                for (int i = 0; i < 16; ++i)
                    Printf("%f ", cfw[i % 4][i / 4]);
                Printf("]\n");
                std::fflush(stdout);
                gui->printCameraTransform = false;
            }

            DisplayState state = gui->RefreshDisplay();
            if (state == DisplayState::EXIT)
                break;
            if (state == DisplayState::RESET) {
                // Restart from the first sample (the loop increment brings
                // the index back to firstSampleIndex) and clear the film.
                sampleIndex = firstSampleIndex - 1;
                ParallelFor("Reset pixels", resolution.x * resolution.y,
                            PBRT_CPU_GPU_LAMBDA(int i) {
                                int x = i % resolution.x, y = i / resolution.x;
                                film.ResetPixel(pixelBounds.pMin + Vector2i(x, y));
                            });
            }
        }

        progress.Update();
    }

    // Deleting a null pointer is a no-op, so no check is needed here.
    delete gui;
    gui = nullptr;

    progress.Done();

#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU)
        GPUWait();
#endif  // PBRT_BUILD_GPU_RENDERER
    Float seconds = timer.ElapsedSeconds();

    // Shut down display server thread, if active
    StopDisplayThread();

    return seconds;
}

480
void WavefrontPathIntegrator::HandleEscapedRays() {
481 482
    if (!escapedRayQueue)
        return;
483 484
    ForAllQueued(
        "Handle escaped rays", escapedRayQueue, maxQueueSize,
485
        PBRT_CPU_GPU_LAMBDA(const EscapedRayWorkItem w) {
486
            // Compute weighted radiance for escaped ray
487
            SampledSpectrum L(0.f);
488
            for (const auto &light : *infiniteLights) {
489 490
                if (SampledSpectrum Le = light.Le(Ray(w.rayo, w.rayd), w.lambda); Le) {
                    // Compute path radiance contribution from infinite light
491 492
                    PBRT_DBG("L %f %f %f %f beta %f %f %f %f Le %f %f %f %f", L[0], L[1],
                             L[2], L[3], w.beta[0], w.beta[1], w.beta[2], w.beta[3],
493
                             Le[0], Le[1], Le[2], Le[3]);
494
                    PBRT_DBG("pdf uni %f %f %f %f pdf nee %f %f %f %f", w.inv_w_u[0],
M
Matt Pharr 已提交
495 496
                             w.inv_w_u[1], w.inv_w_u[2], w.inv_w_u[3], w.inv_w_l[0],
                             w.inv_w_l[1], w.inv_w_l[2], w.inv_w_l[3]);
497

498
                    if (w.depth == 0 || w.specularBounce) {
499
                        L += w.beta * Le / w.inv_w_u.Average();
500 501 502
                    } else {
                        // Compute MIS-weighted radiance contribution from infinite light
                        LightSampleContext ctx = w.prevIntrCtx;
503
                        Float lightChoicePDF = lightSampler.PMF(ctx, light);
M
Matt Pharr 已提交
504 505
                        SampledSpectrum inv_w_l =
                            w.inv_w_l * lightChoicePDF * light.PDF_Li(ctx, w.rayd, true);
506
                        L += w.beta * Le / (w.inv_w_u + inv_w_l).Average();
507 508
                    }
                }
509
            }
510

511
            // Update pixel radiance if ray's radiance is nonzero
512
            if (L) {
513 514
                PBRT_DBG("Added L %f %f %f %f for escaped ray pixel index %d\n", L[0],
                         L[1], L[2], L[3], w.pixelIndex);
515

516 517 518
                L += pixelSampleState.L[w.pixelIndex];
                pixelSampleState.L[w.pixelIndex] = L;
            }
519
        });
M
Matt Pharr 已提交
520 521
}

522
void WavefrontPathIntegrator::HandleEmissiveIntersection() {
M
Matt Pharr 已提交
523 524
    ForAllQueued(
        "Handle emitters hit by indirect rays", hitAreaLightQueue, maxQueueSize,
525
        PBRT_CPU_GPU_LAMBDA(const HitAreaLightWorkItem w) {
526
            // Find emitted radiance from surface that ray hit
M
Matt Pharr 已提交
527
            SampledSpectrum Le = w.areaLight.L(w.p, w.n, w.uv, w.wo, w.lambda);
M
Matt Pharr 已提交
528 529
            if (!Le)
                return;
530
            PBRT_DBG("Got Le %f %f %f %f from hit area light at depth %d\n", Le[0], Le[1],
531
                     Le[2], Le[3], w.depth);
M
Matt Pharr 已提交
532

533
            // Compute area light's weighted radiance contribution to the path
534
            SampledSpectrum L(0.f);
535
            if (w.depth == 0 || w.specularBounce) {
536
                L = w.beta * Le / w.inv_w_u.Average();
M
Matt Pharr 已提交
537
            } else {
538
                // Compute MIS-weighted radiance contribution from area light
539 540
                Vector3f wi = -w.wo;
                LightSampleContext ctx = w.prevIntrCtx;
541
                Float lightChoicePDF = lightSampler.PMF(ctx, w.areaLight);
542
                Float lightPDF = lightChoicePDF * w.areaLight.PDF_Li(ctx, wi, true);
M
Matt Pharr 已提交
543

544 545 546
                SampledSpectrum inv_w_u = w.inv_w_u;
                SampledSpectrum inv_w_l = w.inv_w_l * lightPDF;
                L = w.beta * Le / (inv_w_u + inv_w_l).Average();
M
Matt Pharr 已提交
547 548
            }

549
            PBRT_DBG("Added L %f %f %f %f for pixel index %d\n", L[0], L[1], L[2], L[3],
550
                     w.pixelIndex);
551

552
            // Update _L_ in _PixelSampleState_ for area light's radiance
553 554
            L += pixelSampleState.L[w.pixelIndex];
            pixelSampleState.L[w.pixelIndex] = L;
M
Matt Pharr 已提交
555 556 557
        });
}

558
void WavefrontPathIntegrator::TraceShadowRays(int wavefrontDepth) {
559
    if (haveMedia)
560
        aggregate->IntersectShadowTr(maxQueueSize, shadowRayQueue, &pixelSampleState);
561
    else
562
        aggregate->IntersectShadow(maxQueueSize, shadowRayQueue, &pixelSampleState);
563
    // Reset shadow ray queue
564 565
    Do(
        "Reset shadowRayQueue", PBRT_CPU_GPU_LAMBDA() {
566
            stats->shadowRays[wavefrontDepth] += shadowRayQueue->Size();
567 568 569 570
            shadowRayQueue->Reset();
        });
}

571
// Sizes the per-depth ray counters: _indirectRays_ gets maxDepth + 1 entries
// and _shadowRays_ gets maxDepth entries, both allocated with _alloc_.
WavefrontPathIntegrator::Stats::Stats(int maxDepth, Allocator alloc)
    : indirectRays(maxDepth + 1, alloc),
      shadowRays(maxDepth, alloc) {
}

574
// Returns a multi-line report of the ray counters: one row for camera rays,
// one per depth for indirect rays (starting at depth 1; entry 0 is not
// printed) and one per depth for shadow rays.
std::string WavefrontPathIntegrator::Stats::Print() const {
    // Every row uses the same layout: a left-aligned label and the count.
    const char *rowFormat = "    %-42s               %12" PRIu64 "\n";

    std::string report = StringPrintf(rowFormat, "Camera rays", cameraRays);
    for (int depth = 1; depth < indirectRays.size(); ++depth)
        report += StringPrintf(rowFormat,
                               StringPrintf("Indirect rays, depth %-3d", depth),
                               indirectRays[depth]);
    for (int depth = 0; depth < shadowRays.size(); ++depth)
        report += StringPrintf(rowFormat,
                               StringPrintf("Shadow rays, depth %-3d", depth),
                               shadowRays[depth]);
    return report;
}

587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624
#ifdef PBRT_BUILD_GPU_RENDERER
void WavefrontPathIntegrator::PrefetchGPUAllocations() {
    int deviceIndex;
    CUDA_CHECK(cudaGetDevice(&deviceIndex));
    int hasConcurrentManagedAccess;
    CUDA_CHECK(cudaDeviceGetAttribute(&hasConcurrentManagedAccess,
                                      cudaDevAttrConcurrentManagedAccess,
                                      deviceIndex));

    // Copy all of the scene data structures over to GPU memory.  This
    // ensures that there isn't a big performance hitch for the first batch
    // of rays as that stuff is copied over on demand.
    if (hasConcurrentManagedAccess) {
        // Set things up so that we can still have read from the
        // WavefrontPathIntegrator struct on the CPU without hurting
        // performance. (This makes it possible to use the values of things
        // like WavefrontPathIntegrator::haveSubsurface to conditionally launch
        // kernels according to what's in the scene...)
        CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetReadMostly,
                                 /* ignored argument */ 0));
        CUDA_CHECK(cudaMemAdvise(this, sizeof(*this),
                                 cudaMemAdviseSetPreferredLocation, deviceIndex));

        // Copy all of the scene data structures over to GPU memory.  This
        // ensures that there isn't a big performance hitch for the first batch
        // of rays as that stuff is copied over on demand.
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        mr->PrefetchToGPU();
    } else {
        // TODO: on systems with basic unified memory, just launching a
        // kernel should cause everything to be copied over. Is an empty
        // kernel sufficient?
    }
}
#endif // PBRT_BUILD_GPU_RENDERER

625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
// Registers the in-progress image with the display server.
//
// On the GPU, a device-side staging buffer (displayRGB) and a host-side
// buffer (displayRGBHost) are allocated, a background thread is started that
// repeatedly copies the former into the latter until *exitCopyThread becomes
// true, and the display system is given a callback that reads from the host
// buffer. On the CPU, the callback reads pixel values directly from the film.
void WavefrontPathIntegrator::StartDisplayThread() {
    Bounds2i pixelBounds = film.PixelBounds();
    Vector2i resolution = pixelBounds.Diagonal();

#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        // Compute the buffer sizes in size_t so that the pixel count is not
        // evaluated in (overflow-prone) int arithmetic for large images.
        const size_t nPixels =
            static_cast<size_t>(resolution.x) * static_cast<size_t>(resolution.y);
        const size_t nBytes = nPixels * sizeof(RGB);

        // Allocate staging memory on the GPU to store the current WIP
        // image.
        CUDA_CHECK(cudaMalloc(&displayRGB, nBytes));
        CUDA_CHECK(cudaMemset(displayRGB, 0, nBytes));

        // Host-side memory for the WIP Image.  We'll just let this leak so
        // that the lambda passed to DisplayDynamic below doesn't access
        // freed memory after Render() returns...
        displayRGBHost = new RGB[nPixels];

        // Note that we can't just capture |this| for the member variables
        // below because with managed memory on Windows, the CPU and GPU
        // can't be accessing the same memory concurrently...
        copyThread = new std::thread([exitCopyThread = this->exitCopyThread,
                                      displayRGBHost = this->displayRGBHost,
                                      displayRGB = this->displayRGB, nBytes]() {
            GPURegisterThread("DISPLAY_SERVER_COPY_THREAD");

            // Copy back to the CPU using a separate stream so that we can
            // periodically but asynchronously pick up the latest results
            // from the GPU.
            cudaStream_t memcpyStream;
            CUDA_CHECK(cudaStreamCreate(&memcpyStream));
            GPUNameStream(memcpyStream, "DISPLAY_SERVER_COPY_STREAM");

            // Copy back to the host from the GPU buffer, without any
            // synchronization.
            while (!*exitCopyThread) {
                CUDA_CHECK(cudaMemcpyAsync(displayRGBHost, displayRGB, nBytes,
                                           cudaMemcpyDeviceToHost, memcpyStream));
                std::this_thread::sleep_for(std::chrono::milliseconds(50));

                CUDA_CHECK(cudaStreamSynchronize(memcpyStream));
            }

            // Copy one more time to get the final image before exiting.
            CUDA_CHECK(
                cudaMemcpy(displayRGBHost, displayRGB, nBytes, cudaMemcpyDeviceToHost));
            CUDA_CHECK(cudaDeviceSynchronize());
        });

        // Now on the CPU side, give the display system a lambda that
        // copies values from |displayRGBHost| into its buffers used for
        // sending messages to the display program (i.e., tev).
        //
        // The host buffer pointer is captured by value rather than through
        // |this|: the callback runs on another CPU thread, so for the same
        // reason as with the copy thread above it should not read the
        // integrator's members, and since the buffer is intentionally
        // leaked the callback stays valid however long it is kept around.
        DisplayDynamic(film.GetFilename(), {resolution.x, resolution.y},
                       {"R", "G", "B"},
                       [resolution, displayRGBHost = this->displayRGBHost](
                           Bounds2i b, pstd::span<pstd::span<Float>> displayValue) {
                           int index = 0;
                           for (Point2i p : b) {
                               RGB rgb = displayRGBHost[p.x + p.y * resolution.x];
                               displayValue[0][index] = rgb.r;
                               displayValue[1][index] = rgb.g;
                               displayValue[2][index] = rgb.b;
                               ++index;
                           }
                       });
    } else
#endif  // PBRT_BUILD_GPU_RENDERER
        // CPU rendering: read the current pixel values straight from the film.
        DisplayDynamic(film.GetFilename(), Point2i(pixelBounds.Diagonal()), {"R", "G", "B"},
                       [pixelBounds, this](Bounds2i b,
                                           pstd::span<pstd::span<Float>> displayValue) {
                           int index = 0;
                           for (Point2i p : b) {
                               RGB rgb =
                                   film.GetPixelRGB(pixelBounds.pMin + p, 1.f /* splat scale */);
                               for (int c = 0; c < 3; ++c)
                                   displayValue[c][index] = rgb[c];
                               ++index;
                           }
                       });
}

M
Matt Pharr 已提交
705
// Refreshes |displayRGB| with the film's current RGB value for every pixel
// inside |pixelBounds|. Entries are stored in row-major order relative to
// pixelBounds.pMin. Compiles to a no-op when pbrt is built without
// PBRT_BUILD_GPU_RENDERER.
void WavefrontPathIntegrator::UpdateDisplayRGBFromFilm(Bounds2i pixelBounds) {
#ifdef PBRT_BUILD_GPU_RENDERER
    Vector2i extent = pixelBounds.Diagonal();
    int nPixels = extent.x * extent.y;

    GPUParallelFor(
        "Update Display RGB Buffer", nPixels, PBRT_CPU_GPU_LAMBDA(int pixelIndex) {
            // Recover the 2D offset inside |pixelBounds| from the linear index.
            int dx = pixelIndex % extent.x;
            int dy = pixelIndex / extent.x;
            Point2i pFilm = Point2i(dx, dy) + pixelBounds.pMin;
            displayRGB[pixelIndex] = film.GetPixelRGB(pFilm);
        });
#endif  //  PBRT_BUILD_GPU_RENDERER
}

// Shuts down the background thread that copies |displayRGB| back to the host
// and then waits for the GPU. Does nothing unless pbrt was built with
// PBRT_BUILD_GPU_RENDERER and Options->useGPU is set.
void WavefrontPathIntegrator::StopDisplayThread() {
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        // The copy thread is only torn down when a display server is
        // configured. Checking |copyThread| as well turns a repeated call
        // (this function resets the pointer to nullptr below) or a call
        // without a running thread into a no-op instead of a null
        // dereference.
        if (!Options->displayServer.empty() && copyThread != nullptr) {
            // Ask the copy loop to exit; it performs one final copy of the
            // image before the thread function returns.
            *exitCopyThread = true;
            if (copyThread->joinable())
                copyThread->join();
            delete copyThread;
            copyThread = nullptr;
        }

        // Another synchronization to make sure no kernels are running on the
        // GPU so that we can safely access unified memory from the CPU.
        GPUWait();
    }
#endif  // PBRT_BUILD_GPU_RENDERER
}

M
Matt Pharr 已提交
736 737 738 739 740 741 742 743 744 745
// Fills the caller-provided |rgb| array with the film's RGB value for each
// pixel inside |pixelBounds|, scaled by |exposure|.
//
//   pixelBounds  region of the film to read; its diagonal determines how
//                many entries of |rgb| are written.
//   exposure     scale factor applied to every RGB value.
//   rgb          output array in row-major order relative to
//                pixelBounds.pMin; must hold at least
//                Diagonal().x * Diagonal().y entries.
void WavefrontPathIntegrator::UpdateFramebufferFromFilm(Bounds2i pixelBounds, Float exposure,
                                                        RGB *rgb) {
    Vector2i resolution = pixelBounds.Diagonal();
    ParallelFor("Update framebuffer", resolution.x * resolution.y,
                PBRT_CPU_GPU_LAMBDA(int index) {
                    Point2i p(index % resolution.x, index / resolution.x);
                    // Offset by the same bounds that define |resolution| so the
                    // extent and origin always describe the same rectangle
                    // (matches UpdateDisplayRGBFromFilm). This is identical to
                    // using film.PixelBounds().pMin whenever the caller passes
                    // the film's full pixel bounds.
                    rgb[index] = exposure * film.GetPixelRGB(p + pixelBounds.pMin);
                });
}

M
Matt Pharr 已提交
746
}  // namespace pbrt