integrator.cpp 31.2 KB
Newer Older
M
Matt Pharr 已提交
1 2 3 4
// pbrt is Copyright(c) 1998-2020 Matt Pharr, Wenzel Jakob, and Greg Humphreys.
// The pbrt source code is licensed under the Apache License, Version 2.0.
// SPDX: Apache-2.0

5
#include <pbrt/wavefront/integrator.h>
M
Matt Pharr 已提交
6 7 8 9 10

#include <pbrt/base/medium.h>
#include <pbrt/cameras.h>
#include <pbrt/film.h>
#include <pbrt/filters.h>
11 12 13
#ifdef PBRT_BUILD_GPU_RENDERER
#include <pbrt/gpu/aggregate.h>
#include <pbrt/gpu/memory.h>
14
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
15 16 17 18 19 20
#include <pbrt/lights.h>
#include <pbrt/lightsamplers.h>
#include <pbrt/util/color.h>
#include <pbrt/util/colorspace.h>
#include <pbrt/util/display.h>
#include <pbrt/util/file.h>
M
Matt Pharr 已提交
21
#include <pbrt/util/gui.h>
M
Matt Pharr 已提交
22 23 24 25 26
#include <pbrt/util/image.h>
#include <pbrt/util/log.h>
#include <pbrt/util/print.h>
#include <pbrt/util/progressreporter.h>
#include <pbrt/util/pstd.h>
27
#include <pbrt/util/spectrum.h>
M
Matt Pharr 已提交
28
#include <pbrt/util/stats.h>
M
Matt Pharr 已提交
29
#include <pbrt/util/string.h>
M
Matt Pharr 已提交
30
#include <pbrt/util/taggedptr.h>
31
#include <pbrt/wavefront/aggregate.h>
M
Matt Pharr 已提交
32

33
#include <atomic>
M
Matt Pharr 已提交
34 35 36 37
#include <cstring>
#include <iostream>
#include <map>

38
#ifdef PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
39 40
#include <cuda.h>
#include <cuda_runtime.h>
41
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
42 43 44

namespace pbrt {

45
STAT_MEMORY_COUNTER("Memory/Wavefront integrator pixel state", pathIntegratorBytes);
M
Matt Pharr 已提交
46

47 48 49
static void updateMaterialNeeds(
    Material m, pstd::array<bool, Material::NumTags()> *haveBasicEvalMaterial,
    pstd::array<bool, Material::NumTags()> *haveUniversalEvalMaterial,
50
    bool *haveSubsurface, bool *haveMedia) {
51 52 53 54 55 56 57 58 59 60 61 62
    if (!m)
        return;

    if (MixMaterial *mix = m.CastOrNullptr<MixMaterial>(); mix) {
        // This is a somewhat odd place for this check, but it's convenient...
        if (!m.CanEvaluateTextures(BasicTextureEvaluator()))
            ErrorExit("\"mix\" material has a texture that can't be evaluated with the "
                      "BasicTextureEvaluator, which is all that is currently supported "
                      "int the wavefront renderer--sorry! %s",
                      *mix);

        updateMaterialNeeds(mix->GetMaterial(0), haveBasicEvalMaterial,
63
                            haveUniversalEvalMaterial, haveSubsurface, haveMedia);
64
        updateMaterialNeeds(mix->GetMaterial(1), haveBasicEvalMaterial,
65
                            haveUniversalEvalMaterial, haveSubsurface, haveMedia);
66 67 68 69
        return;
    }

    *haveSubsurface |= m.HasSubsurfaceScattering();
70
    *haveMedia |= (m == nullptr);  // interface material
71 72 73 74 75 76 77 78

    FloatTexture displace = m.GetDisplacement();
    if (m.CanEvaluateTextures(BasicTextureEvaluator()) &&
        (!displace || BasicTextureEvaluator().CanEvaluate({displace}, {})))
        (*haveBasicEvalMaterial)[m.Tag()] = true;
    else
        (*haveUniversalEvalMaterial)[m.Tag()] = true;
}
79

80
WavefrontPathIntegrator::WavefrontPathIntegrator(
M
Matt Pharr 已提交
81
    pstd::pmr::memory_resource *memoryResource, BasicScene &scene)
M
Matt Pharr 已提交
82
    : memoryResource(memoryResource), exitCopyThread(new std::atomic<bool>(false)) {
83 84
    ThreadLocal<Allocator> threadAllocators(
        [memoryResource]() { return Allocator(memoryResource); });
85

86
    Allocator alloc = threadAllocators.Get();
87

M
Matt Pharr 已提交
88
    // Allocate all of the data structures that represent the scene...
89
    std::map<std::string, Medium> media = scene.CreateMedia();
M
Matt Pharr 已提交
90

91 92 93 94 95
    // "haveMedia" is a bit of a misnomer in that determines both whether
    // queues are allocated for the medium sampling kernels and they are
    // launched as well as whether the ray marching shadow ray kernel is
    // launched... Thus, it will be true if there actually are no media,
    // but some "interface" materials are present in the scene.
M
Matt Pharr 已提交
96
    haveMedia = false;
97
    // Check the shapes and instance definitions...
M
Matt Pharr 已提交
98 99 100 101 102 103
    for (const auto &shape : scene.shapes)
        if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
            haveMedia = true;
    for (const auto &shape : scene.animatedShapes)
        if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
            haveMedia = true;
104
    for (const auto &instanceDefinition : scene.instanceDefinitions) {
105 106 107 108 109 110 111
        for (const auto &shape : instanceDefinition.second->shapes)
            if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
                haveMedia = true;
        for (const auto &shape : instanceDefinition.second->animatedShapes)
            if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
                haveMedia = true;
    }
M
Matt Pharr 已提交
112

113 114
    // Textures
    LOG_VERBOSE("Starting to create textures");
115
    NamedTextures textures = scene.CreateTextures();
116 117
    LOG_VERBOSE("Done creating textures");

118
    LOG_VERBOSE("Starting to create lights");
119
    pstd::vector<Light> allLights;
120
    std::map<int, pstd::vector<Light> *> shapeIndexToAreaLights;
121

122
    infiniteLights = alloc.new_object<pstd::vector<Light>>(alloc);
M
Matt Pharr 已提交
123

124
    for (Light l : scene.CreateLights(textures, &shapeIndexToAreaLights)) {
M
Matt Pharr 已提交
125
        if (l.Is<UniformInfiniteLight>() || l.Is<ImageInfiniteLight>() ||
126
            l.Is<PortalImageInfiniteLight>())
127
            infiniteLights->push_back(l);
M
Matt Pharr 已提交
128 129 130

        allLights.push_back(l);
    }
131
    LOG_VERBOSE("Done creating lights");
M
Matt Pharr 已提交
132

133 134 135
    LOG_VERBOSE("Starting to create materials");
    std::map<std::string, pbrt::Material> namedMaterials;
    std::vector<pbrt::Material> materials;
M
Matt Pharr 已提交
136
    scene.CreateMaterials(textures, &namedMaterials, &materials);
137

M
Matt Pharr 已提交
138 139 140
    haveBasicEvalMaterial.fill(false);
    haveUniversalEvalMaterial.fill(false);
    haveSubsurface = false;
141 142
    for (Material m : materials)
        updateMaterialNeeds(m, &haveBasicEvalMaterial, &haveUniversalEvalMaterial,
143
                            &haveSubsurface, &haveMedia);
144 145
    for (const auto &m : namedMaterials)
        updateMaterialNeeds(m.second, &haveBasicEvalMaterial, &haveUniversalEvalMaterial,
146
                            &haveSubsurface, &haveMedia);
147 148
    LOG_VERBOSE("Finished creating materials");

149 150 151 152 153 154 155 156
    // Retrieve these here so that the CPU isn't writing to managed memory
    // concurrently with the OptiX acceleration-structure construction work
    // that follows. (Verbotten on Windows.)
    camera = scene.GetCamera();
    film = camera.GetFilm();
    filter = film.GetFilter();
    sampler = scene.GetSampler();

157 158
    if (Options->useGPU) {
#ifdef PBRT_BUILD_GPU_RENDERER
159 160 161
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
162 163
        aggregate = new OptiXAggregate(scene, mr, textures, shapeIndexToAreaLights, media,
                                       namedMaterials, materials);
164 165 166 167
#else
        LOG_FATAL("Options->useGPU was set without PBRT_BUILD_GPU_RENDERER enabled");
#endif
    } else
168 169
        aggregate = new CPUAggregate(scene, textures, shapeIndexToAreaLights, media,
                                     namedMaterials, materials);
M
Matt Pharr 已提交
170 171

    // Preprocess the light sources
172
    for (Light light : allLights)
173
        light.Preprocess(aggregate->Bounds());
M
Matt Pharr 已提交
174 175 176 177 178 179 180

    bool haveLights = !allLights.empty();
    for (const auto &m : media)
        haveLights |= m.second.IsEmissive();
    if (!haveLights)
        ErrorExit("No light sources specified");

181
    LOG_VERBOSE("Starting to create light sampler");
M
Matt Pharr 已提交
182 183 184 185
    std::string lightSamplerName =
        scene.integrator.parameters.GetOneString("lightsampler", "bvh");
    if (allLights.size() == 1)
        lightSamplerName = "uniform";
186
    lightSampler = LightSampler::Create(lightSamplerName, allLights, alloc);
187
    LOG_VERBOSE("Finished creating light sampler");
M
Matt Pharr 已提交
188

189
    if (scene.integrator.name != "path" && scene.integrator.name != "volpath")
190
        Warning(&scene.integrator.loc,
191
                "Ignoring specified integrator \"%s\": the wavefront integrator "
192 193
                "always uses a \"volpath\" integrator.",
                scene.integrator.name);
194

M
Matt Pharr 已提交
195 196 197 198
    // Integrator parameters
    regularize = scene.integrator.parameters.GetOneBool("regularize", false);
    maxDepth = scene.integrator.parameters.GetOneInt("maxdepth", 5);

199 200 201
    initializeVisibleSurface = film.UsesVisibleSurface();
    samplesPerPixel = sampler.SamplesPerPixel();

202 203
    // Warn about unsupported stuff...
    if (Options->forceDiffuse)
204
        ErrorExit("The wavefront integrator does not support --force-diffuse.");
205
    if (Options->writePartialImages)
206
        Warning("The wavefront integrator does not support --write-partial-images.");
207
    if (Options->recordPixelStatistics)
208
        ErrorExit("The wavefront integrator does not support --pixelstats.");
209
    if (!Options->mseReferenceImage.empty())
210
        ErrorExit("The wavefront integrator does not support --mse-reference-image.");
211
    if (!Options->mseReferenceOutput.empty())
212
        ErrorExit("The wavefront integrator does not support --mse-reference-out.");
213

214 215
        ///////////////////////////////////////////////////////////////////////////
        // Allocate storage for all of the queues/buffers...
M
Matt Pharr 已提交
216

217
#ifdef PBRT_BUILD_GPU_RENDERER
218 219 220 221 222 223 224
    size_t startSize = 0;
    if (Options->useGPU) {
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        startSize = mr->BytesAllocated();
    }
225
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
226

227
    // Compute number of scanlines to render per pass
M
Matt Pharr 已提交
228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
    Vector2i resolution = film.PixelBounds().Diagonal();
    // TODO: make this configurable. Base it on the amount of GPU memory?
    int maxSamples = 1024 * 1024;
    scanlinesPerPass = std::max(1, maxSamples / resolution.x);
    int nPasses = (resolution.y + scanlinesPerPass - 1) / scanlinesPerPass;
    scanlinesPerPass = (resolution.y + nPasses - 1) / nPasses;
    maxQueueSize = resolution.x * scanlinesPerPass;
    LOG_VERBOSE("Will render in %d passes %d scanlines per pass\n", nPasses,
                scanlinesPerPass);

    pixelSampleState = SOA<PixelSampleState>(maxQueueSize, alloc);

    rayQueues[0] = alloc.new_object<RayQueue>(maxQueueSize, alloc);
    rayQueues[1] = alloc.new_object<RayQueue>(maxQueueSize, alloc);

    shadowRayQueue = alloc.new_object<ShadowRayQueue>(maxQueueSize, alloc);

    if (haveSubsurface) {
        bssrdfEvalQueue =
            alloc.new_object<GetBSSRDFAndProbeRayQueue>(maxQueueSize, alloc);
        subsurfaceScatterQueue =
            alloc.new_object<SubsurfaceScatterQueue>(maxQueueSize, alloc);
    }

252
    if (infiniteLights->size())
M
Matt Pharr 已提交
253 254 255 256 257 258 259 260 261 262 263 264 265
        escapedRayQueue = alloc.new_object<EscapedRayQueue>(maxQueueSize, alloc);
    hitAreaLightQueue = alloc.new_object<HitAreaLightQueue>(maxQueueSize, alloc);

    basicEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveBasicEvalMaterial[1], haveBasicEvalMaterial.size() - 1));
    universalEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveUniversalEvalMaterial[1],
                            haveUniversalEvalMaterial.size() - 1));

    if (haveMedia) {
        mediumSampleQueue = alloc.new_object<MediumSampleQueue>(maxQueueSize, alloc);
266 267 268 269 270 271 272 273

        // TODO: in the presence of multiple PhaseFunction implementations,
        // it could be worthwhile to see which are present in the scene and
        // then initialize havePhase accordingly...
        pstd::array<bool, PhaseFunction::NumTags()> havePhase;
        havePhase.fill(true);
        mediumScatterQueue =
            alloc.new_object<MediumScatterQueue>(maxQueueSize, alloc, havePhase);
M
Matt Pharr 已提交
274 275 276 277
    }

    stats = alloc.new_object<Stats>(maxDepth, alloc);

278
#ifdef PBRT_BUILD_GPU_RENDERER
279 280 281 282 283 284 285
    if (Options->useGPU) {
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        size_t endSize = mr->BytesAllocated();
        pathIntegratorBytes += endSize - startSize;
    }
286
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
287 288
}

289 290
// WavefrontPathIntegrator Method Definitions
Float WavefrontPathIntegrator::Render() {
291 292
    Bounds2i pixelBounds = film.PixelBounds();
    Vector2i resolution = pixelBounds.Diagonal();
M
Matt Pharr 已提交
293 294 295

    GUI *gui = nullptr;
    // FIXME: camera animation; whatever...
M
Matt Pharr 已提交
296 297
    Transform renderFromCamera =
        camera.GetCameraTransform().RenderFromCamera().startTransform;
M
Matt Pharr 已提交
298
    Transform cameraFromRender = Inverse(renderFromCamera);
M
Matt Pharr 已提交
299 300
    Transform cameraFromWorld =
        camera.GetCameraTransform().CameraFromWorld(camera.SampleTime(0.f));
M
Matt Pharr 已提交
301 302
    if (Options->interactive) {
        if (!Options->displayServer.empty())
M
Matt Pharr 已提交
303 304
            ErrorExit(
                "--interactive and --display-server cannot be used at the same time.");
M
Matt Pharr 已提交
305 306 307
        gui = new GUI(film.GetFilename(), resolution, aggregate->Bounds());
    }

308
    Timer timer;
309
    // Prefetch allocations to GPU memory
310
#ifdef PBRT_BUILD_GPU_RENDERER
311 312
    if (Options->useGPU)
        PrefetchGPUAllocations();
313
#endif  // PBRT_BUILD_GPU_RENDERER
314

315
    // Launch thread to copy image for display server, if enabled
316 317
    if (!Options->displayServer.empty())
        StartDisplayThread();
M
Matt Pharr 已提交
318

319
    // Loop over sample indices and evaluate pixel samples
320
    int firstSampleIndex = 0, lastSampleIndex = samplesPerPixel;
321
    // Update sample index range based on debug start, if provided
M
Matt Pharr 已提交
322
    if (!Options->debugStart.empty()) {
323
        std::vector<int> values = SplitStringToInts(Options->debugStart, ',');
324 325
        if (values.size() != 1 && values.size() != 2)
            ErrorExit("Expected either one or two integer values for --debugstart.");
326

327
        firstSampleIndex = values[0];
328 329 330 331
        if (values.size() == 2)
            lastSampleIndex = firstSampleIndex + values[1];
        else
            lastSampleIndex = firstSampleIndex + 1;
M
Matt Pharr 已提交
332 333
    }

334
    ProgressReporter progress(lastSampleIndex - firstSampleIndex, "Rendering",
M
Matt Pharr 已提交
335
                              Options->quiet || Options->interactive, Options->useGPU);
M
Matt Pharr 已提交
336 337
    for (int sampleIndex = firstSampleIndex; sampleIndex < lastSampleIndex;
         ++sampleIndex) {
M
Matt Pharr 已提交
338 339
        // Attempt to work around issue #145.
#if !(defined(PBRT_IS_WINDOWS) && defined(PBRT_BUILD_GPU_RENDERER) && \
M
Matt Pharr 已提交
340
      __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 1)
341
        CheckCallbackScope _([&]() {
342
            return StringPrintf("Wavefront rendering failed at sample %d. Debug with "
343 344 345
                                "\"--debugstart %d\"\n",
                                sampleIndex, sampleIndex);
        });
M
Matt Pharr 已提交
346
#endif
347

348
        // Render image for sample _sampleIndex_
M
Matt Pharr 已提交
349
        LOG_VERBOSE("Starting to submit work for sample %d", sampleIndex);
M
Matt Pharr 已提交
350 351
        for (int y0 = pixelBounds.pMin.y; y0 < pixelBounds.pMax.y;
             y0 += scanlinesPerPass) {
352
            // Generate camera rays for current scanline range
353
            RayQueue *cameraRayQueue = CurrentRayQueue(0);
354 355
            Do(
                "Reset ray queue", PBRT_CPU_GPU_LAMBDA() {
356
                    PBRT_DBG("Starting scanlines at y0 = %d, sample %d / %d\n", y0,
357
                             sampleIndex, samplesPerPixel);
358 359
                    cameraRayQueue->Reset();
                });
M
Matt Pharr 已提交
360 361 362

            Transform cameraMotion;
            if (gui)
M
Matt Pharr 已提交
363 364
                cameraMotion =
                    renderFromCamera * gui->GetCameraTransform() * cameraFromRender;
M
Matt Pharr 已提交
365
            GenerateCameraRays(y0, cameraMotion, sampleIndex);
366
            Do(
367
                "Update camera ray stats",
368
                PBRT_CPU_GPU_LAMBDA() { stats->cameraRays += cameraRayQueue->Size(); });
M
Matt Pharr 已提交
369

M
Matt Pharr 已提交
370
            // Trace rays and estimate radiance up to maximum ray depth
371
            for (int wavefrontDepth = 0; true; ++wavefrontDepth) {
372
                // Reset queues before tracing rays
373
                RayQueue *nextQueue = NextRayQueue(wavefrontDepth);
374 375
                Do(
                    "Reset queues before tracing rays", PBRT_CPU_GPU_LAMBDA() {
M
Matt Pharr 已提交
376
                        nextQueue->Reset();
377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
                        // Reset queues before tracing next batch of rays
                        if (mediumSampleQueue)
                            mediumSampleQueue->Reset();
                        if (mediumScatterQueue)
                            mediumScatterQueue->Reset();

                        if (escapedRayQueue)
                            escapedRayQueue->Reset();
                        hitAreaLightQueue->Reset();

                        basicEvalMaterialQueue->Reset();
                        universalEvalMaterialQueue->Reset();

                        if (bssrdfEvalQueue)
                            bssrdfEvalQueue->Reset();
                        if (subsurfaceScatterQueue)
                            subsurfaceScatterQueue->Reset();
                    });

                // Follow active ray paths and accumulate radiance estimates
397
                GenerateRaySamples(wavefrontDepth, sampleIndex);
398

399
                // Find closest intersections along active rays
400
                aggregate->IntersectClosest(
401 402 403
                    maxQueueSize, CurrentRayQueue(wavefrontDepth), escapedRayQueue,
                    hitAreaLightQueue, basicEvalMaterialQueue, universalEvalMaterialQueue,
                    mediumSampleQueue, NextRayQueue(wavefrontDepth));
M
Matt Pharr 已提交
404

405
                if (wavefrontDepth > 0) {
406
                    // As above, with the indexing...
407
                    RayQueue *statsQueue = CurrentRayQueue(wavefrontDepth);
408 409
                    Do(
                        "Update indirect ray stats", PBRT_CPU_GPU_LAMBDA() {
410
                            stats->indirectRays[wavefrontDepth] += statsQueue->Size();
411
                        });
412
                }
413 414 415 416 417 418 419

                SampleMediumInteraction(wavefrontDepth);

                HandleEscapedRays();

                HandleEmissiveIntersection();

420
                if (wavefrontDepth == maxDepth)
M
Matt Pharr 已提交
421
                    break;
422

M
Matt Pharr 已提交
423
                EvaluateMaterialsAndBSDFs(wavefrontDepth, cameraMotion);
424

425
                // Do immediately so that we have space for shadow rays for subsurface..
426
                TraceShadowRays(wavefrontDepth);
427 428

                SampleSubsurface(wavefrontDepth);
M
Matt Pharr 已提交
429
            }
M
Matt Pharr 已提交
430

M
Matt Pharr 已提交
431
            UpdateFilm();
M
Matt Pharr 已提交
432 433 434 435 436 437 438 439 440 441 442 443
        }

        // Copy updated film pixels to buffer for the display server.
        if (Options->useGPU && !Options->displayServer.empty())
            UpdateDisplayRGBFromFilm(pixelBounds);

        if (gui) {
            RGB *rgb = gui->MapFramebuffer();
            UpdateFramebufferFromFilm(pixelBounds, gui->exposure, rgb);
            gui->UnmapFramebuffer();

            if (gui->printCameraTransform) {
M
Matt Pharr 已提交
444 445
                SquareMatrix<4> cfw =
                    (Inverse(gui->GetCameraTransform()) * cameraFromWorld).GetMatrix();
M
Matt Pharr 已提交
446 447 448 449 450 451 452 453 454 455 456 457 458
                Printf("Current camera transform:\nTransform [ ");
                for (int i = 0; i < 16; ++i)
                    Printf("%f ", cfw[i % 4][i / 4]);
                Printf("]\n");
                std::fflush(stdout);
                gui->printCameraTransform = false;
            }

            DisplayState state = gui->RefreshDisplay();
            if (state == DisplayState::EXIT)
                break;
            else if (state == DisplayState::RESET) {
                sampleIndex = firstSampleIndex - 1;
M
Matt Pharr 已提交
459 460 461 462 463 464
                ParallelFor(
                    "Reset pixels", resolution.x * resolution.y,
                    PBRT_CPU_GPU_LAMBDA(int i) {
                        int x = i % resolution.x, y = i / resolution.x;
                        film.ResetPixel(pixelBounds.pMin + Vector2i(x, y));
                    });
M
Matt Pharr 已提交
465
            }
M
Matt Pharr 已提交
466 467 468 469
        }

        progress.Update();
    }
M
Matt Pharr 已提交
470 471 472 473 474 475

    if (gui) {
        delete gui;
        gui = nullptr;
    }

M
Matt Pharr 已提交
476
    progress.Done();
477

478 479 480
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU)
        GPUWait();
481
#endif  // PBRT_BUILD_GPU_RENDERER
482
    Float seconds = timer.ElapsedSeconds();
483

484 485
    // Shut down display server thread, if active
    StopDisplayThread();
M
Matt Pharr 已提交
486

487
    return seconds;
M
Matt Pharr 已提交
488 489
}

490
void WavefrontPathIntegrator::HandleEscapedRays() {
491 492
    if (!escapedRayQueue)
        return;
493 494
    ForAllQueued(
        "Handle escaped rays", escapedRayQueue, maxQueueSize,
495
        PBRT_CPU_GPU_LAMBDA(const EscapedRayWorkItem w) {
496
            // Compute weighted radiance for escaped ray
497
            SampledSpectrum L(0.f);
498
            for (const auto &light : *infiniteLights) {
499 500
                if (SampledSpectrum Le = light.Le(Ray(w.rayo, w.rayd), w.lambda); Le) {
                    // Compute path radiance contribution from infinite light
501
                    PBRT_DBG("L %f %f %f %f beta %f %f %f %f Le %f %f %f %f\n", L[0], L[1],
502
                             L[2], L[3], w.beta[0], w.beta[1], w.beta[2], w.beta[3],
503
                             Le[0], Le[1], Le[2], Le[3]);
504 505 506 507 508
                    PBRT_DBG("depth %d specularBounce %d pdf uni %f %f %f %f "
                             "pdf nee %f %f %f %f\n",
                             w.depth, w.specularBounce,
                             w.inv_w_u[0], w.inv_w_u[1], w.inv_w_u[2], w.inv_w_u[3],
                             w.inv_w_l[0], w.inv_w_l[1], w.inv_w_l[2], w.inv_w_l[3]);
509

510
                    if (w.depth == 0 || w.specularBounce) {
511
                        L += w.beta * Le / w.inv_w_u.Average();
512 513 514
                    } else {
                        // Compute MIS-weighted radiance contribution from infinite light
                        LightSampleContext ctx = w.prevIntrCtx;
515
                        Float lightChoicePDF = lightSampler.PMF(ctx, light);
M
Matt Pharr 已提交
516 517
                        SampledSpectrum inv_w_l =
                            w.inv_w_l * lightChoicePDF * light.PDF_Li(ctx, w.rayd, true);
518
                        L += w.beta * Le / (w.inv_w_u + inv_w_l).Average();
519 520
                    }
                }
521
            }
522

523
            // Update pixel radiance if ray's radiance is nonzero
524
            if (L) {
525 526
                PBRT_DBG("Added L %f %f %f %f for escaped ray pixel index %d\n", L[0],
                         L[1], L[2], L[3], w.pixelIndex);
527

528 529 530
                L += pixelSampleState.L[w.pixelIndex];
                pixelSampleState.L[w.pixelIndex] = L;
            }
531
        });
M
Matt Pharr 已提交
532 533
}

534
void WavefrontPathIntegrator::HandleEmissiveIntersection() {
M
Matt Pharr 已提交
535 536
    ForAllQueued(
        "Handle emitters hit by indirect rays", hitAreaLightQueue, maxQueueSize,
537
        PBRT_CPU_GPU_LAMBDA(const HitAreaLightWorkItem w) {
538
            // Find emitted radiance from surface that ray hit
M
Matt Pharr 已提交
539
            SampledSpectrum Le = w.areaLight.L(w.p, w.n, w.uv, w.wo, w.lambda);
M
Matt Pharr 已提交
540 541
            if (!Le)
                return;
542
            PBRT_DBG("Got Le %f %f %f %f from hit area light at depth %d\n", Le[0], Le[1],
543
                     Le[2], Le[3], w.depth);
M
Matt Pharr 已提交
544

545
            // Compute area light's weighted radiance contribution to the path
546
            SampledSpectrum L(0.f);
547
            if (w.depth == 0 || w.specularBounce) {
548
                L = w.beta * Le / w.inv_w_u.Average();
M
Matt Pharr 已提交
549
            } else {
550
                // Compute MIS-weighted radiance contribution from area light
551 552
                Vector3f wi = -w.wo;
                LightSampleContext ctx = w.prevIntrCtx;
553
                Float lightChoicePDF = lightSampler.PMF(ctx, w.areaLight);
554
                Float lightPDF = lightChoicePDF * w.areaLight.PDF_Li(ctx, wi, true);
M
Matt Pharr 已提交
555

556 557 558
                SampledSpectrum inv_w_u = w.inv_w_u;
                SampledSpectrum inv_w_l = w.inv_w_l * lightPDF;
                L = w.beta * Le / (inv_w_u + inv_w_l).Average();
M
Matt Pharr 已提交
559 560
            }

561
            PBRT_DBG("Added L %f %f %f %f for pixel index %d\n", L[0], L[1], L[2], L[3],
562
                     w.pixelIndex);
563

564
            // Update _L_ in _PixelSampleState_ for area light's radiance
565 566
            L += pixelSampleState.L[w.pixelIndex];
            pixelSampleState.L[w.pixelIndex] = L;
M
Matt Pharr 已提交
567 568 569
        });
}

570
void WavefrontPathIntegrator::TraceShadowRays(int wavefrontDepth) {
571
    if (haveMedia)
572
        aggregate->IntersectShadowTr(maxQueueSize, shadowRayQueue, &pixelSampleState);
573
    else
574
        aggregate->IntersectShadow(maxQueueSize, shadowRayQueue, &pixelSampleState);
575
    // Reset shadow ray queue
576 577
    Do(
        "Reset shadowRayQueue", PBRT_CPU_GPU_LAMBDA() {
578
            stats->shadowRays[wavefrontDepth] += shadowRayQueue->Size();
579 580 581 582
            shadowRayQueue->Reset();
        });
}

583
WavefrontPathIntegrator::Stats::Stats(int maxDepth, Allocator alloc)
M
Matt Pharr 已提交
584 585
    : indirectRays(maxDepth + 1, alloc), shadowRays(maxDepth, alloc) {}

586
std::string WavefrontPathIntegrator::Stats::Print() const {
M
Matt Pharr 已提交
587 588 589 590 591 592 593 594 595 596 597 598
    std::string s;
    s += StringPrintf("    %-42s               %12" PRIu64 "\n", "Camera rays",
                      cameraRays);
    for (int i = 1; i < indirectRays.size(); ++i)
        s += StringPrintf("    %-42s               %12" PRIu64 "\n",
                          StringPrintf("Indirect rays, depth %-3d", i), indirectRays[i]);
    for (int i = 0; i < shadowRays.size(); ++i)
        s += StringPrintf("    %-42s               %12" PRIu64 "\n",
                          StringPrintf("Shadow rays, depth %-3d", i), shadowRays[i]);
    return s;
}

599 600 601 602 603 604
#ifdef PBRT_BUILD_GPU_RENDERER
void WavefrontPathIntegrator::PrefetchGPUAllocations() {
    int deviceIndex;
    CUDA_CHECK(cudaGetDevice(&deviceIndex));
    int hasConcurrentManagedAccess;
    CUDA_CHECK(cudaDeviceGetAttribute(&hasConcurrentManagedAccess,
M
Matt Pharr 已提交
605
                                      cudaDevAttrConcurrentManagedAccess, deviceIndex));
606 607 608 609 610 611 612 613 614 615 616 617

    // Copy all of the scene data structures over to GPU memory.  This
    // ensures that there isn't a big performance hitch for the first batch
    // of rays as that stuff is copied over on demand.
    if (hasConcurrentManagedAccess) {
        // Set things up so that we can still have read from the
        // WavefrontPathIntegrator struct on the CPU without hurting
        // performance. (This makes it possible to use the values of things
        // like WavefrontPathIntegrator::haveSubsurface to conditionally launch
        // kernels according to what's in the scene...)
        CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetReadMostly,
                                 /* ignored argument */ 0));
M
Matt Pharr 已提交
618 619
        CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetPreferredLocation,
                                 deviceIndex));
620 621 622 623 624 625 626 627 628 629 630 631 632 633

        // Copy all of the scene data structures over to GPU memory.  This
        // ensures that there isn't a big performance hitch for the first batch
        // of rays as that stuff is copied over on demand.
        CUDATrackedMemoryResource *mr =
            dynamic_cast<CUDATrackedMemoryResource *>(memoryResource);
        CHECK(mr);
        mr->PrefetchToGPU();
    } else {
        // TODO: on systems with basic unified memory, just launching a
        // kernel should cause everything to be copied over. Is an empty
        // kernel sufficient?
    }
}
M
Matt Pharr 已提交
634
#endif  // PBRT_BUILD_GPU_RENDERER
635

636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654
void WavefrontPathIntegrator::StartDisplayThread() {
    Bounds2i pixelBounds = film.PixelBounds();
    Vector2i resolution = pixelBounds.Diagonal();

#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        // Allocate staging memory on the GPU to store the current WIP
        // image.
        CUDA_CHECK(cudaMalloc(&displayRGB, resolution.x * resolution.y * sizeof(RGB)));
        CUDA_CHECK(cudaMemset(displayRGB, 0, resolution.x * resolution.y * sizeof(RGB)));

        // Host-side memory for the WIP Image.  We'll just let this leak so
        // that the lambda passed to DisplayDynamic below doesn't access
        // freed memory after Render() returns...
        displayRGBHost = new RGB[resolution.x * resolution.y];

        // Note that we can't just capture |this| for the member variables
        // below because with managed memory on Windows, the CPU and GPU
        // can't be accessing the same memory concurrently...
M
Matt Pharr 已提交
655 656 657
        copyThread = new std::thread([exitCopyThread = this->exitCopyThread,
                                      displayRGBHost = this->displayRGBHost,
                                      displayRGB = this->displayRGB, resolution]() {
658 659 660 661 662 663 664 665 666 667 668
            GPURegisterThread("DISPLAY_SERVER_COPY_THREAD");

            // Copy back to the CPU using a separate stream so that we can
            // periodically but asynchronously pick up the latest results
            // from the GPU.
            cudaStream_t memcpyStream;
            CUDA_CHECK(cudaStreamCreate(&memcpyStream));
            GPUNameStream(memcpyStream, "DISPLAY_SERVER_COPY_STREAM");

            // Copy back to the host from the GPU buffer, without any
            // synthronization.
M
Matt Pharr 已提交
669
            while (!*exitCopyThread) {
670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687
                CUDA_CHECK(cudaMemcpyAsync(displayRGBHost, displayRGB,
                                           resolution.x * resolution.y * sizeof(RGB),
                                           cudaMemcpyDeviceToHost, memcpyStream));
                std::this_thread::sleep_for(std::chrono::milliseconds(50));

                CUDA_CHECK(cudaStreamSynchronize(memcpyStream));
            }

            // Copy one more time to get the final image before exiting.
            CUDA_CHECK(cudaMemcpy(displayRGBHost, displayRGB,
                                  resolution.x * resolution.y * sizeof(RGB),
                                  cudaMemcpyDeviceToHost));
            CUDA_CHECK(cudaDeviceSynchronize());
        });

        // Now on the CPU side, give the display system a lambda that
        // copies values from |displayRGBHost| into its buffers used for
        // sending messages to the display program (i.e., tev).
M
Matt Pharr 已提交
688 689 690 691 692 693 694 695 696 697 698 699
        DisplayDynamic(
            film.GetFilename(), {resolution.x, resolution.y}, {"R", "G", "B"},
            [resolution, this](Bounds2i b, pstd::span<pstd::span<float>> displayValue) {
                int index = 0;
                for (Point2i p : b) {
                    RGB rgb = displayRGBHost[p.x + p.y * resolution.x];
                    displayValue[0][index] = rgb.r;
                    displayValue[1][index] = rgb.g;
                    displayValue[2][index] = rgb.b;
                    ++index;
                }
            });
700 701
    } else
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
702 703 704 705 706 707 708 709 710 711 712 713
        DisplayDynamic(
            film.GetFilename(), Point2i(pixelBounds.Diagonal()), {"R", "G", "B"},
            [pixelBounds, this](Bounds2i b, pstd::span<pstd::span<float>> displayValue) {
                int index = 0;
                for (Point2i p : b) {
                    RGB rgb =
                        film.GetPixelRGB(pixelBounds.pMin + p, 1.f /* splat scale */);
                    for (int c = 0; c < 3; ++c)
                        displayValue[c][index] = rgb[c];
                    ++index;
                }
            });
714 715
}

M
Matt Pharr 已提交
716
void WavefrontPathIntegrator::UpdateDisplayRGBFromFilm(Bounds2i pixelBounds) {
717
#ifdef PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
718 719
    Vector2i resolution = pixelBounds.Diagonal();
    GPUParallelFor(
M
Matt Pharr 已提交
720 721 722 723 724
        "Update Display RGB Buffer", resolution.x * resolution.y,
        PBRT_CPU_GPU_LAMBDA(int index) {
            Point2i p(index % resolution.x, index / resolution.x);
            displayRGB[index] = film.GetPixelRGB(p + pixelBounds.pMin);
        });
725 726 727 728 729 730 731 732 733
#endif  //  PBRT_BUILD_GPU_RENDERER
}

void WavefrontPathIntegrator::StopDisplayThread() {
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        // Wait until rendering is all done before we start to shut down the
        // display stuff..
        if (!Options->displayServer.empty()) {
M
Matt Pharr 已提交
734 735 736 737
            *exitCopyThread = true;
            copyThread->join();
            delete copyThread;
            copyThread = nullptr;
738 739 740 741 742 743 744 745 746
        }

        // Another synchronization to make sure no kernels are running on the
        // GPU so that we can safely access unified memory from the CPU.
        GPUWait();
    }
#endif  // PBRT_BUILD_GPU_RENDERER
}

M
Matt Pharr 已提交
747 748
void WavefrontPathIntegrator::UpdateFramebufferFromFilm(Bounds2i pixelBounds,
                                                        Float exposure, RGB *rgb) {
M
Matt Pharr 已提交
749
    Vector2i resolution = pixelBounds.Diagonal();
M
Matt Pharr 已提交
750 751 752 753 754 755
    ParallelFor(
        "Update framebuffer", resolution.x * resolution.y,
        PBRT_CPU_GPU_LAMBDA(int index) {
            Point2i p(index % resolution.x, index / resolution.x);
            rgb[index] = exposure * film.GetPixelRGB(p + film.PixelBounds().pMin);
        });
M
Matt Pharr 已提交
756 757
}

M
Matt Pharr 已提交
758
}  // namespace pbrt