// pbrt is Copyright(c) 1998-2020 Matt Pharr, Wenzel Jakob, and Greg Humphreys.
// The pbrt source code is licensed under the Apache License, Version 2.0.
// SPDX: Apache-2.0

5
#include <pbrt/wavefront/integrator.h>
M
Matt Pharr 已提交
6 7 8 9 10

#include <pbrt/base/medium.h>
#include <pbrt/cameras.h>
#include <pbrt/film.h>
#include <pbrt/filters.h>
11 12 13
#ifdef PBRT_BUILD_GPU_RENDERER
#include <pbrt/gpu/aggregate.h>
#include <pbrt/gpu/memory.h>
14
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
15 16 17 18 19 20 21 22 23 24 25
#include <pbrt/lights.h>
#include <pbrt/lightsamplers.h>
#include <pbrt/util/color.h>
#include <pbrt/util/colorspace.h>
#include <pbrt/util/display.h>
#include <pbrt/util/file.h>
#include <pbrt/util/image.h>
#include <pbrt/util/log.h>
#include <pbrt/util/print.h>
#include <pbrt/util/progressreporter.h>
#include <pbrt/util/pstd.h>
26
#include <pbrt/util/spectrum.h>
M
Matt Pharr 已提交
27
#include <pbrt/util/stats.h>
M
Matt Pharr 已提交
28
#include <pbrt/util/string.h>
M
Matt Pharr 已提交
29
#include <pbrt/util/taggedptr.h>
30
#include <pbrt/wavefront/aggregate.h>
M
Matt Pharr 已提交
31

32
#include <atomic>
M
Matt Pharr 已提交
33 34 35 36
#include <cstring>
#include <iostream>
#include <map>

37
#ifdef PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
38 39
#include <cuda.h>
#include <cuda_runtime.h>
40
#endif  // PBRT_BUILD_GPU_RENDERER
M
Matt Pharr 已提交
41 42 43

namespace pbrt {

44
// Statistics counter titled "Memory/Wavefront integrator pixel state". In GPU
// builds the WavefrontPathIntegrator constructor adds to it the growth in
// bytes reported by the GPU memory resource while it allocates its
// queues/buffers.
STAT_MEMORY_COUNTER("Memory/Wavefront integrator pixel state", pathIntegratorBytes);
M
Matt Pharr 已提交
45

46
// Builds everything the wavefront integrator needs from the parsed scene:
// media, filter, film, sampler, camera, textures, lights (including area
// lights attached to shapes), the ray intersection aggregate (OptiX when
// Options->useGPU is set, CPU otherwise), the light sampler, and finally the
// work queues whose capacity is derived from the film resolution.
//
// Parameters:
//   alloc - allocator used for all scene objects and queues created here.
//   scene - parsed scene description that is turned into runtime objects.
//
// Calls ErrorExit() for unrecoverable scene errors (undefined medium, a
// shutter interval that is not positive, missing "alpha" texture, animated
// area light, no light sources).
WavefrontPathIntegrator::WavefrontPathIntegrator(Allocator alloc, ParsedScene &scene) {
    // Allocate all of the data structures that represent the scene...
    std::map<std::string, Medium> media = scene.CreateMedia(alloc);

    haveMedia = false;
    // Check the shapes...
    for (const auto &shape : scene.shapes)
        if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
            haveMedia = true;
    for (const auto &shape : scene.animatedShapes)
        if (!shape.insideMedium.empty() || !shape.outsideMedium.empty())
            haveMedia = true;

    // Looks up a medium by name. An empty name yields a null Medium; an
    // unknown name is a fatal error. Any successful lookup also records that
    // the scene uses media.
    auto findMedium = [&](const std::string &s, const FileLoc *loc) -> Medium {
        if (s.empty())
            return nullptr;

        auto iter = media.find(s);
        if (iter == media.end())
            ErrorExit(loc, "%s: medium not defined", s);
        haveMedia = true;
        return iter->second;
    };

    filter = Filter::Create(scene.filter.name, scene.filter.parameters, &scene.filter.loc,
                            alloc);

    Float exposureTime = scene.camera.parameters.GetOneFloat("shutterclose", 1.f) -
                         scene.camera.parameters.GetOneFloat("shutteropen", 0.f);
    if (exposureTime <= 0)
        ErrorExit(&scene.camera.loc,
                  "The specified camera shutter times imply that the shutter "
                  "does not open.  A black image will result.");

    film = Film::Create(scene.film.name, scene.film.parameters, exposureTime, filter,
                        &scene.film.loc, alloc);
    initializeVisibleSurface = film.UsesVisibleSurface();

    sampler = Sampler::Create(scene.sampler.name, scene.sampler.parameters,
                              film.FullResolution(), &scene.sampler.loc, alloc);

    Medium cameraMedium = findMedium(scene.camera.medium, &scene.camera.loc);
    camera = Camera::Create(scene.camera.name, scene.camera.parameters, cameraMedium,
                            scene.camera.cameraTransform, film, &scene.camera.loc, alloc);

    // Textures
    LOG_VERBOSE("Starting to create textures");
    NamedTextures textures = scene.CreateTextures(alloc, Options->useGPU);
    LOG_VERBOSE("Done creating textures");

    pstd::vector<Light> allLights;

    // Non-area lights. Infinite lights are additionally collected in
    // envLights so that escaped rays can be evaluated against them.
    envLights = alloc.new_object<pstd::vector<Light>>(alloc);
    for (const auto &light : scene.lights) {
        Medium outsideMedium = findMedium(light.medium, &light.loc);
        if (light.renderFromObject.IsAnimated())
            Warning(&light.loc,
                    "Animated lights aren't supported. Using the start transform.");

        Light l = Light::Create(
            light.name, light.parameters, light.renderFromObject.startTransform,
            scene.camera.cameraTransform, outsideMedium, &light.loc, alloc);

        if (l.Is<UniformInfiniteLight>() || l.Is<ImageInfiniteLight>() ||
            l.Is<PortalImageInfiniteLight>())
            envLights->push_back(l);

        allLights.push_back(l);
    }

    // Area lights...
    std::map<int, pstd::vector<Light> *> shapeIndexToAreaLights;
    for (size_t i = 0; i < scene.shapes.size(); ++i) {
        const auto &shape = scene.shapes[i];
        if (shape.lightIndex == -1)
            continue;

        // Shapes whose material is "interface", "none", or cannot be
        // determined do not get area lights.
        auto isInterface = [&]() {
            std::string materialName;
            if (shape.materialIndex != -1)
                materialName = scene.materials[shape.materialIndex].name;
            else {
                for (auto &namedMaterial : scene.namedMaterials)
                    if (namedMaterial.first == shape.materialName) {
                        materialName =
                            namedMaterial.second.parameters.GetOneString("type", "");
                        break;
                    }
            }
            return (materialName == "interface" || materialName == "none" ||
                    materialName.empty());
        };
        if (isInterface())
            continue;

        CHECK_LT(shape.lightIndex, scene.areaLights.size());
        const auto &areaLightEntity = scene.areaLights[shape.lightIndex];
        AnimatedTransform renderFromLight(*shape.renderFromObject);

        pstd::vector<Shape> shapes =
            Shape::Create(shape.name, shape.renderFromObject, shape.objectFromRender,
                          shape.reverseOrientation, shape.parameters, &shape.loc, alloc);

        if (shapes.empty())
            continue;

        Medium outsideMedium = findMedium(shape.outsideMedium, &shape.loc);

        // Resolve the optional "alpha" parameter: either a named float
        // texture or a constant value below 1.
        FloatTexture alphaTex;
        std::string alphaTexName = shape.parameters.GetTexture("alpha");
        if (!alphaTexName.empty()) {
            // Single lookup; the iterator is reused for the value.
            auto alphaIter = textures.floatTextures.find(alphaTexName);
            if (alphaIter != textures.floatTextures.end()) {
                alphaTex = alphaIter->second;
                if (!BasicTextureEvaluator().CanEvaluate({alphaTex}, {}))
                    // A warning will be issued elsewhere...
                    alphaTex = nullptr;
            } else
                ErrorExit(&shape.loc,
                          "%s: couldn't find float texture for \"alpha\" parameter.",
                          alphaTexName);
        } else if (Float alpha = shape.parameters.GetOneFloat("alpha", 1.f); alpha < 1.f)
            alphaTex = alloc.new_object<FloatConstantTexture>(alpha);

        pstd::vector<Light> *lightsForShape =
            alloc.new_object<pstd::vector<Light>>(alloc);

        // The transform is the same for every sub-shape, so check it once
        // rather than on each loop iteration (shapes is non-empty here).
        if (renderFromLight.IsAnimated())
            ErrorExit(&shape.loc, "Animated lights are not supported.");

        for (Shape sh : shapes) {
            DiffuseAreaLight *area = DiffuseAreaLight::Create(
                renderFromLight.startTransform, outsideMedium, areaLightEntity.parameters,
                areaLightEntity.parameters.ColorSpace(), &areaLightEntity.loc, alloc, sh,
                alphaTex);
            allLights.push_back(area);
            lightsForShape->push_back(area);
        }
        shapeIndexToAreaLights[static_cast<int>(i)] = lightsForShape;
    }

    // The aggregate constructor fills in these flags as it creates materials.
    haveBasicEvalMaterial.fill(false);
    haveUniversalEvalMaterial.fill(false);
    haveSubsurface = false;
    if (Options->useGPU) {
#ifdef PBRT_BUILD_GPU_RENDERER
        aggregate = new OptiXAggregate(scene, alloc, textures, shapeIndexToAreaLights,
                                       media, &haveBasicEvalMaterial,
                                       &haveUniversalEvalMaterial, &haveSubsurface);
#else
        LOG_FATAL("Options->useGPU was set without PBRT_BUILD_GPU_RENDERER enabled");
#endif
    } else
        aggregate = new CPUAggregate(scene, alloc, textures, shapeIndexToAreaLights,
                                     media, &haveBasicEvalMaterial,
                                     &haveUniversalEvalMaterial, &haveSubsurface);

    // Preprocess the light sources
    for (Light light : allLights)
        light.Preprocess(aggregate->Bounds());

    // Emissive media count as light sources too.
    bool haveLights = !allLights.empty();
    for (const auto &m : media)
        haveLights |= m.second.IsEmissive();
    if (!haveLights)
        ErrorExit("No light sources specified");

    std::string lightSamplerName =
        scene.integrator.parameters.GetOneString("lightsampler", "bvh");
    if (allLights.size() == 1)
        lightSamplerName = "uniform";
    lightSampler = LightSampler::Create(lightSamplerName, allLights, alloc);

    if (scene.integrator.name != "path" && scene.integrator.name != "volpath")
        Warning(&scene.integrator.loc,
                "Ignoring specified integrator \"%s\": the wavefront integrator "
                "always uses a \"volpath\" integrator.",
                scene.integrator.name);

    // Integrator parameters
    regularize = scene.integrator.parameters.GetOneBool("regularize", false);
    maxDepth = scene.integrator.parameters.GetOneInt("maxdepth", 5);

    // Warn about unsupported stuff...
    if (Options->forceDiffuse)
        Warning("The wavefront integrator does not support --force-diffuse.");
    if (Options->writePartialImages)
        Warning("The wavefront integrator does not support --write-partial-images.");
    if (Options->recordPixelStatistics)
        Warning("The wavefront integrator does not support --pixelstats.");
    if (!Options->mseReferenceImage.empty())
        Warning("The wavefront integrator does not support --mse-reference-image.");
    if (!Options->mseReferenceOutput.empty())
        Warning("The wavefront integrator does not support --mse-reference-out.");

    ///////////////////////////////////////////////////////////////////////////
    // Allocate storage for all of the queues/buffers...

#ifdef PBRT_BUILD_GPU_RENDERER
    // Remember how much was allocated so far so that the queue/buffer
    // footprint can be reported in pathIntegratorBytes below.
    CUDATrackedMemoryResource *mr =
        dynamic_cast<CUDATrackedMemoryResource *>(gpuMemoryAllocator.resource());
    CHECK(mr != nullptr);
    size_t startSize = mr->BytesAllocated();
#endif  // PBRT_BUILD_GPU_RENDERER

    // Compute number of scanlines to render per pass
    Vector2i resolution = film.PixelBounds().Diagonal();
    // TODO: make this configurable. Base it on the amount of GPU memory?
    int maxSamples = 1024 * 1024;
    scanlinesPerPass = std::max(1, maxSamples / resolution.x);
    int nPasses = (resolution.y + scanlinesPerPass - 1) / scanlinesPerPass;
    // Rebalance so that all passes cover roughly the same number of scanlines.
    scanlinesPerPass = (resolution.y + nPasses - 1) / nPasses;
    maxQueueSize = resolution.x * scanlinesPerPass;
    LOG_VERBOSE("Will render in %d passes %d scanlines per pass\n", nPasses,
                scanlinesPerPass);

    pixelSampleState = SOA<PixelSampleState>(maxQueueSize, alloc);

    rayQueues[0] = alloc.new_object<RayQueue>(maxQueueSize, alloc);
    rayQueues[1] = alloc.new_object<RayQueue>(maxQueueSize, alloc);

    shadowRayQueue = alloc.new_object<ShadowRayQueue>(maxQueueSize, alloc);

    // Optional queues are only allocated when the scene needs them.
    if (haveSubsurface) {
        bssrdfEvalQueue =
            alloc.new_object<GetBSSRDFAndProbeRayQueue>(maxQueueSize, alloc);
        subsurfaceScatterQueue =
            alloc.new_object<SubsurfaceScatterQueue>(maxQueueSize, alloc);
    }

    if (!envLights->empty())
        escapedRayQueue = alloc.new_object<EscapedRayQueue>(maxQueueSize, alloc);
    hitAreaLightQueue = alloc.new_object<HitAreaLightQueue>(maxQueueSize, alloc);

    // The spans passed here skip element 0 of the have*EvalMaterial arrays.
    basicEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveBasicEvalMaterial[1], haveBasicEvalMaterial.size() - 1));
    universalEvalMaterialQueue = alloc.new_object<MaterialEvalQueue>(
        maxQueueSize, alloc,
        pstd::MakeConstSpan(&haveUniversalEvalMaterial[1],
                            haveUniversalEvalMaterial.size() - 1));

    if (haveMedia) {
        mediumSampleQueue = alloc.new_object<MediumSampleQueue>(maxQueueSize, alloc);
        mediumScatterQueue = alloc.new_object<MediumScatterQueue>(maxQueueSize, alloc);
    }

    stats = alloc.new_object<Stats>(maxDepth, alloc);

#ifdef PBRT_BUILD_GPU_RENDERER
    size_t endSize = mr->BytesAllocated();
    pathIntegratorBytes += endSize - startSize;
#endif  // PBRT_BUILD_GPU_RENDERER
}

299 300
// WavefrontPathIntegrator Method Definitions
// Renders the image and returns the elapsed rendering time in seconds.
//
// For every sample index and every band of scanlinesPerPass scanlines, camera
// rays are generated and then traced bounce by bounce (up to maxDepth),
// dispatching work through the integrator's queues; the film is updated after
// each band. If a display server is configured, the in-progress image is also
// made available to it (through a GPU staging buffer and a background copy
// thread when rendering on the GPU).
Float WavefrontPathIntegrator::Render() {
    // Prefetch allocations to GPU memory
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        int deviceIndex;
        CUDA_CHECK(cudaGetDevice(&deviceIndex));
        int hasConcurrentManagedAccess;
        CUDA_CHECK(cudaDeviceGetAttribute(&hasConcurrentManagedAccess,
                                          cudaDevAttrConcurrentManagedAccess,
                                          deviceIndex));

        // Copy all of the scene data structures over to GPU memory.  This
        // ensures that there isn't a big performance hitch for the first batch
        // of rays as that stuff is copied over on demand.
        if (hasConcurrentManagedAccess) {
            // Set things up so that we can still have read from the
            // WavefrontPathIntegrator struct on the CPU without hurting
            // performance. (This makes it possible to use the values of things
            // like WavefrontPathIntegrator::haveSubsurface to conditionally launch
            // kernels according to what's in the scene...)
            CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetReadMostly,
                                     /* ignored argument */ 0));
            CUDA_CHECK(cudaMemAdvise(this, sizeof(*this),
                                     cudaMemAdviseSetPreferredLocation, deviceIndex));

            // Copy all of the scene data structures over to GPU memory.  This
            // ensures that there isn't a big performance hitch for the first batch
            // of rays as that stuff is copied over on demand.
            CUDATrackedMemoryResource *mr =
                dynamic_cast<CUDATrackedMemoryResource *>(gpuMemoryAllocator.resource());
            CHECK(mr != nullptr);
            mr->PrefetchToGPU();
        } else {
            // TODO: on systems with basic unified memory, just launching a
            // kernel should cause everything to be copied over. Is an empty
            // kernel sufficient?
        }
    }
#endif  // PBRT_BUILD_GPU_RENDERER

    Timer timer;
    Vector2i resolution = film.PixelBounds().Diagonal();
    Bounds2i pixelBounds = film.PixelBounds();
    int spp = sampler.SamplesPerPixel();
    // Launch thread to copy image for display server, if enabled
    RGB *displayRGB = nullptr, *displayRGBHost = nullptr;
    std::atomic<bool> exitCopyThread{false};
    std::thread copyThread;

    if (!Options->displayServer.empty()) {
#ifdef PBRT_BUILD_GPU_RENDERER
        if (Options->useGPU) {
            // Pixel count and byte size of the staging buffers, computed once
            // in size_t so the product cannot overflow int arithmetic.
            const size_t nDisplayPixels =
                static_cast<size_t>(resolution.x) * static_cast<size_t>(resolution.y);
            const size_t displayBytes = nDisplayPixels * sizeof(RGB);

            // Allocate staging memory on the GPU to store the current WIP
            // image.
            CUDA_CHECK(cudaMalloc(&displayRGB, displayBytes));
            CUDA_CHECK(cudaMemset(displayRGB, 0, displayBytes));

            // Host-side memory for the WIP Image.  We'll just let this leak so
            // that the lambda passed to DisplayDynamic below doesn't access
            // freed memory after Render() returns...
            displayRGBHost = new RGB[nDisplayPixels];

            // The buffer pointers and size are final at this point, so the
            // thread takes them by value; only the exit flag is shared.
            copyThread = std::thread([&exitCopyThread, displayRGB, displayRGBHost,
                                      displayBytes]() {
                GPURegisterThread("DISPLAY_SERVER_COPY_THREAD");

                // Copy back to the CPU using a separate stream so that we can
                // periodically but asynchronously pick up the latest results
                // from the GPU.
                cudaStream_t memcpyStream;
                CUDA_CHECK(cudaStreamCreate(&memcpyStream));
                GPUNameStream(memcpyStream, "DISPLAY_SERVER_COPY_STREAM");

                // Copy back to the host from the GPU buffer, without any
                // synchronization.
                while (!exitCopyThread) {
                    CUDA_CHECK(cudaMemcpyAsync(displayRGBHost, displayRGB, displayBytes,
                                               cudaMemcpyDeviceToHost, memcpyStream));
                    std::this_thread::sleep_for(std::chrono::milliseconds(50));

                    CUDA_CHECK(cudaStreamSynchronize(memcpyStream));
                }

                // Copy one more time to get the final image before exiting.
                CUDA_CHECK(cudaMemcpy(displayRGBHost, displayRGB, displayBytes,
                                      cudaMemcpyDeviceToHost));
                CUDA_CHECK(cudaDeviceSynchronize());

                // Release the stream created above; it is private to this
                // thread and no work remains queued on it.
                CUDA_CHECK(cudaStreamDestroy(memcpyStream));
            });

            // Now on the CPU side, give the display system a lambda that
            // copies values from |displayRGBHost| into its buffers used for
            // sending messages to the display program (i.e., tev).
            DisplayDynamic(film.GetFilename(), {resolution.x, resolution.y},
                           {"R", "G", "B"},
                           [resolution, displayRGBHost](
                               Bounds2i b, pstd::span<pstd::span<Float>> displayValue) {
                               int index = 0;
                               for (Point2i p : b) {
                                   RGB rgb = displayRGBHost[p.x + p.y * resolution.x];
                                   displayValue[0][index] = rgb.r;
                                   displayValue[1][index] = rgb.g;
                                   displayValue[2][index] = rgb.b;
                                   ++index;
                               }
                           });
        } else
#endif  // PBRT_BUILD_GPU_RENDERER
            // CPU rendering: the display callback reads pixel values straight
            // from the film.
            DisplayDynamic(
                film.GetFilename(), Point2i(pixelBounds.Diagonal()), {"R", "G", "B"},
                [pixelBounds, this](Bounds2i b,
                                    pstd::span<pstd::span<Float>> displayValue) {
                    int index = 0;
                    for (Point2i p : b) {
                        RGB rgb =
                            film.GetPixelRGB(pixelBounds.pMin + p, 1.f /* splat scale */);
                        for (int c = 0; c < 3; ++c)
                            displayValue[c][index] = rgb[c];
                        ++index;
                    }
                });
    }

    int firstSampleIndex = 0, lastSampleIndex = spp;
    // Update sample index range based on debug start, if provided
    if (!Options->debugStart.empty()) {
        std::vector<int> values = SplitStringToInts(Options->debugStart, ',');
        if (values.size() != 2)
            ErrorExit("Expected two integer values for --debugstart.");

        firstSampleIndex = values[0];
        lastSampleIndex = firstSampleIndex + values[1];
    }

    ProgressReporter progress(lastSampleIndex - firstSampleIndex, "Rendering",
                              Options->quiet, Options->useGPU);
    for (int sampleIndex = firstSampleIndex; sampleIndex < lastSampleIndex;
         ++sampleIndex) {
        // Render image for sample _sampleIndex_
        LOG_VERBOSE("Starting to submit work for sample %d", sampleIndex);
        for (int y0 = pixelBounds.pMin.y; y0 < pixelBounds.pMax.y;
             y0 += scanlinesPerPass) {
            // Generate camera rays for current scanline range
            RayQueue *cameraRayQueue = CurrentRayQueue(0);
            Do(
                "Reset ray queue", PBRT_CPU_GPU_LAMBDA() {
                    PBRT_DBG("Starting scanlines at y0 = %d, sample %d / %d\n", y0,
                             sampleIndex, spp);
                    cameraRayQueue->Reset();
                });
            GenerateCameraRays(y0, sampleIndex);
            Do(
                "Update camera ray stats",
                PBRT_CPU_GPU_LAMBDA() { stats->cameraRays += cameraRayQueue->Size(); });

            // Trace rays and estimate radiance up to maximum ray depth
            for (int depth = 0; true; ++depth) {
                // Reset queues before tracing rays
                RayQueue *nextQueue = NextRayQueue(depth);
                Do(
                    "Reset queues before tracing rays", PBRT_CPU_GPU_LAMBDA() {
                        nextQueue->Reset();
                        // Reset queues before tracing next batch of rays
                        if (mediumSampleQueue)
                            mediumSampleQueue->Reset();
                        if (mediumScatterQueue)
                            mediumScatterQueue->Reset();

                        if (escapedRayQueue)
                            escapedRayQueue->Reset();
                        hitAreaLightQueue->Reset();

                        basicEvalMaterialQueue->Reset();
                        universalEvalMaterialQueue->Reset();

                        if (bssrdfEvalQueue)
                            bssrdfEvalQueue->Reset();
                        if (subsurfaceScatterQueue)
                            subsurfaceScatterQueue->Reset();
                    });

                // Follow active ray paths and accumulate radiance estimates
                GenerateRaySamples(depth, sampleIndex);
                // Find closest intersections along active rays
                aggregate->IntersectClosest(maxQueueSize, escapedRayQueue,
                                            hitAreaLightQueue, basicEvalMaterialQueue,
                                            universalEvalMaterialQueue, mediumSampleQueue,
                                            CurrentRayQueue(depth), NextRayQueue(depth));

                if (depth > 0) {
                    // As above, with the indexing...
                    RayQueue *statsQueue = CurrentRayQueue(depth);
                    Do(
                        "Update indirect ray stats", PBRT_CPU_GPU_LAMBDA() {
                            stats->indirectRays[depth] += statsQueue->Size();
                        });
                }
                if (haveMedia)
                    SampleMediumInteraction(depth);
                if (escapedRayQueue)
                    HandleEscapedRays(depth);
                HandleRayFoundEmission(depth);
                // Emission is still accounted for at the last depth, but no
                // further scattering or shadow rays are generated.
                if (depth == maxDepth)
                    break;
                EvaluateMaterialsAndBSDFs(depth);
                // Do immediately so that we have space for shadow rays for subsurface..
                TraceShadowRays(depth);
                if (haveSubsurface)
                    SampleSubsurface(depth);
            }

            UpdateFilm();
            // Copy updated film pixels to buffer for display
#ifdef PBRT_BUILD_GPU_RENDERER
            if (Options->useGPU && !Options->displayServer.empty())
                GPUParallelFor(
                    "Update Display RGB Buffer", maxQueueSize,
                    PBRT_CPU_GPU_LAMBDA(int pixelIndex) {
                        Point2i pPixel = pixelSampleState.pPixel[pixelIndex];
                        if (!InsideExclusive(pPixel, film.PixelBounds()))
                            return;

                        Point2i p(pPixel - film.PixelBounds().pMin);
                        displayRGB[p.x + p.y * resolution.x] = film.GetPixelRGB(pPixel);
                    });
#endif  //  PBRT_BUILD_GPU_RENDERER
        }

        progress.Update();
    }
    progress.Done();
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU)
        GPUWait();
#endif  // PBRT_BUILD_GPU_RENDERER
    Float seconds = timer.ElapsedSeconds();
    // Shut down display server thread, if active
#ifdef PBRT_BUILD_GPU_RENDERER
    if (Options->useGPU) {
        // Wait until rendering is all done before we start to shut down the
        // display stuff..
        if (!Options->displayServer.empty()) {
            exitCopyThread = true;
            copyThread.join();
        }

        // Another synchronization to make sure no kernels are running on the
        // GPU so that we can safely access unified memory from the CPU.
        GPUWait();

        // The device staging buffer is only used by the kernels launched
        // above and by the (now joined) copy thread, so it can be released.
        // The display callback reads from the host buffer only.
        if (displayRGB != nullptr) {
            CUDA_CHECK(cudaFree(displayRGB));
        }
    }
#endif  // PBRT_BUILD_GPU_RENDERER

    return seconds;
}

557
void WavefrontPathIntegrator::HandleEscapedRays(int depth) {
558 559
    ForAllQueued(
        "Handle escaped rays", escapedRayQueue, maxQueueSize,
560
        PBRT_CPU_GPU_LAMBDA(const EscapedRayWorkItem w) {
561 562
            // Update pixel radiance for escaped ray
            SampledSpectrum L(0.f);
563
            for (const auto &light : *envLights) {
564 565
                if (SampledSpectrum Le = light.Le(Ray(w.rayo, w.rayd), w.lambda); Le) {
                    // Compute path radiance contribution from infinite light
566 567 568
                    PBRT_DBG("L %f %f %f %f T_hat %f %f %f %f Le %f %f %f %f", L[0], L[1],
                             L[2], L[3], w.T_hat[0], w.T_hat[1], w.T_hat[2], w.T_hat[3],
                             Le[0], Le[1], Le[2], Le[3]);
569
                    PBRT_DBG("pdf uni %f %f %f %f pdf nee %f %f %f %f", w.uniPathPDF[0],
570 571 572
                             w.uniPathPDF[1], w.uniPathPDF[2], w.uniPathPDF[3],
                             w.lightPathPDF[0], w.lightPathPDF[1], w.lightPathPDF[2],
                             w.lightPathPDF[3]);
573 574 575 576 577 578 579 580 581 582 583 584 585

                    if (depth == 0 || w.specularBounce) {
                        L += w.T_hat * Le / w.uniPathPDF.Average();
                    } else {
                        // Compute MIS-weighted radiance contribution from infinite light
                        LightSampleContext ctx = w.prevIntrCtx;
                        Float lightChoicePDF = lightSampler.PDF(ctx, light);
                        SampledSpectrum lightPathPDF =
                            w.lightPathPDF * lightChoicePDF *
                            light.PDF_Li(ctx, w.rayd, LightSamplingMode::WithMIS);
                        L += w.T_hat * Le / (w.uniPathPDF + lightPathPDF).Average();
                    }
                }
586
            }
587
            if (L) {
588 589
                PBRT_DBG("Added L %f %f %f %f for escaped ray pixel index %d\n", L[0],
                         L[1], L[2], L[3], w.pixelIndex);
590

591 592 593
                L += pixelSampleState.L[w.pixelIndex];
                pixelSampleState.L[w.pixelIndex] = L;
            }
594
        });
M
Matt Pharr 已提交
595 596
}

597
void WavefrontPathIntegrator::HandleRayFoundEmission(int depth) {
M
Matt Pharr 已提交
598 599
    ForAllQueued(
        "Handle emitters hit by indirect rays", hitAreaLightQueue, maxQueueSize,
600
        PBRT_CPU_GPU_LAMBDA(const HitAreaLightWorkItem w) {
601
            // Find emitted radiance from surface that ray hit
M
Matt Pharr 已提交
602
            SampledSpectrum Le = w.areaLight.L(w.p, w.n, w.uv, w.wo, w.lambda);
M
Matt Pharr 已提交
603 604
            if (!Le)
                return;
605
            PBRT_DBG("Got Le %f %f %f %f from hit area light at depth %d\n", Le[0], Le[1],
606
                     Le[2], Le[3], depth);
M
Matt Pharr 已提交
607

608
            // Compute area light's weighted radiance contribution to the path
609
            SampledSpectrum L(0.f);
610 611
            if (depth == 0 || w.isSpecularBounce) {
                L = w.T_hat * Le / w.uniPathPDF.Average();
M
Matt Pharr 已提交
612
            } else {
613
                // Compute MIS-weighted radiance contribution from area light
614 615
                Vector3f wi = -w.wo;
                LightSampleContext ctx = w.prevIntrCtx;
616
                Float lightChoicePDF = lightSampler.PDF(ctx, w.areaLight);
M
Matt Pharr 已提交
617
                Float lightPDF = lightChoicePDF *
618
                                 w.areaLight.PDF_Li(ctx, wi, LightSamplingMode::WithMIS);
M
Matt Pharr 已提交
619

620 621 622
                SampledSpectrum uniPathPDF = w.uniPathPDF;
                SampledSpectrum lightPathPDF = w.lightPathPDF * lightPDF;
                L = w.T_hat * Le / (uniPathPDF + lightPathPDF).Average();
M
Matt Pharr 已提交
623 624
            }

625
            PBRT_DBG("Added L %f %f %f %f for pixel index %d\n", L[0], L[1], L[2], L[3],
626
                     w.pixelIndex);
627

628
            // Update _L_ in _PixelSampleState_ for area light's radiance
629 630
            L += pixelSampleState.L[w.pixelIndex];
            pixelSampleState.L[w.pixelIndex] = L;
M
Matt Pharr 已提交
631 632 633
        });
}

634
void WavefrontPathIntegrator::TraceShadowRays(int depth) {
635
    if (haveMedia)
636
        aggregate->IntersectShadowTr(maxQueueSize, shadowRayQueue, &pixelSampleState);
637
    else
638
        aggregate->IntersectShadow(maxQueueSize, shadowRayQueue, &pixelSampleState);
639
    // Reset shadow ray queue
640 641
    Do(
        "Reset shadowRayQueue", PBRT_CPU_GPU_LAMBDA() {
642 643 644 645 646
            stats->shadowRays[depth] += shadowRayQueue->Size();
            shadowRayQueue->Reset();
        });
}

647
// Sizes the per-depth counters: indirectRays gets maxDepth + 1 entries and
// shadowRays gets maxDepth entries, both allocated with alloc.
WavefrontPathIntegrator::Stats::Stats(int maxDepth, Allocator alloc)
    : indirectRays(maxDepth + 1, alloc), shadowRays(maxDepth, alloc) {}

650
// Returns a multi-line, human-readable report of the ray counters: one line
// for camera rays, one per indirect-ray depth starting at depth 1 (index 0 of
// indirectRays is not printed), and one per shadow-ray depth starting at 0.
std::string WavefrontPathIntegrator::Stats::Print() const {
    std::string s;
    s += StringPrintf("    %-42s               %12" PRIu64 "\n", "Camera rays",
                      cameraRays);

    // Convert the sizes once so the loop comparisons are int-vs-int rather
    // than signed-vs-unsigned.
    const int nIndirectDepths = static_cast<int>(indirectRays.size());
    for (int i = 1; i < nIndirectDepths; ++i)
        s += StringPrintf("    %-42s               %12" PRIu64 "\n",
                          StringPrintf("Indirect rays, depth %-3d", i), indirectRays[i]);

    const int nShadowDepths = static_cast<int>(shadowRays.size());
    for (int i = 0; i < nShadowDepths; ++i)
        s += StringPrintf("    %-42s               %12" PRIu64 "\n",
                          StringPrintf("Shadow rays, depth %-3d", i), shadowRays[i]);
    return s;
}

}  // namespace pbrt