cmd/imgtool: refactor OptiX denoiser use into gpu/denoiser.*

a4f48c69 · Matt Pharr · cacd50aa · a4f48c69 · a4f48c69 · a4f48c69
6 changed file
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -672,11 +672,13 @@ SET (PBRT_UTIL_SOURCE_HEADERS
 if (PBRT_CUDA_ENABLED)
  set (PBRT_GPU_SOURCE
    src/pbrt/gpu/aggregate.cpp
+    src/pbrt/gpu/denoiser.cpp
    src/pbrt/gpu/memory.cpp
    src/pbrt/gpu/util.cpp
  )
  set (PBRT_GPU_SOURCE_HEADERS
    src/pbrt/gpu/aggregate.h
+    src/pbrt/gpu/denoiser.h
    src/pbrt/gpu/memory.h
    src/pbrt/gpu/optix.h
    src/pbrt/gpu/util.h

--- a/src/pbrt/cmd/imgtool.cpp
+++ b/src/pbrt/cmd/imgtool.cpp
@@ -6,6 +6,10 @@

 #include <pbrt/filters.h>
 #include <pbrt/options.h>
+#ifdef PBRT_BUILD_GPU_RENDERER
+#include <pbrt/gpu/denoiser.h>
+#include <pbrt/gpu/util.h>
+#endif // PBRT_BUILD_GPU_RENDERER
 #include <pbrt/util/args.h>
 #include <pbrt/util/check.h>
 #include <pbrt/util/color.h>
@@ -39,31 +43,6 @@ extern "C" {

 #include <flip.h>

-#ifdef PBRT_BUILD_GPU_RENDERER
-
-#include <pbrt/gpu/memory.h>
-#include <pbrt/gpu/util.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-
-#include <optix.h>
-#include <optix_stubs.h>
-
-#define OPTIX_CHECK(EXPR)                                                           \
-    do {                                                                            \
-        OptixResult res = EXPR;                                                     \
-        if (res != OPTIX_SUCCESS)                                                   \
-            LOG_FATAL("OptiX call " #EXPR " failed with code %d: \"%s\"", int(res), \
-                      optixGetErrorString(res));                                    \
-    } while (false) /* eat semicolon */
-#endif
-
-// Stop that, Windows.
-#ifdef RGB
-#undef RGB
-#endif
-
 using namespace pbrt;

 struct CommandUsage {
@@ -2504,19 +2483,11 @@ int denoise_optix(std::vector<std::string> args) {
    if (outFilename.empty())
        usage("denoise-optix", "output image filename must be provided.");

-    CUDA_CHECK(cudaFree(nullptr));
-
-    CUcontext cudaContext;
-    CU_CHECK(cuCtxGetCurrent(&cudaContext));
-    CHECK(cudaContext != nullptr);
-
-    OPTIX_CHECK(optixInit());
-    OptixDeviceContext optixContext;
-    OPTIX_CHECK(optixDeviceContextCreate(cudaContext, 0, &optixContext));
-
    ImageAndMetadata im = Image::Read(inFilename);
    Image &image = im.image;

+    CUDA_CHECK(cudaFree(nullptr));
+
    int nLayers = 3;
    ImageChannelDesc desc[3] = {
        image.GetChannelDesc({"R", "G", "B"}),
@@ -2537,114 +2508,49 @@ int denoise_optix(std::vector<std::string> args) {
        nLayers = 1;
    }

-    OptixDenoiserOptions options = {};
-#if (OPTIX_VERSION >= 70300)
-    if (nLayers == 3)
-        options.guideAlbedo = options.guideNormal = 1;
-
-    OptixDenoiser denoiserHandle;
-    OPTIX_CHECK(optixDenoiserCreate(optixContext, OPTIX_DENOISER_MODEL_KIND_HDR, &options,
-                                    &denoiserHandle));
-#else
-    options.inputKind = (nLayers == 3) ? OPTIX_DENOISER_INPUT_RGB_ALBEDO_NORMAL
-                                       : OPTIX_DENOISER_INPUT_RGB;
-
-    OptixDenoiser denoiserHandle;
-    OPTIX_CHECK(optixDenoiserCreate(optixContext, &options, &denoiserHandle));
-
-    OPTIX_CHECK(
-        optixDenoiserSetModel(denoiserHandle, OPTIX_DENOISER_MODEL_KIND_HDR, nullptr, 0));
-#endif
+    Denoiser denoiser((Vector2i)image.Resolution(), nLayers == 3);

-    OptixDenoiserSizes memorySizes;
-    OPTIX_CHECK(optixDenoiserComputeMemoryResources(denoiserHandle, image.Resolution().x,
-                                                    image.Resolution().y, &memorySizes));
-
-    void *denoiserState;
-    CUDA_CHECK(cudaMalloc(&denoiserState, memorySizes.stateSizeInBytes));
-    void *scratchBuffer;
-    CUDA_CHECK(cudaMalloc(&scratchBuffer, memorySizes.withoutOverlapScratchSizeInBytes));
+    size_t imageBytes = 3 * image.Resolution().x * image.Resolution().y * sizeof(float);

-    OPTIX_CHECK(optixDenoiserSetup(
-        denoiserHandle, 0 /* stream */, image.Resolution().x, image.Resolution().y,
-        CUdeviceptr(denoiserState), memorySizes.stateSizeInBytes,
-        CUdeviceptr(scratchBuffer), memorySizes.withoutOverlapScratchSizeInBytes));
+    auto copyChannelsToGPU = [&](std::array<std::string, 3> ch,
+                                 bool flipZ = false) {
+        void *bufGPU;
+        CUDA_CHECK(cudaMalloc(&bufGPU, imageBytes));
+        std::vector<float> hostStaging(imageBytes / sizeof(float));

-    size_t imageBytes = 3 * image.Resolution().x * image.Resolution().y * sizeof(float);
-    std::vector<OptixImage2D> inputLayers(nLayers);
-    for (int i = 0; i < nLayers; ++i) {
-        inputLayers[i].width = image.Resolution().x;
-        inputLayers[i].height = image.Resolution().y;
-        inputLayers[i].rowStrideInBytes = image.Resolution().x * 3 * sizeof(float);
-        inputLayers[i].pixelStrideInBytes = 0;
-        inputLayers[i].format = OPTIX_PIXEL_FORMAT_FLOAT3;
-
-        size_t sz = 3 * image.Resolution().x * image.Resolution().y;
-        std::vector<float> bufHost(sz);
+        ImageChannelDesc desc = image.GetChannelDesc(ch);
+        CHECK(desc);
        int offset = 0;
        for (int y = 0; y < image.Resolution().y; ++y)
            for (int x = 0; x < image.Resolution().x; ++x) {
-                ImageChannelValues v = image.GetChannels({x, y}, desc[i]);
-                if (i == 2)
-                    v[2] *= -1;  // flip z--right handed...
+                ImageChannelValues v = image.GetChannels({x, y}, desc);
+                if (flipZ)
+                    v[2] *= -1;  // flip normal's z--right handed...
                for (int c = 0; c < 3; ++c)
-                    bufHost[offset++] = v[c];
+                    hostStaging[offset++] = v[c];
            }
+        CUDA_CHECK(cudaMemcpy(bufGPU, hostStaging.data(), imageBytes,
+                              cudaMemcpyHostToDevice));
+        return bufGPU;
+    };
+    RGB *rgbGPU = (RGB *)copyChannelsToGPU({"R", "G", "B"});

-        void *bufGPU;
-        CUDA_CHECK(cudaMalloc(&bufGPU, imageBytes));
-        CUDA_CHECK(
-            cudaMemcpy(bufGPU, bufHost.data(), imageBytes, cudaMemcpyHostToDevice));
-        inputLayers[i].data = CUdeviceptr(bufGPU);
-    }
-
-    OptixImage2D outputImage;
-    outputImage.width = image.Resolution().x;
-    outputImage.height = image.Resolution().y;
-    outputImage.rowStrideInBytes = image.Resolution().x * 3 * sizeof(float);
-    outputImage.pixelStrideInBytes = 0;
-    outputImage.format = OPTIX_PIXEL_FORMAT_FLOAT3;
-    CUDA_CHECK(cudaMalloc((void **)&outputImage.data, imageBytes));
-
-    void *intensity;
-    CUDA_CHECK(cudaMalloc(&intensity, sizeof(float)));
-    OPTIX_CHECK(optixDenoiserComputeIntensity(
-        denoiserHandle, 0 /* stream */, &inputLayers[0], CUdeviceptr(intensity),
-        CUdeviceptr(scratchBuffer), memorySizes.withoutOverlapScratchSizeInBytes));
-
-    OptixDenoiserParams params = {};
-    params.denoiseAlpha = 0;
-    params.hdrIntensity = CUdeviceptr(intensity);
-    params.blendFactor = 0;  // TODO what should this be??
-
-#if (OPTIX_VERSION >= 70300)
-    OptixDenoiserGuideLayer guideLayer;
+    RGB *albedoGPU = nullptr;
+    Normal3f *normalGPU = nullptr;
    if (nLayers == 3) {
-        guideLayer.albedo = inputLayers[1];
-        guideLayer.normal = inputLayers[2];
-    }
-
-    OptixDenoiserLayer layers;
-    layers.input = inputLayers[0];
-    layers.output = outputImage;
-
-    OPTIX_CHECK(optixDenoiserInvoke(
-        denoiserHandle, 0 /* stream */, &params, CUdeviceptr(denoiserState),
-        memorySizes.stateSizeInBytes, &guideLayer, &layers, 1 /* # layers to denoise */,
-        0 /* offset x */, 0 /* offset y */, CUdeviceptr(scratchBuffer),
-        memorySizes.withoutOverlapScratchSizeInBytes));
-#else
-    OPTIX_CHECK(optixDenoiserInvoke(
-        denoiserHandle, 0 /* stream */, &params, CUdeviceptr(denoiserState),
-        memorySizes.stateSizeInBytes, inputLayers.data(), nLayers, 0 /* offset x */,
-        0 /* offset y */, &outputImage, CUdeviceptr(scratchBuffer),
-        memorySizes.withoutOverlapScratchSizeInBytes));
-#endif
+        albedoGPU = (RGB *)copyChannelsToGPU({"Albedo.R", "Albedo.G", "Albedo.B"});
+        normalGPU = (Normal3f *)copyChannelsToGPU({"Nsx", "Nsy", "Nsz"}, true);
+    }
+
+    RGB *rgbResultGPU;
+    CUDA_CHECK(cudaMalloc(&rgbResultGPU, imageBytes));
+
+    denoiser.Denoise(rgbGPU, normalGPU, albedoGPU, rgbResultGPU);

    CUDA_CHECK(cudaDeviceSynchronize());

    Image result(PixelFormat::Float, image.Resolution(), {"R", "G", "B"});
-    CUDA_CHECK(cudaMemcpy(result.RawPointer({0, 0}), (const void *)outputImage.data,
+    CUDA_CHECK(cudaMemcpy(result.RawPointer({0, 0}), (const void *)rgbResultGPU,
                          imageBytes, cudaMemcpyDeviceToHost));

    ImageMetadata outMetadata;

--- a/src/pbrt/gpu/denoiser.cpp
+++ b/src/pbrt/gpu/denoiser.cpp
+// pbrt is Copyright(c) 1998-2020 Matt Pharr, Wenzel Jakob, and Greg Humphreys.
+// The pbrt source code is licensed under the Apache License, Version 2.0.
+// SPDX: Apache-2.0
+
+#include <pbrt/gpu/denoiser.h>
+
+#include <pbrt/gpu/memory.h>
+#include <pbrt/gpu/util.h>
+
+#include <array>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <optix.h>
+#include <optix_stubs.h>
+
+#define OPTIX_CHECK(EXPR)                                                           \
+    do {                                                                            \
+        OptixResult res = EXPR;                                                     \
+        if (res != OPTIX_SUCCESS)                                                   \
+            LOG_FATAL("OptiX call " #EXPR " failed with code %d: \"%s\"", int(res), \
+                      optixGetErrorString(res));                                    \
+    } while (false) /* eat semicolon */
+
+// Stop that, Windows.
+#ifdef RGB
+#undef RGB
+#endif
+
+namespace pbrt {
+
+Denoiser::Denoiser(Vector2i resolution, bool haveAlbedoAndNormal)
+    : resolution(resolution), haveAlbedoAndNormal(haveAlbedoAndNormal) {
+    CUcontext cudaContext;
+    CU_CHECK(cuCtxGetCurrent(&cudaContext));
+    CHECK(cudaContext != nullptr);
+
+    OPTIX_CHECK(optixInit());
+    OptixDeviceContext optixContext;
+    OPTIX_CHECK(optixDeviceContextCreate(cudaContext, 0, &optixContext));
+
+    OptixDenoiserOptions options = {};
+#if (OPTIX_VERSION >= 70300)
+    if (haveAlbedoAndNormal)
+        options.guideAlbedo = options.guideNormal = 1;
+
+    OPTIX_CHECK(optixDenoiserCreate(optixContext, OPTIX_DENOISER_MODEL_KIND_HDR, &options,
+                                    &denoiserHandle));
+#else
+    options.inputKind = haveAlbedoAndNormal ? OPTIX_DENOISER_INPUT_RGB_ALBEDO_NORMAL
+                                            : OPTIX_DENOISER_INPUT_RGB;
+
+    OPTIX_CHECK(optixDenoiserCreate(optixContext, &options, &denoiserHandle));
+
+    OPTIX_CHECK(
+        optixDenoiserSetModel(denoiserHandle, OPTIX_DENOISER_MODEL_KIND_HDR, nullptr, 0));
+#endif
+
+    OPTIX_CHECK(optixDenoiserComputeMemoryResources(denoiserHandle, resolution.x,
+                                                    resolution.y, &memorySizes));
+
+    CUDA_CHECK(cudaMalloc(&denoiserState, memorySizes.stateSizeInBytes));
+    CUDA_CHECK(cudaMalloc(&scratchBuffer, memorySizes.withoutOverlapScratchSizeInBytes));
+
+    OPTIX_CHECK(optixDenoiserSetup(
+        denoiserHandle, 0 /* stream */, resolution.x, resolution.y,
+        CUdeviceptr(denoiserState), memorySizes.stateSizeInBytes,
+        CUdeviceptr(scratchBuffer), memorySizes.withoutOverlapScratchSizeInBytes));
+
+    CUDA_CHECK(cudaMalloc(&intensity, sizeof(float)));
+}
+
+void Denoiser::Denoise(RGB *rgb, Normal3f *n, RGB *albedo, RGB *result) {
+    std::array<OptixImage2D, 3> inputLayers;
+    int nLayers = haveAlbedoAndNormal ? 3 : 1;
+    for (int i = 0; i < nLayers; ++i) {
+        inputLayers[i].width = resolution.x;
+        inputLayers[i].height = resolution.y;
+        inputLayers[i].rowStrideInBytes = resolution.x * 3 * sizeof(float);
+        inputLayers[i].pixelStrideInBytes = 0;
+        inputLayers[i].format = OPTIX_PIXEL_FORMAT_FLOAT3;
+    }
+    inputLayers[0].data = CUdeviceptr(rgb);
+    if (haveAlbedoAndNormal) {
+        CHECK(n != nullptr && albedo != nullptr);
+        inputLayers[1].data = CUdeviceptr(albedo);
+        inputLayers[2].data = CUdeviceptr(n);
+    } else
+        CHECK(n == nullptr && albedo == nullptr);
+
+    OptixImage2D outputImage;
+    outputImage.width = resolution.x;
+    outputImage.height = resolution.y;
+    outputImage.rowStrideInBytes = resolution.x * 3 * sizeof(float);
+    outputImage.pixelStrideInBytes = 0;
+    outputImage.format = OPTIX_PIXEL_FORMAT_FLOAT3;
+    outputImage.data = CUdeviceptr(result);
+
+    OPTIX_CHECK(optixDenoiserComputeIntensity(
+        denoiserHandle, 0 /* stream */, &inputLayers[0], CUdeviceptr(intensity),
+        CUdeviceptr(scratchBuffer), memorySizes.withoutOverlapScratchSizeInBytes));
+
+    OptixDenoiserParams params = {};
+    params.denoiseAlpha = 0;
+    params.hdrIntensity = CUdeviceptr(intensity);
+    params.blendFactor = 0;  // TODO what should this be??
+
+#if (OPTIX_VERSION >= 70300)
+    OptixDenoiserGuideLayer guideLayer;
+    if (haveAlbedoAndNormal) {
+        guideLayer.albedo = inputLayers[1];
+        guideLayer.normal = inputLayers[2];
+    }
+
+    OptixDenoiserLayer layers;
+    layers.input = inputLayers[0];
+    layers.output = outputImage;
+
+    OPTIX_CHECK(optixDenoiserInvoke(
+        denoiserHandle, 0 /* stream */, &params, CUdeviceptr(denoiserState),
+        memorySizes.stateSizeInBytes, &guideLayer, &layers, 1 /* # layers to denoise */,
+        0 /* offset x */, 0 /* offset y */, CUdeviceptr(scratchBuffer),
+        memorySizes.withoutOverlapScratchSizeInBytes));
+#else
+    OPTIX_CHECK(optixDenoiserInvoke(
+        denoiserHandle, 0 /* stream */, &params, CUdeviceptr(denoiserState),
+        memorySizes.stateSizeInBytes, inputLayers.data(), nLayers, 0 /* offset x */,
+        0 /* offset y */, &outputImage, CUdeviceptr(scratchBuffer),
+        memorySizes.withoutOverlapScratchSizeInBytes));
+#endif
+}
+
+} // namespace pbrt
--- a/src/pbrt/gpu/denoiser.h
+++ b/src/pbrt/gpu/denoiser.h
+// pbrt is Copyright(c) 1998-2020 Matt Pharr, Wenzel Jakob, and Greg Humphreys.
+// The pbrt source code is licensed under the Apache License, Version 2.0.
+// SPDX: Apache-2.0
+
+#ifndef PBRT_GPU_DENOISER_H
+#define PBRT_GPU_DENOISER_H
+
+#include <pbrt/pbrt.h>
+
+#include <pbrt/util/color.h>
+#include <pbrt/util/vecmath.h>
+
+#include <optix.h>
+
+namespace pbrt {
+
+class Denoiser {
+ public:
+    Denoiser(Vector2i resolution, bool haveAlbedoAndNormal);
+
+    // All pointers should be to GPU memory.
+    // |n| and |albedo| should be nullptr iff \haveAlbedoAndNormal| is false.
+    void Denoise(RGB *rgb, Normal3f *n, RGB *albedo, RGB *result);
+
+ private:
+    Vector2i resolution;
+    bool haveAlbedoAndNormal;
+    OptixDenoiser denoiserHandle;
+    OptixDenoiserSizes memorySizes;
+    void *denoiserState, *scratchBuffer, *intensity;
+};
+
+} // namespace pbrt
+
+#endif // PBRT_GPU_DENOISER_H
--- a/src/pbrt/gpu/util.cpp
+++ b/src/pbrt/gpu/util.cpp
@@ -204,6 +204,10 @@ void GPUWait() {
    CUDA_CHECK(cudaDeviceSynchronize());
 }

+void GPUMemset(void *ptr, int byte, size_t bytes) {
+    CUDA_CHECK(cudaMemset(ptr, byte, bytes));
+}
+
 void ReportKernelStats() {
    CUDA_CHECK(cudaDeviceSynchronize());


--- a/src/pbrt/gpu/util.h
+++ b/src/pbrt/gpu/util.h
@@ -123,6 +123,8 @@ void ReportKernelStats();
 void GPUInit();
 void GPUThreadInit();

+void GPUMemset(void *ptr, int byte, size_t bytes);
+
 void GPURegisterThread(const char *name);
 void GPUNameStream(cudaStream_t stream, const char *name);