Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
imjiangjun
pbrt-v4
提交
4a87e704
P
pbrt-v4
项目概览
imjiangjun
/
pbrt-v4
12 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
pbrt-v4
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
4a87e704
编写于
11月 16, 2020
作者:
M
Matt Pharr
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Update from book source. MultiWorkQueue simplifications.
上级
c7c74f1f
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
59 addition
and
72 deletion
+59
-72
src/pbrt/gpu/launch.h
src/pbrt/gpu/launch.h
+12
-12
src/pbrt/gpu/media.cpp
src/pbrt/gpu/media.cpp
+1
-1
src/pbrt/gpu/optix.cu
src/pbrt/gpu/optix.cu
+1
-1
src/pbrt/gpu/surfscatter.cpp
src/pbrt/gpu/surfscatter.cpp
+1
-1
src/pbrt/gpu/workitems.h
src/pbrt/gpu/workitems.h
+4
-4
src/pbrt/gpu/workqueue.h
src/pbrt/gpu/workqueue.h
+33
-53
src/pbrt/pbrt.h
src/pbrt/pbrt.h
+7
-0
未找到文件。
src/pbrt/gpu/launch.h
浏览文件 @
4a87e704
...
...
@@ -39,8 +39,8 @@ inline int GetBlockSize(const char *description, F kernel) {
return
iter
->
second
;
int
minGridSize
,
blockSize
;
CUDA_CHECK
(
cudaOccupancyMaxPotentialBlockSize
(
&
minGridSize
,
&
blockSize
,
kernel
,
0
,
0
));
CUDA_CHECK
(
cudaOccupancyMaxPotentialBlockSize
(
&
minGridSize
,
&
blockSize
,
kernel
,
0
,
0
));
kernelBlockSizes
[
index
]
=
blockSize
;
LOG_VERBOSE
(
"[%s]: block size %d"
,
description
,
blockSize
);
...
...
@@ -56,11 +56,16 @@ __global__ void Kernel(F func, int nItems) {
func
(
tid
);
}
#ifdef PBRT_IS_WINDOWS
#define PBRT_GPU_LAMBDA(...) [=,*this] PBRT_GPU(__VA_ARGS__) mutable
#else
#define PBRT_GPU_LAMBDA(...) [=] PBRT_GPU(__VA_ARGS__)
#endif
// GPU Launch Function Declarations
template
<
typename
F
>
void
GPUParallelFor
(
const
char
*
description
,
int
nItems
,
F
func
);
template
<
typename
F
>
void
GPUDo
(
const
char
*
description
,
F
func
)
{
GPUParallelFor
(
description
,
1
,
[
=
]
PBRT_GPU
(
int
)
mutable
{
func
();
});
}
void
GPUWait
();
template
<
typename
F
>
void
GPUParallelFor
(
const
char
*
description
,
int
nItems
,
F
func
)
{
...
...
@@ -89,11 +94,6 @@ void GPUParallelFor(const char *description, int nItems, F func) {
#endif
}
template
<
typename
F
>
void
GPUDo
(
const
char
*
description
,
F
func
)
{
GPUParallelFor
(
description
,
1
,
[
=
]
PBRT_GPU
(
int
)
mutable
{
func
();
});
}
void
ReportKernelStats
();
}
// namespace pbrt
...
...
src/pbrt/gpu/media.cpp
浏览文件 @
4a87e704
...
...
@@ -209,7 +209,7 @@ void GPUPathIntegrator::SampleMediumInteraction(int depth) {
auto
enqueue
=
[
=
](
auto
ptr
)
{
using
Material
=
typename
std
::
remove_reference_t
<
decltype
(
*
ptr
)
>
;
q
->
Push
<
Material
>
(
MaterialEvalWorkItem
<
Material
>
{
q
->
Push
<
Material
EvalWorkItem
<
Material
>
>
(
MaterialEvalWorkItem
<
Material
>
{
ptr
,
lambda
,
beta
,
uniPathPDF
,
ms
.
pi
,
ms
.
n
,
ms
.
ns
,
ms
.
dpdus
,
ms
.
dpdvs
,
ms
.
dndus
,
ms
.
dndvs
,
-
ray
.
d
,
ms
.
uv
,
ray
.
time
,
ms
.
anyNonSpecularBounces
,
ms
.
etaScale
,
ms
.
mediumInterface
,
ms
.
pixelIndex
});
...
...
src/pbrt/gpu/optix.cu
浏览文件 @
4a87e704
...
...
@@ -229,7 +229,7 @@ static __forceinline__ __device__ void ProcessClosestIntersection(
auto
enqueue
=
[
=
](
auto
ptr
)
{
using
Material
=
typename
std
::
remove_reference_t
<
decltype
(
*
ptr
)
>
;
q
->
Push
<
Material
>
(
MaterialEvalWorkItem
<
Material
>
{
q
->
Push
<
Material
EvalWorkItem
<
Material
>
>
(
MaterialEvalWorkItem
<
Material
>
{
ptr
,
r
.
lambda
,
r
.
beta
,
r
.
uniPathPDF
,
intr
.
pi
,
intr
.
n
,
intr
.
shading
.
n
,
intr
.
shading
.
dpdu
,
intr
.
shading
.
dpdv
,
intr
.
shading
.
dndu
,
intr
.
shading
.
dndv
,
intr
.
wo
,
intr
.
uv
,
intr
.
time
,
r
.
anyNonSpecularBounces
,
r
.
etaScale
,
...
...
src/pbrt/gpu/surfscatter.cpp
浏览文件 @
4a87e704
...
...
@@ -50,7 +50,7 @@ void GPUPathIntegrator::EvaluateMaterialAndBSDF(TextureEvaluator texEval,
RayQueue
*
nextRayQueue
=
NextRayQueue
(
depth
);
ForAllQueued
(
name
.
c_str
(),
evalQueue
->
Get
<
Material
>
(),
maxQueueSize
,
name
.
c_str
(),
evalQueue
->
Get
<
Material
EvalWorkItem
<
Material
>
>
(),
maxQueueSize
,
PBRT_GPU_LAMBDA
(
const
MaterialEvalWorkItem
<
Material
>
me
,
int
index
)
{
const
Material
*
material
=
me
.
material
;
...
...
src/pbrt/gpu/workitems.h
浏览文件 @
4a87e704
...
...
@@ -439,10 +439,10 @@ class MediumSampleQueue : public WorkQueue<MediumSampleWorkItem> {
using
MediumScatterQueue
=
WorkQueue
<
MediumScatterWorkItem
>
;
using
MaterialEvalQueue
=
MultiWorkQueue
<
MaterialEvalWorkItem
,
CoatedDiffuseMaterial
,
CoatedConductorMaterial
,
ConductorMaterial
,
DielectricMaterial
,
DiffuseMaterial
,
DiffuseTransmissionMaterial
,
HairMaterial
,
MeasuredMaterial
,
SubsurfaceMaterial
,
ThinDielectricMaterial
,
MixMaterial
>
;
MultiWorkQueue
<
MaterialEvalWorkItem
<
CoatedDiffuseMaterial
>
,
MaterialEvalWorkItem
<
CoatedConductorMaterial
>
,
MaterialEvalWorkItem
<
ConductorMaterial
>
,
MaterialEvalWorkItem
<
DielectricMaterial
>
,
MaterialEvalWorkItem
<
DiffuseMaterial
>
,
MaterialEvalWorkItem
<
DiffuseTransmissionMaterial
>
,
MaterialEvalWorkItem
<
HairMaterial
>
,
MaterialEvalWorkItem
<
MeasuredMaterial
>
,
MaterialEvalWorkItem
<
SubsurfaceMaterial
>
,
MaterialEvalWorkItem
<
ThinDielectricMaterial
>
,
MaterialEvalWorkItem
<
MixMaterial
>
>
;
}
// namespace pbrt
...
...
src/pbrt/gpu/workqueue.h
浏览文件 @
4a87e704
...
...
@@ -20,32 +20,35 @@
#if (__CUDA_ARCH__ >= 600)
#define PBRT_HAVE_CUDA_ATOMICS
#endif
#endif // PBRT_IS_WINDOWS
#endif
// PBRT_IS_WINDOWS
#ifdef PBRT_HAVE_CUDA_ATOMICS
#include <cuda/atomic>
#endif // PBRT_HAVE_CUDA_ATOMICS
#endif
// PBRT_HAVE_CUDA_ATOMICS
namespace
pbrt
{
// WorkQueue Definition
template
<
typename
WorkItem
>
class
WorkQueue
:
public
SOA
<
WorkItem
>
{
public:
// WorkQueue Public Methods
WorkQueue
(
int
n
,
Allocator
alloc
)
:
SOA
<
WorkItem
>
(
n
,
alloc
)
{}
PBRT_CPU_GPU
int
Size
()
const
{
#ifdef PBRT_HAVE_CUDA_ATOMICS
return
size
.
load
(
cuda
::
std
::
memory_order_relaxed
);
namespace
std
=
cuda
::
std
;
return
size
.
load
(
std
::
memory_order_relaxed
);
#else
return
size
;
#endif
}
PBRT_CPU_GPU
void
Reset
()
{
#ifdef PBRT_HAVE_CUDA_ATOMICS
size
.
store
(
0
,
cuda
::
std
::
memory_order_relaxed
);
namespace
std
=
cuda
::
std
;
size
.
store
(
0
,
std
::
memory_order_relaxed
);
#else
size
=
0
;
#endif
...
...
@@ -59,10 +62,12 @@ class WorkQueue : public SOA<WorkItem> {
}
protected:
// WorkQueue Protected Methods
PBRT_CPU_GPU
int
AllocateEntry
()
{
#ifdef PBRT_HAVE_CUDA_ATOMICS
return
size
.
fetch_add
(
1
,
cuda
::
std
::
memory_order_relaxed
);
namespace
std
=
cuda
::
std
;
return
size
.
fetch_add
(
1
,
std
::
memory_order_relaxed
);
#else
#ifdef PBRT_IS_GPU_CODE
return
atomicAdd
(
&
size
,
1
);
...
...
@@ -74,13 +79,16 @@ class WorkQueue : public SOA<WorkItem> {
}
private:
// WorkQueue Private Members
#ifdef PBRT_HAVE_CUDA_ATOMICS
cuda
::
atomic
<
int
,
cuda
::
thread_scope_device
>
size
{
0
};
using
GPUAtomicInt
=
cuda
::
atomic
<
int
,
cuda
::
thread_scope_device
>
;
GPUAtomicInt
size
{
0
};
#else
int
size
=
0
;
#endif
};
// WorkQueue Inline Functions
template
<
typename
F
,
typename
WorkItem
>
void
ForAllQueued
(
const
char
*
desc
,
WorkQueue
<
WorkItem
>
*
q
,
int
maxQueued
,
F
func
)
{
GPUParallelFor
(
desc
,
maxQueued
,
[
=
]
PBRT_GPU
(
int
index
)
mutable
{
...
...
@@ -90,22 +98,16 @@ void ForAllQueued(const char *desc, WorkQueue<WorkItem> *q, int maxQueued, F fun
});
}
template
<
template
<
typename
>
class
Work
,
typename
...
Ts
>
class
MultiWorkQueueHelper
;
// MultiWorkQueue Definition
template
<
typename
...
Ts
>
class
MultiWorkQueue
;
template
<
t
emplate
<
typename
>
class
Work
>
class
MultiWorkQueue
Helper
<
Work
>
{
template
<
t
ypename
T
,
typename
...
Ts
>
class
MultiWorkQueue
<
T
,
Ts
...
>
:
public
MultiWorkQueue
<
Ts
...
>
{
public:
MultiWorkQueueHelper
(
int
n
,
Allocator
alloc
,
pstd
::
span
<
const
bool
>
)
{}
};
template
<
template
<
typename
>
class
WorkItem
,
typename
T
,
typename
...
Ts
>
class
MultiWorkQueueHelper
<
WorkItem
,
T
,
Ts
...
>
:
public
MultiWorkQueueHelper
<
WorkItem
,
Ts
...
>
{
public:
MultiWorkQueueHelper
(
int
n
,
Allocator
alloc
,
pstd
::
span
<
const
bool
>
haveType
)
:
MultiWorkQueueHelper
<
WorkItem
,
Ts
...
>
(
n
,
alloc
,
haveType
.
subspan
(
1
,
haveType
.
size
())),
// MultiWorkQueue Public Methods
MultiWorkQueue
(
int
n
,
Allocator
alloc
,
pstd
::
span
<
const
bool
>
haveType
)
:
MultiWorkQueue
<
Ts
...
>
(
n
,
alloc
,
haveType
.
subspan
(
1
,
haveType
.
size
())),
q
(
haveType
.
front
()
?
n
:
1
,
alloc
)
{}
template
<
typename
Tsz
>
...
...
@@ -113,22 +115,22 @@ class MultiWorkQueueHelper<WorkItem, T, Ts...>
if
constexpr
(
std
::
is_same_v
<
Tsz
,
T
>
)
return
q
.
Size
();
else
return
MultiWorkQueue
Helper
<
WorkItem
,
Ts
...
>::
template
Size
<
Tsz
>();
return
MultiWorkQueue
<
Ts
...
>::
template
Size
<
Tsz
>();
}
PBRT_CPU_GPU
void
Reset
()
{
q
.
Reset
();
if
constexpr
(
sizeof
...(
Ts
)
>
0
)
MultiWorkQueue
Helper
<
WorkItem
,
Ts
...
>::
Reset
();
MultiWorkQueue
<
Ts
...
>::
Reset
();
}
template
<
typename
Tg
>
PBRT_CPU_GPU
WorkQueue
<
WorkItem
<
Tg
>
>
*
Get
()
{
PBRT_CPU_GPU
WorkQueue
<
Tg
>
*
Get
()
{
if
constexpr
(
std
::
is_same_v
<
Tg
,
T
>
)
return
&
q
;
else
return
MultiWorkQueue
Helper
<
WorkItem
,
Ts
...
>::
template
Get
<
Tg
>();
return
MultiWorkQueue
<
Ts
...
>::
template
Get
<
Tg
>();
}
template
<
typename
Tq
,
typename
...
Args
>
...
...
@@ -136,40 +138,18 @@ class MultiWorkQueueHelper<WorkItem, T, Ts...>
if
constexpr
(
std
::
is_same_v
<
Tq
,
T
>
)
return
q
.
Push
(
std
::
forward
<
Args
>
(
args
)...);
else
return
MultiWorkQueueHelper
<
WorkItem
,
Ts
...
>::
template
Push
<
Tq
>(
std
::
forward
<
Args
>
(
args
)...);
return
MultiWorkQueue
<
Ts
...
>::
template
Push
<
Tq
>(
std
::
forward
<
Args
>
(
args
)...);
}
private:
WorkQueue
<
WorkItem
<
T
>>
q
;
// MultiWorkQueue Private Members
WorkQueue
<
T
>
q
;
};
template
<
template
<
typename
>
class
WorkItem
,
typename
...
Ts
>
class
MultiWorkQueue
{
template
<
>
class
MultiWorkQueue
<>
{
public:
MultiWorkQueue
(
int
n
,
Allocator
alloc
,
pstd
::
span
<
const
bool
>
haveType
)
:
helper
(
n
,
alloc
,
haveType
)
{}
template
<
typename
T
>
PBRT_CPU_GPU
int
Size
()
const
{
return
helper
.
template
Size
<
T
>();
}
PBRT_CPU_GPU
void
Reset
()
{
helper
.
Reset
();
}
template
<
typename
T
>
PBRT_CPU_GPU
WorkQueue
<
WorkItem
<
T
>>
*
Get
()
{
return
helper
.
template
Get
<
T
>();
}
template
<
typename
T
,
typename
...
Args
>
PBRT_CPU_GPU
int
Push
(
Args
&&
...
args
)
{
return
helper
.
template
Push
<
T
>(
std
::
forward
<
Args
>
(
args
)...);
}
private:
MultiWorkQueueHelper
<
WorkItem
,
Ts
...
>
helper
;
MultiWorkQueue
(
int
n
,
Allocator
alloc
,
pstd
::
span
<
const
bool
>
haveType
)
{}
};
}
// namespace pbrt
...
...
src/pbrt/pbrt.h
浏览文件 @
4a87e704
...
...
@@ -8,6 +8,7 @@
#include <stdint.h>
#include <cstddef>
// GPU Macro Definitions
#if defined(__CUDA_ARCH__)
#define PBRT_IS_GPU_CODE
#endif
...
...
@@ -29,6 +30,12 @@
#define PBRT_GPU
#endif
#ifdef PBRT_IS_WINDOWS
#define PBRT_GPU_LAMBDA(...) [ =, *this ] PBRT_GPU(__VA_ARGS__) mutable
#else
#define PBRT_GPU_LAMBDA(...) [=] PBRT_GPU(__VA_ARGS__)
#endif
#ifdef PBRT_BUILD_GPU_RENDERER
#define PBRT_L1_CACHE_LINE_SIZE 128
#else
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录