J
joanna.wozna.intel 推送
Add Conv Transpose BF16 (#30877) caf9d398
29920次提交
名称 最后提交 最后更新
..
details
dynload
stream
details
dynload
stream
details
dynload
stream
details
dynload
stream
details
dynload
stream
details
dynload
stream
details
dynload
stream
details
dynload
stream
details
dynload
stream
CMakeLists.txt
bfloat16.h
bfloat16_test.cc
bkcl_helper.h
collective_helper.cc
collective_helper.h
complex128.h
complex64.h
cpu_helper.cc
cpu_helper.h
cpu_helper_test.cc
cpu_info.cc
cpu_info.h
cpu_info_test.cc
cuda_device_function.h
cuda_device_guard.cc
cuda_device_guard.h
cuda_error.proto
cuda_helper.h
cuda_helper_test.cu
cuda_primitives.h
cuda_profiler.h
cuda_resource_pool.cc
cuda_resource_pool.h
cudnn_desc.h
cudnn_desc_test.cc
cudnn_helper.h
cudnn_helper_test.cc
cudnn_workspace_helper.cc
cudnn_workspace_helper.h
denormal.cc
denormal.h
device_code.cc
device_code.h
device_code_test.cc
device_context.cc
device_context.h
device_context_test.cu
device_context_xpu_test.cc
device_memory_aligment.cc
device_memory_aligment.h
device_tracer.cc
device_tracer.h
enforce.cc
enforce.h
enforce_test.cc
error_codes.proto
errors.cc
errors.h
errors_test.cc
event.h
flags.cc
float16.h
float16_test.cc
float16_test.cu
for_range.h
gen_comm_id_helper.cc
gen_comm_id_helper.h
gloo_context.cc
gloo_context.h
gpu_info.cc
gpu_info.h
gpu_launch_config.h
hostdevice.h
init.cc
init.h
init_test.cc
lock_guard_ptr.h
lodtensor_printer.cc
lodtensor_printer.h
lodtensor_printer_test.cc
macros.h
miopen_desc.h
mkldnn_helper.h
mkldnn_reuse.h
monitor.cc
monitor.h
nccl_helper.h
place.cc
place.h
place_test.cc
port.h
profiler.cc
profiler.cu
profiler.h
profiler.proto
profiler_helper.h
profiler_test.cc
resource_pool.h
stream_callback_manager.cc
stream_callback_manager.h
test_limit_gpu_memory.cu
timer.cc
timer.h
timer_test.cc
transform.h
transform_test.cu
type_defs.h
variant.h
xpu_header.h
CMakeLists.txt
bfloat16.h
bfloat16_test.cc
bkcl_helper.h
collective_helper.cc
collective_helper.h
complex128.h
complex64.h
cpu_helper.cc
cpu_helper.h
cpu_helper_test.cc
cpu_info.cc
cpu_info.h
cpu_info_test.cc
cuda_device_function.h
cuda_device_guard.cc
cuda_device_guard.h
cuda_error.proto
cuda_helper.h
cuda_helper_test.cu
cuda_primitives.h
cuda_profiler.h
cuda_resource_pool.cc
cuda_resource_pool.h
cudnn_desc.h
cudnn_desc_test.cc
cudnn_helper.h
cudnn_helper_test.cc
cudnn_workspace_helper.cc
cudnn_workspace_helper.h
denormal.cc
denormal.h
device_code.cc
device_code.h
device_code_test.cc
device_context.cc
device_context.h
device_context_test.cu
device_context_xpu_test.cc
device_memory_aligment.cc
device_memory_aligment.h
device_tracer.cc
device_tracer.h
enforce.cc
enforce.h
enforce_test.cc
error_codes.proto
errors.cc
errors.h
errors_test.cc
event.h
flags.cc
float16.h
float16_test.cc
float16_test.cu
for_range.h
gen_comm_id_helper.cc
gen_comm_id_helper.h
gloo_context.cc
gloo_context.h
gpu_info.cc
gpu_info.h
gpu_launch_config.h
hostdevice.h
init.cc
init.h
init_test.cc
lock_guard_ptr.h
lodtensor_printer.cc
lodtensor_printer.h
lodtensor_printer_test.cc
macros.h
miopen_desc.h
mkldnn_helper.h
mkldnn_reuse.h
monitor.cc
monitor.h
nccl_helper.h
place.cc
place.h
place_test.cc
port.h
profiler.cc
profiler.cu
profiler.h
profiler.proto
profiler_helper.h
profiler_test.cc
resource_pool.h
stream_callback_manager.cc
stream_callback_manager.h
test_limit_gpu_memory.cu
timer.cc
timer.h
timer_test.cc
transform.h
transform_test.cu
type_defs.h
variant.h
xpu_header.h
CMakeLists.txt
bfloat16.h
bfloat16_test.cc
bkcl_helper.h
collective_helper.cc
collective_helper.h
complex128.h
complex64.h
cpu_helper.cc
cpu_helper.h
cpu_helper_test.cc
cpu_info.cc
cpu_info.h
cpu_info_test.cc
cuda_device_function.h
cuda_device_guard.cc
cuda_device_guard.h
cuda_error.proto
cuda_helper.h
cuda_helper_test.cu
cuda_primitives.h
cuda_profiler.h
cuda_resource_pool.cc
cuda_resource_pool.h
cudnn_desc.h
cudnn_desc_test.cc
cudnn_helper.h
cudnn_helper_test.cc
cudnn_workspace_helper.cc
cudnn_workspace_helper.h
denormal.cc
denormal.h
device_code.cc
device_code.h
device_code_test.cc
device_context.cc
device_context.h
device_context_test.cu
device_context_xpu_test.cc
device_memory_aligment.cc
device_memory_aligment.h
device_tracer.cc
device_tracer.h
enforce.cc
enforce.h
enforce_test.cc
error_codes.proto
errors.cc
errors.h
errors_test.cc
event.h
flags.cc
float16.h
float16_test.cc
float16_test.cu
for_range.h
gen_comm_id_helper.cc
gen_comm_id_helper.h
gloo_context.cc
gloo_context.h
gpu_info.cc
gpu_info.h
gpu_launch_config.h
hostdevice.h
init.cc
init.h
init_test.cc
lock_guard_ptr.h
lodtensor_printer.cc
lodtensor_printer.h
lodtensor_printer_test.cc
macros.h
miopen_desc.h
mkldnn_helper.h
mkldnn_reuse.h
monitor.cc
monitor.h
nccl_helper.h
place.cc
place.h
place_test.cc
port.h
profiler.cc
profiler.cu
profiler.h
profiler.proto
profiler_helper.h
profiler_test.cc
resource_pool.h
stream_callback_manager.cc
stream_callback_manager.h
test_limit_gpu_memory.cu
timer.cc
timer.h
timer_test.cc
transform.h
transform_test.cu
type_defs.h
variant.h
xpu_header.h
CMakeLists.txt
bfloat16.h
bfloat16_test.cc
bkcl_helper.h
collective_helper.cc
collective_helper.h
complex128.h
complex64.h
cpu_helper.cc
cpu_helper.h
cpu_helper_test.cc
cpu_info.cc
cpu_info.h
cpu_info_test.cc
cuda_device_function.h
cuda_device_guard.cc
cuda_device_guard.h
cuda_error.proto
cuda_helper.h
cuda_helper_test.cu
cuda_primitives.h
cuda_profiler.h
cuda_resource_pool.cc
cuda_resource_pool.h
cudnn_desc.h
cudnn_desc_test.cc
cudnn_helper.h
cudnn_helper_test.cc
cudnn_workspace_helper.cc
cudnn_workspace_helper.h
denormal.cc
denormal.h
device_code.cc
device_code.h
device_code_test.cc
device_context.cc
device_context.h
device_context_test.cu
device_context_xpu_test.cc
device_memory_aligment.cc
device_memory_aligment.h
device_tracer.cc
device_tracer.h
enforce.cc
enforce.h
enforce_test.cc
error_codes.proto
errors.cc
errors.h
errors_test.cc
event.h
flags.cc
float16.h
float16_test.cc
float16_test.cu
for_range.h
gen_comm_id_helper.cc
gen_comm_id_helper.h
gloo_context.cc
gloo_context.h
gpu_info.cc
gpu_info.h
gpu_launch_config.h
hostdevice.h
init.cc
init.h
init_test.cc
lock_guard_ptr.h
lodtensor_printer.cc
lodtensor_printer.h
lodtensor_printer_test.cc
macros.h
miopen_desc.h
mkldnn_helper.h
mkldnn_reuse.h
monitor.cc
monitor.h
nccl_helper.h
place.cc
place.h
place_test.cc
port.h
profiler.cc
profiler.cu
profiler.h
profiler.proto
profiler_helper.h
profiler_test.cc
resource_pool.h
stream_callback_manager.cc
stream_callback_manager.h
test_limit_gpu_memory.cu
timer.cc
timer.h
timer_test.cc
transform.h
transform_test.cu
type_defs.h
variant.h
xpu_header.h
CMakeLists.txt
bfloat16.h
bfloat16_test.cc
bkcl_helper.h
collective_helper.cc
collective_helper.h
complex128.h
complex64.h
cpu_helper.cc
cpu_helper.h
cpu_helper_test.cc
cpu_info.cc
cpu_info.h
cpu_info_test.cc
cuda_device_function.h
cuda_device_guard.cc
cuda_device_guard.h
cuda_error.proto
cuda_helper.h
cuda_helper_test.cu
cuda_primitives.h
cuda_profiler.h
cuda_resource_pool.cc
cuda_resource_pool.h
cudnn_desc.h
cudnn_desc_test.cc
cudnn_helper.h
cudnn_helper_test.cc
cudnn_workspace_helper.cc
cudnn_workspace_helper.h
denormal.cc
denormal.h
device_code.cc
device_code.h
device_code_test.cc
device_context.cc
device_context.h
device_context_test.cu
device_context_xpu_test.cc
device_memory_aligment.cc
device_memory_aligment.h
device_tracer.cc
device_tracer.h
enforce.cc
enforce.h
enforce_test.cc
error_codes.proto
errors.cc
errors.h
errors_test.cc
event.h
flags.cc
float16.h
float16_test.cc
float16_test.cu
for_range.h
gen_comm_id_helper.cc
gen_comm_id_helper.h
gloo_context.cc
gloo_context.h
gpu_info.cc
gpu_info.h
gpu_launch_config.h
hostdevice.h
init.cc
init.h
init_test.cc
lock_guard_ptr.h
lodtensor_printer.cc
lodtensor_printer.h
lodtensor_printer_test.cc
macros.h
miopen_desc.h
mkldnn_helper.h
mkldnn_reuse.h
monitor.cc
monitor.h
nccl_helper.h
place.cc
place.h
place_test.cc
port.h
profiler.cc
profiler.cu
profiler.h
profiler.proto
profiler_helper.h
profiler_test.cc
resource_pool.h
stream_callback_manager.cc
stream_callback_manager.h
test_limit_gpu_memory.cu
timer.cc
timer.h
timer_test.cc
transform.h
transform_test.cu
type_defs.h
variant.h
xpu_header.h
CMakeLists.txt
bfloat16.h
bfloat16_test.cc
bkcl_helper.h
collective_helper.cc
collective_helper.h
complex128.h
complex64.h
cpu_helper.cc
cpu_helper.h
cpu_helper_test.cc
cpu_info.cc
cpu_info.h
cpu_info_test.cc
cuda_device_function.h
cuda_device_guard.cc
cuda_device_guard.h
cuda_error.proto
cuda_helper.h
cuda_helper_test.cu
cuda_primitives.h
cuda_profiler.h
cuda_resource_pool.cc
cuda_resource_pool.h
cudnn_desc.h
cudnn_desc_test.cc
cudnn_helper.h
cudnn_helper_test.cc
cudnn_workspace_helper.cc
cudnn_workspace_helper.h
denormal.cc
denormal.h
device_code.cc
device_code.h
device_code_test.cc
device_context.cc
device_context.h
device_context_test.cu
device_context_xpu_test.cc
device_memory_aligment.cc
device_memory_aligment.h
device_tracer.cc
device_tracer.h
enforce.cc
enforce.h
enforce_test.cc
error_codes.proto
errors.cc
errors.h
errors_test.cc
event.h
flags.cc
float16.h
float16_test.cc
float16_test.cu
for_range.h
gen_comm_id_helper.cc
gen_comm_id_helper.h
gloo_context.cc
gloo_context.h
gpu_info.cc
gpu_info.h
gpu_launch_config.h
hostdevice.h
init.cc
init.h
init_test.cc
lock_guard_ptr.h
lodtensor_printer.cc
lodtensor_printer.h
lodtensor_printer_test.cc
macros.h
miopen_desc.h
mkldnn_helper.h
mkldnn_reuse.h
monitor.cc
monitor.h
nccl_helper.h
place.cc
place.h
place_test.cc
port.h
profiler.cc
profiler.cu
profiler.h
profiler.proto
profiler_helper.h
profiler_test.cc
resource_pool.h
stream_callback_manager.cc
stream_callback_manager.h
test_limit_gpu_memory.cu
timer.cc
timer.h
timer_test.cc
transform.h
transform_test.cu
type_defs.h
variant.h
xpu_header.h
CMakeLists.txt
bfloat16.h
bfloat16_test.cc
bkcl_helper.h
collective_helper.cc
collective_helper.h
complex128.h
complex64.h
cpu_helper.cc
cpu_helper.h
cpu_helper_test.cc
cpu_info.cc
cpu_info.h
cpu_info_test.cc
cuda_device_function.h
cuda_device_guard.cc
cuda_device_guard.h
cuda_error.proto
cuda_helper.h
cuda_helper_test.cu
cuda_primitives.h
cuda_profiler.h
cuda_resource_pool.cc
cuda_resource_pool.h
cudnn_desc.h
cudnn_desc_test.cc
cudnn_helper.h
cudnn_helper_test.cc
cudnn_workspace_helper.cc
cudnn_workspace_helper.h
denormal.cc
denormal.h
device_code.cc
device_code.h
device_code_test.cc
device_context.cc
device_context.h
device_context_test.cu
device_context_xpu_test.cc
device_memory_aligment.cc
device_memory_aligment.h
device_tracer.cc
device_tracer.h
enforce.cc
enforce.h
enforce_test.cc
error_codes.proto
errors.cc
errors.h
errors_test.cc
event.h
flags.cc
float16.h
float16_test.cc
float16_test.cu
for_range.h
gen_comm_id_helper.cc
gen_comm_id_helper.h
gloo_context.cc
gloo_context.h
gpu_info.cc
gpu_info.h
gpu_launch_config.h
hostdevice.h
init.cc
init.h
init_test.cc
lock_guard_ptr.h
lodtensor_printer.cc
lodtensor_printer.h
lodtensor_printer_test.cc
macros.h
miopen_desc.h
mkldnn_helper.h
mkldnn_reuse.h
monitor.cc
monitor.h
nccl_helper.h
place.cc
place.h
place_test.cc
port.h
profiler.cc
profiler.cu
profiler.h
profiler.proto
profiler_helper.h
profiler_test.cc
resource_pool.h
stream_callback_manager.cc
stream_callback_manager.h
test_limit_gpu_memory.cu
timer.cc
timer.h
timer_test.cc
transform.h
transform_test.cu
type_defs.h
variant.h
xpu_header.h
CMakeLists.txt
bfloat16.h
bfloat16_test.cc
bkcl_helper.h
collective_helper.cc
collective_helper.h
complex128.h
complex64.h
cpu_helper.cc
cpu_helper.h
cpu_helper_test.cc
cpu_info.cc
cpu_info.h
cpu_info_test.cc
cuda_device_function.h
cuda_device_guard.cc
cuda_device_guard.h
cuda_error.proto
cuda_helper.h
cuda_helper_test.cu
cuda_primitives.h
cuda_profiler.h
cuda_resource_pool.cc
cuda_resource_pool.h
cudnn_desc.h
cudnn_desc_test.cc
cudnn_helper.h
cudnn_helper_test.cc
cudnn_workspace_helper.cc
cudnn_workspace_helper.h
denormal.cc
denormal.h
device_code.cc
device_code.h
device_code_test.cc
device_context.cc
device_context.h
device_context_test.cu
device_context_xpu_test.cc
device_memory_aligment.cc
device_memory_aligment.h
device_tracer.cc
device_tracer.h
enforce.cc
enforce.h
enforce_test.cc
error_codes.proto
errors.cc
errors.h
errors_test.cc
event.h
flags.cc
float16.h
float16_test.cc
float16_test.cu
for_range.h
gen_comm_id_helper.cc
gen_comm_id_helper.h
gloo_context.cc
gloo_context.h
gpu_info.cc
gpu_info.h
gpu_launch_config.h
hostdevice.h
init.cc
init.h
init_test.cc
lock_guard_ptr.h
lodtensor_printer.cc
lodtensor_printer.h
lodtensor_printer_test.cc
macros.h
miopen_desc.h
mkldnn_helper.h
mkldnn_reuse.h
monitor.cc
monitor.h
nccl_helper.h
place.cc
place.h
place_test.cc
port.h
profiler.cc
profiler.cu
profiler.h
profiler.proto
profiler_helper.h
profiler_test.cc
resource_pool.h
stream_callback_manager.cc
stream_callback_manager.h
test_limit_gpu_memory.cu
timer.cc
timer.h
timer_test.cc
transform.h
transform_test.cu
type_defs.h
variant.h
xpu_header.h
CMakeLists.txt
bfloat16.h
bfloat16_test.cc
bkcl_helper.h
collective_helper.cc
collective_helper.h
complex128.h
complex64.h
cpu_helper.cc
cpu_helper.h
cpu_helper_test.cc
cpu_info.cc
cpu_info.h
cpu_info_test.cc
cuda_device_function.h
cuda_device_guard.cc
cuda_device_guard.h
cuda_error.proto
cuda_helper.h
cuda_helper_test.cu
cuda_primitives.h
cuda_profiler.h
cuda_resource_pool.cc
cuda_resource_pool.h
cudnn_desc.h
cudnn_desc_test.cc
cudnn_helper.h
cudnn_helper_test.cc
cudnn_workspace_helper.cc
cudnn_workspace_helper.h
denormal.cc
denormal.h
device_code.cc
device_code.h
device_code_test.cc
device_context.cc
device_context.h
device_context_test.cu
device_context_xpu_test.cc
device_memory_aligment.cc
device_memory_aligment.h
device_tracer.cc
device_tracer.h
enforce.cc
enforce.h
enforce_test.cc
error_codes.proto
errors.cc
errors.h
errors_test.cc
event.h
flags.cc
float16.h
float16_test.cc
float16_test.cu
for_range.h
gen_comm_id_helper.cc
gen_comm_id_helper.h
gloo_context.cc
gloo_context.h
gpu_info.cc
gpu_info.h
gpu_launch_config.h
hostdevice.h
init.cc
init.h
init_test.cc
lock_guard_ptr.h
lodtensor_printer.cc
lodtensor_printer.h
lodtensor_printer_test.cc
macros.h
miopen_desc.h
mkldnn_helper.h
mkldnn_reuse.h
monitor.cc
monitor.h
nccl_helper.h
place.cc
place.h
place_test.cc
port.h
profiler.cc
profiler.cu
profiler.h
profiler.proto
profiler_helper.h
profiler_test.cc
resource_pool.h
stream_callback_manager.cc
stream_callback_manager.h
test_limit_gpu_memory.cu
timer.cc
timer.h
timer_test.cc
transform.h
transform_test.cu
type_defs.h
variant.h
xpu_header.h

项目简介

PArallel Distributed Deep LEarning: Machine Learning Framework from Industrial Practice (『飞桨』核心框架,深度学习&机器学习高性能单机、分布式训练和跨平台部署)

:rocket: Github 镜像仓库 :rocket:

源项目地址 :arrow_down: :arrow_down: :arrow_down:

https://github.com/paddlepaddle/paddle

发行版本

当前项目没有发行版本

贡献者 228

全部贡献者

开发语言

  • C++ 45.8 %
  • Python 45.5 %
  • Cuda 6.4 %
  • CMake 1.1 %
  • Shell 0.7 %