// imma8832_256x64.cuh (631 bytes)
#pragma once

#include "./base.cuh"

// Compile-time configuration constants, exposed as preprocessor macros.
// Nothing in this header uses them; they are textually substituted wherever
// the including code mentions them, so the replacement lists below should be
// treated as part of that code's behavior.
// NOTE(review): the meanings suggested in the comments below are inferred
// from the macro names only -- confirm against the code that consumes them.

// Presumably the thread-block extent in x and y (128 x 1) -- TODO confirm.
#define TX             128
#define TY             1
// Presumably the per-block tile extents along M, N and K -- TODO confirm.
#define BM             64
#define BN             256
#define BK             128
// Presumably the shape (m, n, k) of a single matrix-multiply-accumulate
// operation -- TODO confirm which instruction this corresponds to.
#define mma_m          16
#define mma_n          8
#define mma_k          64
// Presumably per-thread register tile extents along M and N -- TODO confirm.
#define reg_m          8
#define reg_n          8
// NOTE(review): looks like the number of channels packed together in the
// data layout -- confirm against the consumer.
#define packed_channel 64
// BK divided by 32; evaluates to 4 with BK == 128.
#define BKd32          (BK / 32)
// reg_m divided by 4 (written as a right shift by 2); evaluates to 2.
#define reg_md4        (reg_m >> 2)
// TX divided by 32; evaluates to 4 with TX == 128. The 32 is presumably the
// warp size.
#define WARPS          (TX / 32)
// NOTE(review): unit (bytes vs. elements) is not visible here -- confirm.
#define cache_per_warp 128
// reg_n divided by 4 (written as a right shift by 2); evaluates to 2.
#define reg_nd4        (reg_n >> 2)
// BN * BK / (16 * TX); evaluates to 16 with the values above. The divisor 16
// is a literal, not ldg_width, so changing ldg_width does not affect it.
#define ldg_src        (BN * BK / (16 * TX))
// BM * BK / (16 * TX); evaluates to 4 with the values above. Same literal-16
// caveat as ldg_src.
#define ldg_filter     (BM * BK / (16 * TX))
// Presumably the width of one global load -- TODO confirm unit. Has the same
// value as the literal 16 used in ldg_src / ldg_filter.
#define ldg_width      16

// vim: syntax=cpp.doxygen