提交 be7f2615 编写于 作者: Ben Skeggs

drm/nvc0/gr: update fuc source to assemble with latest envyas

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
上级 020c6bf3
...@@ -71,9 +71,9 @@ queue_put: ...@@ -71,9 +71,9 @@ queue_put:
ld b32 $r9 D[$r13 + 0x4] // PUT ld b32 $r9 D[$r13 + 0x4] // PUT
xor $r8 8 xor $r8 8
cmpu b32 $r8 $r9 cmpu b32 $r8 $r9
bra ne queue_put_next bra ne #queue_put_next
mov $r15 E_CMD_OVERFLOW mov $r15 E_CMD_OVERFLOW
call error call #error
ret ret
// store cmd/data on queue // store cmd/data on queue
...@@ -104,7 +104,7 @@ queue_get: ...@@ -104,7 +104,7 @@ queue_get:
ld b32 $r8 D[$r13 + 0x0] // GET ld b32 $r8 D[$r13 + 0x0] // GET
ld b32 $r9 D[$r13 + 0x4] // PUT ld b32 $r9 D[$r13 + 0x4] // PUT
cmpu b32 $r8 $r9 cmpu b32 $r8 $r9
bra e queue_get_done bra e #queue_get_done
// fetch first cmd/data pair // fetch first cmd/data pair
and $r9 $r8 7 and $r9 $r8 7
shl b32 $r9 3 shl b32 $r9 3
...@@ -135,9 +135,9 @@ nv_rd32: ...@@ -135,9 +135,9 @@ nv_rd32:
nv_rd32_wait: nv_rd32_wait:
iord $r12 I[$r11 + 0x000] iord $r12 I[$r11 + 0x000]
xbit $r12 $r12 31 xbit $r12 $r12 31
bra ne nv_rd32_wait bra ne #nv_rd32_wait
mov $r10 6 // DONE_MMIO_RD mov $r10 6 // DONE_MMIO_RD
call wait_doneo call #wait_doneo
iord $r15 I[$r11 + 0x100] // MMIO_RDVAL iord $r15 I[$r11 + 0x100] // MMIO_RDVAL
ret ret
...@@ -157,7 +157,7 @@ nv_wr32: ...@@ -157,7 +157,7 @@ nv_wr32:
nv_wr32_wait: nv_wr32_wait:
iord $r12 I[$r11 + 0x000] iord $r12 I[$r11 + 0x000]
xbit $r12 $r12 31 xbit $r12 $r12 31
bra ne nv_wr32_wait bra ne #nv_wr32_wait
ret ret
// (re)set watchdog timer // (re)set watchdog timer
...@@ -193,7 +193,7 @@ $1: ...@@ -193,7 +193,7 @@ $1:
shl b32 $r8 6 shl b32 $r8 6
iord $r8 I[$r8 + 0x000] // DONE iord $r8 I[$r8 + 0x000] // DONE
xbit $r8 $r8 $r10 xbit $r8 $r8 $r10
bra $2 wait_done_$1 bra $2 #wait_done_$1
trace_clr(T_WAIT) trace_clr(T_WAIT)
ret ret
') ')
...@@ -216,7 +216,7 @@ mmctx_size: ...@@ -216,7 +216,7 @@ mmctx_size:
add b32 $r9 $r8 add b32 $r9 $r8
add b32 $r14 4 add b32 $r14 4
cmpu b32 $r14 $r15 cmpu b32 $r14 $r15
bra ne nv_mmctx_size_loop bra ne #nv_mmctx_size_loop
mov b32 $r15 $r9 mov b32 $r15 $r9
ret ret
...@@ -238,12 +238,12 @@ mmctx_xfer: ...@@ -238,12 +238,12 @@ mmctx_xfer:
shl b32 $r8 6 shl b32 $r8 6
clear b32 $r9 clear b32 $r9
or $r11 $r11 or $r11 $r11
bra e mmctx_base_disabled bra e #mmctx_base_disabled
iowr I[$r8 + 0x000] $r11 // MMCTX_BASE iowr I[$r8 + 0x000] $r11 // MMCTX_BASE
bset $r9 0 // BASE_EN bset $r9 0 // BASE_EN
mmctx_base_disabled: mmctx_base_disabled:
or $r14 $r14 or $r14 $r14
bra e mmctx_multi_disabled bra e #mmctx_multi_disabled
iowr I[$r8 + 0x200] $r14 // MMCTX_MULTI_STRIDE iowr I[$r8 + 0x200] $r14 // MMCTX_MULTI_STRIDE
iowr I[$r8 + 0x300] $r15 // MMCTX_MULTI_MASK iowr I[$r8 + 0x300] $r15 // MMCTX_MULTI_MASK
bset $r9 1 // MULTI_EN bset $r9 1 // MULTI_EN
...@@ -264,7 +264,7 @@ mmctx_xfer: ...@@ -264,7 +264,7 @@ mmctx_xfer:
mmctx_wait_free: mmctx_wait_free:
iord $r14 I[$r8 + 0x000] // MMCTX_CTRL iord $r14 I[$r8 + 0x000] // MMCTX_CTRL
and $r14 0x1f and $r14 0x1f
bra e mmctx_wait_free bra e #mmctx_wait_free
// queue up an entry // queue up an entry
ld b32 $r14 D[$r12] ld b32 $r14 D[$r12]
...@@ -272,19 +272,19 @@ mmctx_xfer: ...@@ -272,19 +272,19 @@ mmctx_xfer:
iowr I[$r8 + 0x300] $r14 iowr I[$r8 + 0x300] $r14
add b32 $r12 4 add b32 $r12 4
cmpu b32 $r12 $r13 cmpu b32 $r12 $r13
bra ne mmctx_exec_loop bra ne #mmctx_exec_loop
xbit $r11 $r10 2 xbit $r11 $r10 2
bra ne mmctx_stop bra ne #mmctx_stop
// wait for queue to empty // wait for queue to empty
mmctx_fini_wait: mmctx_fini_wait:
iord $r11 I[$r8 + 0x000] // MMCTX_CTRL iord $r11 I[$r8 + 0x000] // MMCTX_CTRL
and $r11 0x1f and $r11 0x1f
cmpu b32 $r11 0x10 cmpu b32 $r11 0x10
bra ne mmctx_fini_wait bra ne #mmctx_fini_wait
mov $r10 2 // DONE_MMCTX mov $r10 2 // DONE_MMCTX
call wait_donez call #wait_donez
bra mmctx_done bra #mmctx_done
mmctx_stop: mmctx_stop:
xbit $r11 $r10 0 xbit $r11 $r10 0
shl b32 $r11 16 // DIR shl b32 $r11 16 // DIR
...@@ -295,7 +295,7 @@ mmctx_xfer: ...@@ -295,7 +295,7 @@ mmctx_xfer:
// wait for STOP_TRIGGER to clear // wait for STOP_TRIGGER to clear
iord $r11 I[$r8 + 0x000] // MMCTX_CTRL iord $r11 I[$r8 + 0x000] // MMCTX_CTRL
xbit $r11 $r11 18 xbit $r11 $r11 18
bra ne mmctx_stop_wait bra ne #mmctx_stop_wait
mmctx_done: mmctx_done:
trace_clr(T_MMCTX) trace_clr(T_MMCTX)
ret ret
...@@ -305,7 +305,7 @@ mmctx_xfer: ...@@ -305,7 +305,7 @@ mmctx_xfer:
strand_wait: strand_wait:
push $r10 push $r10
mov $r10 2 mov $r10 2
call wait_donez call #wait_donez
pop $r10 pop $r10
ret ret
...@@ -316,7 +316,7 @@ strand_pre: ...@@ -316,7 +316,7 @@ strand_pre:
sethi $r8 0x20000 sethi $r8 0x20000
mov $r9 0xc mov $r9 0xc
iowr I[$r8] $r9 iowr I[$r8] $r9
call strand_wait call #strand_wait
ret ret
// unknown - call after issuing strand commands // unknown - call after issuing strand commands
...@@ -326,7 +326,7 @@ strand_post: ...@@ -326,7 +326,7 @@ strand_post:
sethi $r8 0x20000 sethi $r8 0x20000
mov $r9 0xd mov $r9 0xd
iowr I[$r8] $r9 iowr I[$r8] $r9
call strand_wait call #strand_wait
ret ret
// Selects strand set?! // Selects strand set?!
...@@ -341,11 +341,11 @@ strand_set: ...@@ -341,11 +341,11 @@ strand_set:
iowr I[$r10 + 0x000] $r12 // 0x93c = 0xf iowr I[$r10 + 0x000] $r12 // 0x93c = 0xf
mov $r12 0xb mov $r12 0xb
iowr I[$r11 + 0x000] $r12 // 0x928 = 0xb iowr I[$r11 + 0x000] $r12 // 0x928 = 0xb
call strand_wait call #strand_wait
iowr I[$r10 + 0x000] $r14 // 0x93c = <id> iowr I[$r10 + 0x000] $r14 // 0x93c = <id>
mov $r12 0xa mov $r12 0xa
iowr I[$r11 + 0x000] $r12 // 0x928 = 0xa iowr I[$r11 + 0x000] $r12 // 0x928 = 0xa
call strand_wait call #strand_wait
ret ret
// Initialise strand context data // Initialise strand context data
...@@ -357,22 +357,22 @@ strand_set: ...@@ -357,22 +357,22 @@ strand_set:
// //
strand_ctx_init: strand_ctx_init:
trace_set(T_STRINIT) trace_set(T_STRINIT)
call strand_pre call #strand_pre
mov $r14 3 mov $r14 3
call strand_set call #strand_set
mov $r10 0x46fc mov $r10 0x46fc
sethi $r10 0x20000 sethi $r10 0x20000
add b32 $r11 $r10 0x400 add b32 $r11 $r10 0x400
iowr I[$r10 + 0x100] $r0 // STRAND_FIRST_GENE = 0 iowr I[$r10 + 0x100] $r0 // STRAND_FIRST_GENE = 0
mov $r12 1 mov $r12 1
iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_FIRST_GENE iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_FIRST_GENE
call strand_wait call #strand_wait
sub b32 $r12 $r0 1 sub b32 $r12 $r0 1
iowr I[$r10 + 0x000] $r12 // STRAND_GENE_CNT = 0xffffffff iowr I[$r10 + 0x000] $r12 // STRAND_GENE_CNT = 0xffffffff
mov $r12 2 mov $r12 2
iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_GENE_CNT iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_GENE_CNT
call strand_wait call #strand_wait
call strand_post call #strand_post
// read the size of each strand, poke the context offset of // read the size of each strand, poke the context offset of
// each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry // each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry
...@@ -391,7 +391,7 @@ strand_ctx_init: ...@@ -391,7 +391,7 @@ strand_ctx_init:
add b32 $r14 $r10 add b32 $r14 $r10
add b32 $r8 4 add b32 $r8 4
sub b32 $r9 1 sub b32 $r9 1
bra ne ctx_init_strand_loop bra ne #ctx_init_strand_loop
shl b32 $r14 8 shl b32 $r14 8
sub b32 $r15 $r14 $r15 sub b32 $r15 $r14 $r15
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
* - watchdog timer around ctx operations * - watchdog timer around ctx operations
*/ */
.section nvc0_grgpc_data .section #nvc0_grgpc_data
include(`nvc0_graph.fuc') include(`nvc0_graph.fuc')
gpc_id: .b32 0 gpc_id: .b32 0
gpc_mmio_list_head: .b32 0 gpc_mmio_list_head: .b32 0
...@@ -48,40 +48,40 @@ cmd_queue: queue_init ...@@ -48,40 +48,40 @@ cmd_queue: queue_init
// chipset descriptions // chipset descriptions
chipsets: chipsets:
.b8 0xc0 0 0 0 .b8 0xc0 0 0 0
.b16 nvc0_gpc_mmio_head .b16 #nvc0_gpc_mmio_head
.b16 nvc0_gpc_mmio_tail .b16 #nvc0_gpc_mmio_tail
.b16 nvc0_tpc_mmio_head .b16 #nvc0_tpc_mmio_head
.b16 nvc0_tpc_mmio_tail .b16 #nvc0_tpc_mmio_tail
.b8 0xc1 0 0 0 .b8 0xc1 0 0 0
.b16 nvc0_gpc_mmio_head .b16 #nvc0_gpc_mmio_head
.b16 nvc1_gpc_mmio_tail .b16 #nvc1_gpc_mmio_tail
.b16 nvc0_tpc_mmio_head .b16 #nvc0_tpc_mmio_head
.b16 nvc1_tpc_mmio_tail .b16 #nvc1_tpc_mmio_tail
.b8 0xc3 0 0 0 .b8 0xc3 0 0 0
.b16 nvc0_gpc_mmio_head .b16 #nvc0_gpc_mmio_head
.b16 nvc0_gpc_mmio_tail .b16 #nvc0_gpc_mmio_tail
.b16 nvc0_tpc_mmio_head .b16 #nvc0_tpc_mmio_head
.b16 nvc3_tpc_mmio_tail .b16 #nvc3_tpc_mmio_tail
.b8 0xc4 0 0 0 .b8 0xc4 0 0 0
.b16 nvc0_gpc_mmio_head .b16 #nvc0_gpc_mmio_head
.b16 nvc0_gpc_mmio_tail .b16 #nvc0_gpc_mmio_tail
.b16 nvc0_tpc_mmio_head .b16 #nvc0_tpc_mmio_head
.b16 nvc3_tpc_mmio_tail .b16 #nvc3_tpc_mmio_tail
.b8 0xc8 0 0 0 .b8 0xc8 0 0 0
.b16 nvc0_gpc_mmio_head .b16 #nvc0_gpc_mmio_head
.b16 nvc0_gpc_mmio_tail .b16 #nvc0_gpc_mmio_tail
.b16 nvc0_tpc_mmio_head .b16 #nvc0_tpc_mmio_head
.b16 nvc0_tpc_mmio_tail .b16 #nvc0_tpc_mmio_tail
.b8 0xce 0 0 0 .b8 0xce 0 0 0
.b16 nvc0_gpc_mmio_head .b16 #nvc0_gpc_mmio_head
.b16 nvc0_gpc_mmio_tail .b16 #nvc0_gpc_mmio_tail
.b16 nvc0_tpc_mmio_head .b16 #nvc0_tpc_mmio_head
.b16 nvc3_tpc_mmio_tail .b16 #nvc3_tpc_mmio_tail
.b8 0xcf 0 0 0 .b8 0xcf 0 0 0
.b16 nvc0_gpc_mmio_head .b16 #nvc0_gpc_mmio_head
.b16 nvc0_gpc_mmio_tail .b16 #nvc0_gpc_mmio_tail
.b16 nvc0_tpc_mmio_head .b16 #nvc0_tpc_mmio_head
.b16 nvcf_tpc_mmio_tail .b16 #nvcf_tpc_mmio_tail
.b8 0 0 0 0 .b8 0 0 0 0
// GPC mmio lists // GPC mmio lists
...@@ -147,8 +147,8 @@ mmctx_data(0x000544, 1) ...@@ -147,8 +147,8 @@ mmctx_data(0x000544, 1)
nvc1_tpc_mmio_tail: nvc1_tpc_mmio_tail:
.section nvc0_grgpc_code .section #nvc0_grgpc_code
bra init bra #init
define(`include_code') define(`include_code')
include(`nvc0_graph.fuc') include(`nvc0_graph.fuc')
...@@ -160,10 +160,10 @@ error: ...@@ -160,10 +160,10 @@ error:
push $r14 push $r14
mov $r14 -0x67ec // 0x9814 mov $r14 -0x67ec // 0x9814
sethi $r14 0x400000 sethi $r14 0x400000
call nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code call #nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code
add b32 $r14 0x41c add b32 $r14 0x41c
mov $r15 1 mov $r15 1
call nv_wr32 // HUB_CTXCTL_INTR_UP_SET call #nv_wr32 // HUB_CTXCTL_INTR_UP_SET
pop $r14 pop $r14
ret ret
...@@ -190,7 +190,7 @@ init: ...@@ -190,7 +190,7 @@ init:
iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
// setup i0 handler, and route all interrupts to it // setup i0 handler, and route all interrupts to it
mov $r1 ih mov $r1 #ih
mov $iv0 $r1 mov $iv0 $r1
mov $r1 0x400 mov $r1 0x400
iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
...@@ -210,24 +210,24 @@ init: ...@@ -210,24 +210,24 @@ init:
and $r2 0x1f and $r2 0x1f
shl b32 $r3 $r2 shl b32 $r3 $r2
sub b32 $r3 1 sub b32 $r3 1
st b32 D[$r0 + tpc_count] $r2 st b32 D[$r0 + #tpc_count] $r2
st b32 D[$r0 + tpc_mask] $r3 st b32 D[$r0 + #tpc_mask] $r3
add b32 $r1 0x400 add b32 $r1 0x400
iord $r2 I[$r1 + 0x000] // MYINDEX iord $r2 I[$r1 + 0x000] // MYINDEX
st b32 D[$r0 + gpc_id] $r2 st b32 D[$r0 + #gpc_id] $r2
// find context data for this chipset // find context data for this chipset
mov $r2 0x800 mov $r2 0x800
shl b32 $r2 6 shl b32 $r2 6
iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
mov $r1 chipsets - 12 mov $r1 #chipsets - 12
init_find_chipset: init_find_chipset:
add b32 $r1 12 add b32 $r1 12
ld b32 $r3 D[$r1 + 0x00] ld b32 $r3 D[$r1 + 0x00]
cmpu b32 $r3 $r2 cmpu b32 $r3 $r2
bra e init_context bra e #init_context
cmpu b32 $r3 0 cmpu b32 $r3 0
bra ne init_find_chipset bra ne #init_find_chipset
// unknown chipset // unknown chipset
ret ret
...@@ -253,19 +253,19 @@ init: ...@@ -253,19 +253,19 @@ init:
clear b32 $r15 clear b32 $r15
ld b16 $r14 D[$r1 + 4] ld b16 $r14 D[$r1 + 4]
ld b16 $r15 D[$r1 + 6] ld b16 $r15 D[$r1 + 6]
st b16 D[$r0 + gpc_mmio_list_head] $r14 st b16 D[$r0 + #gpc_mmio_list_head] $r14
st b16 D[$r0 + gpc_mmio_list_tail] $r15 st b16 D[$r0 + #gpc_mmio_list_tail] $r15
call mmctx_size call #mmctx_size
add b32 $r2 $r15 add b32 $r2 $r15
add b32 $r3 $r15 add b32 $r3 $r15
// calculate per-TPC mmio context size, store the list pointers // calculate per-TPC mmio context size, store the list pointers
ld b16 $r14 D[$r1 + 8] ld b16 $r14 D[$r1 + 8]
ld b16 $r15 D[$r1 + 10] ld b16 $r15 D[$r1 + 10]
st b16 D[$r0 + tpc_mmio_list_head] $r14 st b16 D[$r0 + #tpc_mmio_list_head] $r14
st b16 D[$r0 + tpc_mmio_list_tail] $r15 st b16 D[$r0 + #tpc_mmio_list_tail] $r15
call mmctx_size call #mmctx_size
ld b32 $r14 D[$r0 + tpc_count] ld b32 $r14 D[$r0 + #tpc_count]
mulu $r14 $r15 mulu $r14 $r15
add b32 $r2 $r14 add b32 $r2 $r14
add b32 $r3 $r14 add b32 $r3 $r14
...@@ -283,7 +283,7 @@ init: ...@@ -283,7 +283,7 @@ init:
// calculate size of strand context data // calculate size of strand context data
mov b32 $r15 $r2 mov b32 $r15 $r2
call strand_ctx_init call #strand_ctx_init
add b32 $r3 $r15 add b32 $r3 $r15
// save context size, and tell HUB we're done // save context size, and tell HUB we're done
...@@ -301,13 +301,13 @@ init: ...@@ -301,13 +301,13 @@ init:
main: main:
bset $flags $p0 bset $flags $p0
sleep $p0 sleep $p0
mov $r13 cmd_queue mov $r13 #cmd_queue
call queue_get call #queue_get
bra $p1 main bra $p1 #main
// 0x0000-0x0003 are all context transfers // 0x0000-0x0003 are all context transfers
cmpu b32 $r14 0x04 cmpu b32 $r14 0x04
bra nc main_not_ctx_xfer bra nc #main_not_ctx_xfer
// fetch $flags and mask off $p1/$p2 // fetch $flags and mask off $p1/$p2
mov $r1 $flags mov $r1 $flags
mov $r2 0x0006 mov $r2 0x0006
...@@ -318,14 +318,14 @@ main: ...@@ -318,14 +318,14 @@ main:
or $r1 $r14 or $r1 $r14
mov $flags $r1 mov $flags $r1
// transfer context data // transfer context data
call ctx_xfer call #ctx_xfer
bra main bra #main
main_not_ctx_xfer: main_not_ctx_xfer:
shl b32 $r15 $r14 16 shl b32 $r15 $r14 16
or $r15 E_BAD_COMMAND or $r15 E_BAD_COMMAND
call error call #error
bra main bra #main
// interrupt handler // interrupt handler
ih: ih:
...@@ -342,13 +342,13 @@ ih: ...@@ -342,13 +342,13 @@ ih:
// incoming fifo command? // incoming fifo command?
iord $r10 I[$r0 + 0x200] // INTR iord $r10 I[$r0 + 0x200] // INTR
and $r11 $r10 0x00000004 and $r11 $r10 0x00000004
bra e ih_no_fifo bra e #ih_no_fifo
// queue incoming fifo command for later processing // queue incoming fifo command for later processing
mov $r11 0x1900 mov $r11 0x1900
mov $r13 cmd_queue mov $r13 #cmd_queue
iord $r14 I[$r11 + 0x100] // FIFO_CMD iord $r14 I[$r11 + 0x100] // FIFO_CMD
iord $r15 I[$r11 + 0x000] // FIFO_DATA iord $r15 I[$r11 + 0x000] // FIFO_DATA
call queue_put call #queue_put
add b32 $r11 0x400 add b32 $r11 0x400
mov $r14 1 mov $r14 1
iowr I[$r11 + 0x000] $r14 // FIFO_ACK iowr I[$r11 + 0x000] $r14 // FIFO_ACK
...@@ -374,11 +374,11 @@ ih: ...@@ -374,11 +374,11 @@ ih:
// //
hub_barrier_done: hub_barrier_done:
mov $r15 1 mov $r15 1
ld b32 $r14 D[$r0 + gpc_id] ld b32 $r14 D[$r0 + #gpc_id]
shl b32 $r15 $r14 shl b32 $r15 $r14
mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET
sethi $r14 0x400000 sethi $r14 0x400000
call nv_wr32 call #nv_wr32
ret ret
// Disables various things, waits a bit, and re-enables them.. // Disables various things, waits a bit, and re-enables them..
...@@ -395,7 +395,7 @@ ctx_redswitch: ...@@ -395,7 +395,7 @@ ctx_redswitch:
mov $r15 8 mov $r15 8
ctx_redswitch_delay: ctx_redswitch_delay:
sub b32 $r15 1 sub b32 $r15 1
bra ne ctx_redswitch_delay bra ne #ctx_redswitch_delay
mov $r15 0xa20 mov $r15 0xa20
iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER
ret ret
...@@ -413,8 +413,8 @@ ctx_xfer: ...@@ -413,8 +413,8 @@ ctx_xfer:
mov $r1 0xa04 mov $r1 0xa04
shl b32 $r1 6 shl b32 $r1 6
iowr I[$r1 + 0x000] $r15// MEM_BASE iowr I[$r1 + 0x000] $r15// MEM_BASE
bra not $p1 ctx_xfer_not_load bra not $p1 #ctx_xfer_not_load
call ctx_redswitch call #ctx_redswitch
ctx_xfer_not_load: ctx_xfer_not_load:
// strands // strands
...@@ -422,7 +422,7 @@ ctx_xfer: ...@@ -422,7 +422,7 @@ ctx_xfer:
sethi $r1 0x20000 sethi $r1 0x20000
mov $r2 0xc mov $r2 0xc
iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
call strand_wait call #strand_wait
mov $r2 0x47fc mov $r2 0x47fc
sethi $r2 0x20000 sethi $r2 0x20000
iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
...@@ -435,46 +435,46 @@ ctx_xfer: ...@@ -435,46 +435,46 @@ ctx_xfer:
or $r10 2 // first or $r10 2 // first
mov $r11 0x0000 mov $r11 0x0000
sethi $r11 0x500000 sethi $r11 0x500000
ld b32 $r12 D[$r0 + gpc_id] ld b32 $r12 D[$r0 + #gpc_id]
shl b32 $r12 15 shl b32 $r12 15
add b32 $r11 $r12 // base = NV_PGRAPH_GPCn add b32 $r11 $r12 // base = NV_PGRAPH_GPCn
ld b32 $r12 D[$r0 + gpc_mmio_list_head] ld b32 $r12 D[$r0 + #gpc_mmio_list_head]
ld b32 $r13 D[$r0 + gpc_mmio_list_tail] ld b32 $r13 D[$r0 + #gpc_mmio_list_tail]
mov $r14 0 // not multi mov $r14 0 // not multi
call mmctx_xfer call #mmctx_xfer
// per-TPC mmio context // per-TPC mmio context
xbit $r10 $flags $p1 // direction xbit $r10 $flags $p1 // direction
or $r10 4 // last or $r10 4 // last
mov $r11 0x4000 mov $r11 0x4000
sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0 sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0
ld b32 $r12 D[$r0 + gpc_id] ld b32 $r12 D[$r0 + #gpc_id]
shl b32 $r12 15 shl b32 $r12 15
add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0 add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0
ld b32 $r12 D[$r0 + tpc_mmio_list_head] ld b32 $r12 D[$r0 + #tpc_mmio_list_head]
ld b32 $r13 D[$r0 + tpc_mmio_list_tail] ld b32 $r13 D[$r0 + #tpc_mmio_list_tail]
ld b32 $r15 D[$r0 + tpc_mask] ld b32 $r15 D[$r0 + #tpc_mask]
mov $r14 0x800 // stride = 0x800 mov $r14 0x800 // stride = 0x800
call mmctx_xfer call #mmctx_xfer
// wait for strands to finish // wait for strands to finish
call strand_wait call #strand_wait
// if load, or a save without a load following, do some // if load, or a save without a load following, do some
// unknown stuff that's done after finishing a block of // unknown stuff that's done after finishing a block of
// strand commands // strand commands
bra $p1 ctx_xfer_post bra $p1 #ctx_xfer_post
bra not $p2 ctx_xfer_done bra not $p2 #ctx_xfer_done
ctx_xfer_post: ctx_xfer_post:
mov $r1 0x4afc mov $r1 0x4afc
sethi $r1 0x20000 sethi $r1 0x20000
mov $r2 0xd mov $r2 0xd
iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d
call strand_wait call #strand_wait
// mark completion in HUB's barrier // mark completion in HUB's barrier
ctx_xfer_done: ctx_xfer_done:
call hub_barrier_done call #hub_barrier_done
ret ret
.align 256 .align 256
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
* m4 nvc0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grhub.fuc.h * m4 nvc0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grhub.fuc.h
*/ */
.section nvc0_grhub_data .section #nvc0_grhub_data
include(`nvc0_graph.fuc') include(`nvc0_graph.fuc')
gpc_count: .b32 0 gpc_count: .b32 0
rop_count: .b32 0 rop_count: .b32 0
...@@ -39,26 +39,26 @@ ctx_current: .b32 0 ...@@ -39,26 +39,26 @@ ctx_current: .b32 0
chipsets: chipsets:
.b8 0xc0 0 0 0 .b8 0xc0 0 0 0
.b16 nvc0_hub_mmio_head .b16 #nvc0_hub_mmio_head
.b16 nvc0_hub_mmio_tail .b16 #nvc0_hub_mmio_tail
.b8 0xc1 0 0 0 .b8 0xc1 0 0 0
.b16 nvc0_hub_mmio_head .b16 #nvc0_hub_mmio_head
.b16 nvc1_hub_mmio_tail .b16 #nvc1_hub_mmio_tail
.b8 0xc3 0 0 0 .b8 0xc3 0 0 0
.b16 nvc0_hub_mmio_head .b16 #nvc0_hub_mmio_head
.b16 nvc0_hub_mmio_tail .b16 #nvc0_hub_mmio_tail
.b8 0xc4 0 0 0 .b8 0xc4 0 0 0
.b16 nvc0_hub_mmio_head .b16 #nvc0_hub_mmio_head
.b16 nvc0_hub_mmio_tail .b16 #nvc0_hub_mmio_tail
.b8 0xc8 0 0 0 .b8 0xc8 0 0 0
.b16 nvc0_hub_mmio_head .b16 #nvc0_hub_mmio_head
.b16 nvc0_hub_mmio_tail .b16 #nvc0_hub_mmio_tail
.b8 0xce 0 0 0 .b8 0xce 0 0 0
.b16 nvc0_hub_mmio_head .b16 #nvc0_hub_mmio_head
.b16 nvc0_hub_mmio_tail .b16 #nvc0_hub_mmio_tail
.b8 0xcf 0 0 0 .b8 0xcf 0 0 0
.b16 nvc0_hub_mmio_head .b16 #nvc0_hub_mmio_head
.b16 nvc0_hub_mmio_tail .b16 #nvc0_hub_mmio_tail
.b8 0 0 0 0 .b8 0 0 0 0
nvc0_hub_mmio_head: nvc0_hub_mmio_head:
...@@ -113,8 +113,8 @@ chan_mmio_address: .b32 0 ...@@ -113,8 +113,8 @@ chan_mmio_address: .b32 0
.align 256 .align 256
xfer_data: .b32 0 xfer_data: .b32 0
.section nvc0_grhub_code .section #nvc0_grhub_code
bra init bra #init
define(`include_code') define(`include_code')
include(`nvc0_graph.fuc') include(`nvc0_graph.fuc')
...@@ -157,7 +157,7 @@ init: ...@@ -157,7 +157,7 @@ init:
iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
// setup i0 handler, and route all interrupts to it // setup i0 handler, and route all interrupts to it
mov $r1 ih mov $r1 #ih
mov $iv0 $r1 mov $iv0 $r1
mov $r1 0x400 mov $r1 0x400
iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
...@@ -201,11 +201,11 @@ init: ...@@ -201,11 +201,11 @@ init:
// fetch enabled GPC/ROP counts // fetch enabled GPC/ROP counts
mov $r14 -0x69fc // 0x409604 mov $r14 -0x69fc // 0x409604
sethi $r14 0x400000 sethi $r14 0x400000
call nv_rd32 call #nv_rd32
extr $r1 $r15 16:20 extr $r1 $r15 16:20
st b32 D[$r0 + rop_count] $r1 st b32 D[$r0 + #rop_count] $r1
and $r15 0x1f and $r15 0x1f
st b32 D[$r0 + gpc_count] $r15 st b32 D[$r0 + #gpc_count] $r15
// set BAR_REQMASK to GPC mask // set BAR_REQMASK to GPC mask
mov $r1 1 mov $r1 1
...@@ -220,14 +220,14 @@ init: ...@@ -220,14 +220,14 @@ init:
mov $r2 0x800 mov $r2 0x800
shl b32 $r2 6 shl b32 $r2 6
iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
mov $r15 chipsets - 8 mov $r15 #chipsets - 8
init_find_chipset: init_find_chipset:
add b32 $r15 8 add b32 $r15 8
ld b32 $r3 D[$r15 + 0x00] ld b32 $r3 D[$r15 + 0x00]
cmpu b32 $r3 $r2 cmpu b32 $r3 $r2
bra e init_context bra e #init_context
cmpu b32 $r3 0 cmpu b32 $r3 0
bra ne init_find_chipset bra ne #init_find_chipset
// unknown chipset // unknown chipset
ret ret
...@@ -239,9 +239,9 @@ init: ...@@ -239,9 +239,9 @@ init:
ld b16 $r14 D[$r15 + 4] ld b16 $r14 D[$r15 + 4]
ld b16 $r15 D[$r15 + 6] ld b16 $r15 D[$r15 + 6]
sethi $r14 0 sethi $r14 0
st b32 D[$r0 + hub_mmio_list_head] $r14 st b32 D[$r0 + #hub_mmio_list_head] $r14
st b32 D[$r0 + hub_mmio_list_tail] $r15 st b32 D[$r0 + #hub_mmio_list_tail] $r15
call mmctx_size call #mmctx_size
// set mmctx base addresses now so we don't have to do it later, // set mmctx base addresses now so we don't have to do it later,
// they don't (currently) ever change // they don't (currently) ever change
...@@ -260,7 +260,7 @@ init: ...@@ -260,7 +260,7 @@ init:
add b32 $r1 1 add b32 $r1 1
shl b32 $r1 8 shl b32 $r1 8
mov b32 $r15 $r1 mov b32 $r15 $r1
call strand_ctx_init call #strand_ctx_init
add b32 $r1 $r15 add b32 $r1 $r15
// initialise each GPC in sequence by passing in the offset of its // initialise each GPC in sequence by passing in the offset of its
...@@ -271,40 +271,40 @@ init: ...@@ -271,40 +271,40 @@ init:
// when it has completed, and return the size of its context data // when it has completed, and return the size of its context data
// in GPCn_CC_SCRATCH[1] // in GPCn_CC_SCRATCH[1]
// //
ld b32 $r3 D[$r0 + gpc_count] ld b32 $r3 D[$r0 + #gpc_count]
mov $r4 0x2000 mov $r4 0x2000
sethi $r4 0x500000 sethi $r4 0x500000
init_gpc: init_gpc:
// setup, and start GPC ucode running // setup, and start GPC ucode running
add b32 $r14 $r4 0x804 add b32 $r14 $r4 0x804
mov b32 $r15 $r1 mov b32 $r15 $r1
call nv_wr32 // CC_SCRATCH[1] = ctx offset call #nv_wr32 // CC_SCRATCH[1] = ctx offset
add b32 $r14 $r4 0x800 add b32 $r14 $r4 0x800
mov b32 $r15 $r2 mov b32 $r15 $r2
call nv_wr32 // CC_SCRATCH[0] = chipset call #nv_wr32 // CC_SCRATCH[0] = chipset
add b32 $r14 $r4 0x10c add b32 $r14 $r4 0x10c
clear b32 $r15 clear b32 $r15
call nv_wr32 call #nv_wr32
add b32 $r14 $r4 0x104 add b32 $r14 $r4 0x104
call nv_wr32 // ENTRY call #nv_wr32 // ENTRY
add b32 $r14 $r4 0x100 add b32 $r14 $r4 0x100
mov $r15 2 // CTRL_START_TRIGGER mov $r15 2 // CTRL_START_TRIGGER
call nv_wr32 // CTRL call #nv_wr32 // CTRL
// wait for it to complete, and adjust context size // wait for it to complete, and adjust context size
add b32 $r14 $r4 0x800 add b32 $r14 $r4 0x800
init_gpc_wait: init_gpc_wait:
call nv_rd32 call #nv_rd32
xbit $r15 $r15 31 xbit $r15 $r15 31
bra e init_gpc_wait bra e #init_gpc_wait
add b32 $r14 $r4 0x804 add b32 $r14 $r4 0x804
call nv_rd32 call #nv_rd32
add b32 $r1 $r15 add b32 $r1 $r15
// next! // next!
add b32 $r4 0x8000 add b32 $r4 0x8000
sub b32 $r3 1 sub b32 $r3 1
bra ne init_gpc bra ne #init_gpc
// save context size, and tell host we're ready // save context size, and tell host we're ready
mov $r2 0x800 mov $r2 0x800
...@@ -322,13 +322,13 @@ main: ...@@ -322,13 +322,13 @@ main:
// sleep until we have something to do // sleep until we have something to do
bset $flags $p0 bset $flags $p0
sleep $p0 sleep $p0
mov $r13 cmd_queue mov $r13 #cmd_queue
call queue_get call #queue_get
bra $p1 main bra $p1 #main
// context switch, requested by GPU? // context switch, requested by GPU?
cmpu b32 $r14 0x4001 cmpu b32 $r14 0x4001
bra ne main_not_ctx_switch bra ne #main_not_ctx_switch
trace_set(T_AUTO) trace_set(T_AUTO)
mov $r1 0xb00 mov $r1 0xb00
shl b32 $r1 6 shl b32 $r1 6
...@@ -336,39 +336,39 @@ main: ...@@ -336,39 +336,39 @@ main:
iord $r1 I[$r1 + 0x000] // CHAN_CUR iord $r1 I[$r1 + 0x000] // CHAN_CUR
xbit $r3 $r1 31 xbit $r3 $r1 31
bra e chsw_no_prev bra e #chsw_no_prev
xbit $r3 $r2 31 xbit $r3 $r2 31
bra e chsw_prev_no_next bra e #chsw_prev_no_next
push $r2 push $r2
mov b32 $r2 $r1 mov b32 $r2 $r1
trace_set(T_SAVE) trace_set(T_SAVE)
bclr $flags $p1 bclr $flags $p1
bset $flags $p2 bset $flags $p2
call ctx_xfer call #ctx_xfer
trace_clr(T_SAVE); trace_clr(T_SAVE);
pop $r2 pop $r2
trace_set(T_LOAD); trace_set(T_LOAD);
bset $flags $p1 bset $flags $p1
call ctx_xfer call #ctx_xfer
trace_clr(T_LOAD); trace_clr(T_LOAD);
bra chsw_done bra #chsw_done
chsw_prev_no_next: chsw_prev_no_next:
push $r2 push $r2
mov b32 $r2 $r1 mov b32 $r2 $r1
bclr $flags $p1 bclr $flags $p1
bclr $flags $p2 bclr $flags $p2
call ctx_xfer call #ctx_xfer
pop $r2 pop $r2
mov $r1 0xb00 mov $r1 0xb00
shl b32 $r1 6 shl b32 $r1 6
iowr I[$r1] $r2 iowr I[$r1] $r2
bra chsw_done bra #chsw_done
chsw_no_prev: chsw_no_prev:
xbit $r3 $r2 31 xbit $r3 $r2 31
bra e chsw_done bra e #chsw_done
bset $flags $p1 bset $flags $p1
bclr $flags $p2 bclr $flags $p2
call ctx_xfer call #ctx_xfer
// ack the context switch request // ack the context switch request
chsw_done: chsw_done:
...@@ -377,32 +377,32 @@ main: ...@@ -377,32 +377,32 @@ main:
mov $r2 1 mov $r2 1
iowr I[$r1 + 0x000] $r2 // 0x409b0c iowr I[$r1 + 0x000] $r2 // 0x409b0c
trace_clr(T_AUTO) trace_clr(T_AUTO)
bra main bra #main
// request to set current channel? (*not* a context switch) // request to set current channel? (*not* a context switch)
main_not_ctx_switch: main_not_ctx_switch:
cmpu b32 $r14 0x0001 cmpu b32 $r14 0x0001
bra ne main_not_ctx_chan bra ne #main_not_ctx_chan
mov b32 $r2 $r15 mov b32 $r2 $r15
call ctx_chan call #ctx_chan
bra main_done bra #main_done
// request to store current channel context? // request to store current channel context?
main_not_ctx_chan: main_not_ctx_chan:
cmpu b32 $r14 0x0002 cmpu b32 $r14 0x0002
bra ne main_not_ctx_save bra ne #main_not_ctx_save
trace_set(T_SAVE) trace_set(T_SAVE)
bclr $flags $p1 bclr $flags $p1
bclr $flags $p2 bclr $flags $p2
call ctx_xfer call #ctx_xfer
trace_clr(T_SAVE) trace_clr(T_SAVE)
bra main_done bra #main_done
main_not_ctx_save: main_not_ctx_save:
shl b32 $r15 $r14 16 shl b32 $r15 $r14 16
or $r15 E_BAD_COMMAND or $r15 E_BAD_COMMAND
call error call #error
bra main bra #main
main_done: main_done:
mov $r1 0x820 mov $r1 0x820
...@@ -410,7 +410,7 @@ main: ...@@ -410,7 +410,7 @@ main:
clear b32 $r2 clear b32 $r2
bset $r2 31 bset $r2 31
iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000 iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
bra main bra #main
// interrupt handler // interrupt handler
ih: ih:
...@@ -427,13 +427,13 @@ ih: ...@@ -427,13 +427,13 @@ ih:
// incoming fifo command? // incoming fifo command?
iord $r10 I[$r0 + 0x200] // INTR iord $r10 I[$r0 + 0x200] // INTR
and $r11 $r10 0x00000004 and $r11 $r10 0x00000004
bra e ih_no_fifo bra e #ih_no_fifo
// queue incoming fifo command for later processing // queue incoming fifo command for later processing
mov $r11 0x1900 mov $r11 0x1900
mov $r13 cmd_queue mov $r13 #cmd_queue
iord $r14 I[$r11 + 0x100] // FIFO_CMD iord $r14 I[$r11 + 0x100] // FIFO_CMD
iord $r15 I[$r11 + 0x000] // FIFO_DATA iord $r15 I[$r11 + 0x000] // FIFO_DATA
call queue_put call #queue_put
add b32 $r11 0x400 add b32 $r11 0x400
mov $r14 1 mov $r14 1
iowr I[$r11 + 0x000] $r14 // FIFO_ACK iowr I[$r11 + 0x000] $r14 // FIFO_ACK
...@@ -441,18 +441,18 @@ ih: ...@@ -441,18 +441,18 @@ ih:
// context switch request? // context switch request?
ih_no_fifo: ih_no_fifo:
and $r11 $r10 0x00000100 and $r11 $r10 0x00000100
bra e ih_no_ctxsw bra e #ih_no_ctxsw
// enqueue a context switch for later processing // enqueue a context switch for later processing
mov $r13 cmd_queue mov $r13 #cmd_queue
mov $r14 0x4001 mov $r14 0x4001
call queue_put call #queue_put
// anything we didn't handle, bring it to the host's attention // anything we didn't handle, bring it to the host's attention
ih_no_ctxsw: ih_no_ctxsw:
mov $r11 0x104 mov $r11 0x104
not b32 $r11 not b32 $r11
and $r11 $r10 $r11 and $r11 $r10 $r11
bra e ih_no_other bra e #ih_no_other
mov $r10 0xc1c mov $r10 0xc1c
shl b32 $r10 6 shl b32 $r10 6
iowr I[$r10] $r11 // INTR_UP_SET iowr I[$r10] $r11 // INTR_UP_SET
...@@ -478,11 +478,11 @@ ctx_4160s: ...@@ -478,11 +478,11 @@ ctx_4160s:
mov $r14 0x4160 mov $r14 0x4160
sethi $r14 0x400000 sethi $r14 0x400000
mov $r15 1 mov $r15 1
call nv_wr32 call #nv_wr32
ctx_4160s_wait: ctx_4160s_wait:
call nv_rd32 call #nv_rd32
xbit $r15 $r15 4 xbit $r15 $r15 4
bra e ctx_4160s_wait bra e #ctx_4160s_wait
ret ret
// Without clearing again at end of xfer, some things cause PGRAPH // Without clearing again at end of xfer, some things cause PGRAPH
...@@ -492,7 +492,7 @@ ctx_4160c: ...@@ -492,7 +492,7 @@ ctx_4160c:
mov $r14 0x4160 mov $r14 0x4160
sethi $r14 0x400000 sethi $r14 0x400000
clear b32 $r15 clear b32 $r15
call nv_wr32 call #nv_wr32
ret ret
// Again, not real sure // Again, not real sure
...@@ -503,7 +503,7 @@ ctx_4170s: ...@@ -503,7 +503,7 @@ ctx_4170s:
mov $r14 0x4170 mov $r14 0x4170
sethi $r14 0x400000 sethi $r14 0x400000
or $r15 0x10 or $r15 0x10
call nv_wr32 call #nv_wr32
ret ret
// Waits for a ctx_4170s() call to complete // Waits for a ctx_4170s() call to complete
...@@ -511,9 +511,9 @@ ctx_4170s: ...@@ -511,9 +511,9 @@ ctx_4170s:
ctx_4170w: ctx_4170w:
mov $r14 0x4170 mov $r14 0x4170
sethi $r14 0x400000 sethi $r14 0x400000
call nv_rd32 call #nv_rd32
and $r15 0x10 and $r15 0x10
bra ne ctx_4170w bra ne #ctx_4170w
ret ret
// Disables various things, waits a bit, and re-enables them.. // Disables various things, waits a bit, and re-enables them..
...@@ -530,7 +530,7 @@ ctx_redswitch: ...@@ -530,7 +530,7 @@ ctx_redswitch:
mov $r15 8 mov $r15 8
ctx_redswitch_delay: ctx_redswitch_delay:
sub b32 $r15 1 sub b32 $r15 1
bra ne ctx_redswitch_delay bra ne #ctx_redswitch_delay
mov $r15 0x770 mov $r15 0x770
iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
ret ret
...@@ -546,10 +546,10 @@ ctx_86c: ...@@ -546,10 +546,10 @@ ctx_86c:
iowr I[$r14] $r15 // HUB(0x86c) = val iowr I[$r14] $r15 // HUB(0x86c) = val
mov $r14 -0x75ec mov $r14 -0x75ec
sethi $r14 0x400000 sethi $r14 0x400000
call nv_wr32 // ROP(0xa14) = val call #nv_wr32 // ROP(0xa14) = val
mov $r14 -0x5794 mov $r14 -0x5794
sethi $r14 0x410000 sethi $r14 0x410000
call nv_wr32 // GPC(0x86c) = val call #nv_wr32 // GPC(0x86c) = val
ret ret
// ctx_load - loads a channel's ctxctl data, and selects its vm // ctx_load - loads a channel's ctxctl data, and selects its vm
...@@ -561,7 +561,7 @@ ctx_load: ...@@ -561,7 +561,7 @@ ctx_load:
// switch to channel, somewhat magic in parts.. // switch to channel, somewhat magic in parts..
mov $r10 12 // DONE_UNK12 mov $r10 12 // DONE_UNK12
call wait_donez call #wait_donez
mov $r1 0xa24 mov $r1 0xa24
shl b32 $r1 6 shl b32 $r1 6
iowr I[$r1 + 0x000] $r0 // 0x409a24 iowr I[$r1 + 0x000] $r0 // 0x409a24
...@@ -576,7 +576,7 @@ ctx_load: ...@@ -576,7 +576,7 @@ ctx_load:
ctx_chan_wait_0: ctx_chan_wait_0:
iord $r4 I[$r1 + 0x100] iord $r4 I[$r1 + 0x100]
and $r4 0x1f and $r4 0x1f
bra ne ctx_chan_wait_0 bra ne #ctx_chan_wait_0
iowr I[$r3 + 0x000] $r2 // CHAN_CUR iowr I[$r3 + 0x000] $r2 // CHAN_CUR
// load channel header, fetch PGRAPH context pointer // load channel header, fetch PGRAPH context pointer
...@@ -595,19 +595,19 @@ ctx_load: ...@@ -595,19 +595,19 @@ ctx_load:
sethi $r2 0x80000000 sethi $r2 0x80000000
iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram
mov $r1 0x10 // chan + 0x0210 mov $r1 0x10 // chan + 0x0210
mov $r2 xfer_data mov $r2 #xfer_data
sethi $r2 0x00020000 // 16 bytes sethi $r2 0x00020000 // 16 bytes
xdld $r1 $r2 xdld $r1 $r2
xdwait xdwait
trace_clr(T_LCHAN) trace_clr(T_LCHAN)
// update current context // update current context
ld b32 $r1 D[$r0 + xfer_data + 4] ld b32 $r1 D[$r0 + #xfer_data + 4]
shl b32 $r1 24 shl b32 $r1 24
ld b32 $r2 D[$r0 + xfer_data + 0] ld b32 $r2 D[$r0 + #xfer_data + 0]
shr b32 $r2 8 shr b32 $r2 8
or $r1 $r2 or $r1 $r2
st b32 D[$r0 + ctx_current] $r1 st b32 D[$r0 + #ctx_current] $r1
// set transfer base to start of context, and fetch context header // set transfer base to start of context, and fetch context header
trace_set(T_LCTXH) trace_set(T_LCTXH)
...@@ -618,7 +618,7 @@ ctx_load: ...@@ -618,7 +618,7 @@ ctx_load:
mov $r1 0xa20 mov $r1 0xa20
shl b32 $r1 6 shl b32 $r1 6
iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm
mov $r1 chan_data mov $r1 #chan_data
sethi $r1 0x00060000 // 256 bytes sethi $r1 0x00060000 // 256 bytes
xdld $r0 $r1 xdld $r0 $r1
xdwait xdwait
...@@ -635,10 +635,10 @@ ctx_load: ...@@ -635,10 +635,10 @@ ctx_load:
// In: $r2 channel address // In: $r2 channel address
// //
ctx_chan: ctx_chan:
call ctx_4160s call #ctx_4160s
call ctx_load call #ctx_load
mov $r10 12 // DONE_UNK12 mov $r10 12 // DONE_UNK12
call wait_donez call #wait_donez
mov $r1 0xa10 mov $r1 0xa10
shl b32 $r1 6 shl b32 $r1 6
mov $r2 5 mov $r2 5
...@@ -646,8 +646,8 @@ ctx_chan: ...@@ -646,8 +646,8 @@ ctx_chan:
ctx_chan_wait: ctx_chan_wait:
iord $r2 I[$r1 + 0x000] iord $r2 I[$r1 + 0x000]
or $r2 $r2 or $r2 $r2
bra ne ctx_chan_wait bra ne #ctx_chan_wait
call ctx_4160c call #ctx_4160c
ret ret
// Execute per-context state overrides list // Execute per-context state overrides list
...@@ -661,7 +661,7 @@ ctx_chan: ...@@ -661,7 +661,7 @@ ctx_chan:
// //
ctx_mmio_exec: ctx_mmio_exec:
// set transfer base to be the mmio list // set transfer base to be the mmio list
ld b32 $r3 D[$r0 + chan_mmio_address] ld b32 $r3 D[$r0 + #chan_mmio_address]
mov $r2 0xa04 mov $r2 0xa04
shl b32 $r2 6 shl b32 $r2 6
iowr I[$r2 + 0x000] $r3 // MEM_BASE iowr I[$r2 + 0x000] $r3 // MEM_BASE
...@@ -670,31 +670,31 @@ ctx_mmio_exec: ...@@ -670,31 +670,31 @@ ctx_mmio_exec:
ctx_mmio_loop: ctx_mmio_loop:
// fetch next 256 bytes of mmio list if necessary // fetch next 256 bytes of mmio list if necessary
and $r4 $r3 0xff and $r4 $r3 0xff
bra ne ctx_mmio_pull bra ne #ctx_mmio_pull
mov $r5 xfer_data mov $r5 #xfer_data
sethi $r5 0x00060000 // 256 bytes sethi $r5 0x00060000 // 256 bytes
xdld $r3 $r5 xdld $r3 $r5
xdwait xdwait
// execute a single list entry // execute a single list entry
ctx_mmio_pull: ctx_mmio_pull:
ld b32 $r14 D[$r4 + xfer_data + 0x00] ld b32 $r14 D[$r4 + #xfer_data + 0x00]
ld b32 $r15 D[$r4 + xfer_data + 0x04] ld b32 $r15 D[$r4 + #xfer_data + 0x04]
call nv_wr32 call #nv_wr32
// next! // next!
add b32 $r3 8 add b32 $r3 8
sub b32 $r1 1 sub b32 $r1 1
bra ne ctx_mmio_loop bra ne #ctx_mmio_loop
// set transfer base back to the current context // set transfer base back to the current context
ctx_mmio_done: ctx_mmio_done:
ld b32 $r3 D[$r0 + ctx_current] ld b32 $r3 D[$r0 + #ctx_current]
iowr I[$r2 + 0x000] $r3 // MEM_BASE iowr I[$r2 + 0x000] $r3 // MEM_BASE
// disable the mmio list now, we don't need/want to execute it again // disable the mmio list now, we don't need/want to execute it again
st b32 D[$r0 + chan_mmio_count] $r0 st b32 D[$r0 + #chan_mmio_count] $r0
mov $r1 chan_data mov $r1 #chan_data
sethi $r1 0x00060000 // 256 bytes sethi $r1 0x00060000 // 256 bytes
xdst $r0 $r1 xdst $r0 $r1
xdwait xdwait
...@@ -709,46 +709,46 @@ ctx_mmio_exec: ...@@ -709,46 +709,46 @@ ctx_mmio_exec:
// on load it means: "a save preceded this load" // on load it means: "a save preceded this load"
// //
ctx_xfer: ctx_xfer:
bra not $p1 ctx_xfer_pre bra not $p1 #ctx_xfer_pre
bra $p2 ctx_xfer_pre_load bra $p2 #ctx_xfer_pre_load
ctx_xfer_pre: ctx_xfer_pre:
mov $r15 0x10 mov $r15 0x10
call ctx_86c call #ctx_86c
call ctx_4160s call #ctx_4160s
bra not $p1 ctx_xfer_exec bra not $p1 #ctx_xfer_exec
ctx_xfer_pre_load: ctx_xfer_pre_load:
mov $r15 2 mov $r15 2
call ctx_4170s call #ctx_4170s
call ctx_4170w call #ctx_4170w
call ctx_redswitch call #ctx_redswitch
clear b32 $r15 clear b32 $r15
call ctx_4170s call #ctx_4170s
call ctx_load call #ctx_load
// fetch context pointer, and initiate xfer on all GPCs // fetch context pointer, and initiate xfer on all GPCs
ctx_xfer_exec: ctx_xfer_exec:
ld b32 $r1 D[$r0 + ctx_current] ld b32 $r1 D[$r0 + #ctx_current]
mov $r2 0x414 mov $r2 0x414
shl b32 $r2 6 shl b32 $r2 6
iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset
mov $r14 -0x5b00 mov $r14 -0x5b00
sethi $r14 0x410000 sethi $r14 0x410000
mov b32 $r15 $r1 mov b32 $r15 $r1
call nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer call #nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer
add b32 $r14 4 add b32 $r14 4
xbit $r15 $flags $p1 xbit $r15 $flags $p1
xbit $r2 $flags $p2 xbit $r2 $flags $p2
shl b32 $r2 1 shl b32 $r2 1
or $r15 $r2 or $r15 $r2
call nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type) call #nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
// strands // strands
mov $r1 0x4afc mov $r1 0x4afc
sethi $r1 0x20000 sethi $r1 0x20000
mov $r2 0xc mov $r2 0xc
iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
call strand_wait call #strand_wait
mov $r2 0x47fc mov $r2 0x47fc
sethi $r2 0x20000 sethi $r2 0x20000
iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
...@@ -760,22 +760,22 @@ ctx_xfer: ...@@ -760,22 +760,22 @@ ctx_xfer:
xbit $r10 $flags $p1 // direction xbit $r10 $flags $p1 // direction
or $r10 6 // first, last or $r10 6 // first, last
mov $r11 0 // base = 0 mov $r11 0 // base = 0
ld b32 $r12 D[$r0 + hub_mmio_list_head] ld b32 $r12 D[$r0 + #hub_mmio_list_head]
ld b32 $r13 D[$r0 + hub_mmio_list_tail] ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
mov $r14 0 // not multi mov $r14 0 // not multi
call mmctx_xfer call #mmctx_xfer
// wait for GPCs to all complete // wait for GPCs to all complete
mov $r10 8 // DONE_BAR mov $r10 8 // DONE_BAR
call wait_doneo call #wait_doneo
// wait for strand xfer to complete // wait for strand xfer to complete
call strand_wait call #strand_wait
// post-op // post-op
bra $p1 ctx_xfer_post bra $p1 #ctx_xfer_post
mov $r10 12 // DONE_UNK12 mov $r10 12 // DONE_UNK12
call wait_donez call #wait_donez
mov $r1 0xa10 mov $r1 0xa10
shl b32 $r1 6 shl b32 $r1 6
mov $r2 5 mov $r2 5
...@@ -783,27 +783,27 @@ ctx_xfer: ...@@ -783,27 +783,27 @@ ctx_xfer:
ctx_xfer_post_save_wait: ctx_xfer_post_save_wait:
iord $r2 I[$r1] iord $r2 I[$r1]
or $r2 $r2 or $r2 $r2
bra ne ctx_xfer_post_save_wait bra ne #ctx_xfer_post_save_wait
bra $p2 ctx_xfer_done bra $p2 #ctx_xfer_done
ctx_xfer_post: ctx_xfer_post:
mov $r15 2 mov $r15 2
call ctx_4170s call #ctx_4170s
clear b32 $r15 clear b32 $r15
call ctx_86c call #ctx_86c
call strand_post call #strand_post
call ctx_4170w call #ctx_4170w
clear b32 $r15 clear b32 $r15
call ctx_4170s call #ctx_4170s
bra not $p1 ctx_xfer_no_post_mmio bra not $p1 #ctx_xfer_no_post_mmio
ld b32 $r1 D[$r0 + chan_mmio_count] ld b32 $r1 D[$r0 + #chan_mmio_count]
or $r1 $r1 or $r1 $r1
bra e ctx_xfer_no_post_mmio bra e #ctx_xfer_no_post_mmio
call ctx_mmio_exec call #ctx_mmio_exec
ctx_xfer_no_post_mmio: ctx_xfer_no_post_mmio:
call ctx_4160c call #ctx_4160c
ctx_xfer_done: ctx_xfer_done:
ret ret
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册