// morestack.S: __morestack, the split-stack growth routine (32-bit x86, AT&T syntax)
// Mark stack as non-executable: on Linux/ELF, emit an empty
// .note.GNU-stack section for this object file.
#if defined(__linux__) && defined(__ELF__)
.section	.note.GNU-stack, "", @progbits
#endif

/*
	__morestack

	This function implements stack growth using the mechanism
	devised by Ian Lance Taylor for gccgo, described here:

	http://gcc.gnu.org/wiki/SplitStacks

	The Rust stack is composed of a linked list of stack segments,
	and each stack segment contains two parts: the work area,
	where Rust functions are allowed to execute; and the red zone,
	where no Rust code can execute, but where short runtime
	functions (including __morestack), the dynamic linker, signal
	handlers, and the unwinder can run.

	Each Rust function contains an LLVM-generated prologue that
	compares the stack space required for the current function to
	the space remaining in the current stack segment,
	maintained in a platform-specific TLS slot.  The stack limit
	is strategically maintained by the Rust runtime so that it is
	always in place whenever a Rust function is running.

	When there is not enough room to run the function, the function
	prologue makes a call to __morestack to allocate a new stack
	segment, copy any stack-based arguments to it, switch stacks,
	then resume execution of the original function.

	-- The __morestack calling convention --

	For reasons of efficiency the __morestack calling convention
	is bizarre. The calling function does not attempt to align the
	stack for the call, and on x86_64 the arguments to __morestack
	are passed in scratch registers in order to preserve the
	original function's arguments.

	Once __morestack has switched to the new stack, instead of
	returning, it then calls into the original function, resuming
	execution at the instruction following the call to
	__morestack. Thus, when the original function returns it
	actually returns to __morestack, which then deallocates the
	stack and returns again to the original function's caller.

	-- Unwinding --

	All this trickery causes hell when it comes time for the
	unwinder to navigate its way through this function. What
	will happen is the original function will be unwound first
	without any special effort, then the unwinder encounters
	the __morestack frame, which is sitting just above a
	tiny fraction of a frame (containing just a return pointer
	and, on 32-bit, the arguments to __morestack).

	We deal with this by claiming that little bit of stack
	is actually part of the __morestack frame, encoded as
	DWARF call frame instructions (CFI) by .cfi assembler
	pseudo-ops.

	One final complication (that took me a week to figure out)
	is that OS X 10.6+ uses its own 'compact unwind info',
	an undocumented format generated by the linker from
	the DWARF CFI. This compact unwind info doesn't correctly
	capture the nuance of the __morestack frame, and as a
	result all of our linking on OS X uses the -no_compact_unwind
	flag.
*/

.text

// Platform-specific spellings of the symbols used by this file.
//   RUST_GET_TASK, UPCALL_NEW_STACK, UPCALL_DEL_STACK: runtime
//     functions called from the __morestack routine.
//   MORESTACK: the name under which the routine itself is defined.
#if defined(__APPLE__)
// Apple: the calls go through the local L_*$stub jump-table entries,
// and the routine name carries an extra leading underscore.
#define RUST_GET_TASK           L_rust_get_task$stub
#define UPCALL_NEW_STACK        L_upcall_new_stack$stub
#define UPCALL_DEL_STACK        L_upcall_del_stack$stub
#define MORESTACK               ___morestack
#else
#if defined(__linux__) || defined(__FreeBSD__)
// Linux / FreeBSD: plain, unprefixed symbol names.
#define UPCALL_NEW_STACK        upcall_new_stack
#define UPCALL_DEL_STACK        upcall_del_stack
#define RUST_GET_TASK           rust_get_task
#define MORESTACK               __morestack
#else
// Every other target: symbol names with a leading underscore.
#define UPCALL_NEW_STACK        _upcall_new_stack
#define UPCALL_DEL_STACK        _upcall_del_stack
#define RUST_GET_TASK           _rust_get_task
#define MORESTACK               ___morestack
#endif
#endif

// Symbol declarations. The runtime functions are declared global
// everywhere except Apple, where they are local stub labels instead.
#ifndef __APPLE__
.globl UPCALL_NEW_STACK
.globl UPCALL_DEL_STACK
.globl RUST_GET_TASK
#endif
.globl MORESTACK

// Restrict the visibility of MORESTACK where a directive for that
// is used: .hidden on Linux/FreeBSD, .private_extern on Apple.
// FIXME: What about __WIN32__?
#if defined(__linux__) || defined(__FreeBSD__)
	.hidden MORESTACK
#else
#if defined(__APPLE__)
	.private_extern MORESTACK
#endif
#endif

// On ELF targets, mark the symbol as a function.
#ifdef __ELF__
	.type MORESTACK,@function
#endif

MORESTACK:
#if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__WIN32__)
	.cfi_startproc
#endif

	// Stack layout on entry, as implied by the offsets used below:
	//    0(%esp)  return address into the calling function's prologue
	//    4(%esp)  amount of stack needed by the calling function
	//    8(%esp)  size of the calling function's stack arguments
	//   12(%esp)  the calling function's own return address
	//   16(%esp)  start of the calling function's stack arguments
	// After `pushl %ebp` and `subl $28,%esp` each of these offsets
	// grows by 32 and the saved %ebp lives at 28(%esp).

	// This base pointer setup differs from most in that we are
	// telling the unwinder to consider the Canonical Frame
	// Address (CFA) for this frame to be the value of the stack
	// pointer prior to entry to the original function, whereas
	// the CFA would typically be the value of the stack
	// pointer prior to entry to this function. This will allow
	// the unwinder to understand how to skip the tiny partial
	// frame that the original function created by calling
	// __morestack.

	// In practical terms, our CFA is 12 bytes greater than it
	// would normally be, accounting for the two arguments to
	// __morestack, and an extra return address.

	pushl %ebp
#if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__WIN32__)
	// The CFA is 20 bytes above the register that it is
	// associated with for this frame (which will be %ebp)
	.cfi_def_cfa_offset 20
	// %ebp is -20 bytes from the CFA
	.cfi_offset %ebp, -20
#endif
	movl %esp, %ebp
#if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__WIN32__)
	// Calculate the CFA as an offset from %ebp
	.cfi_def_cfa_register %ebp
#endif

	// NB: This can be called with the fastcc convention so we
	// have to preserve any argument registers

	// NB: __morestack is called misaligned by 4 bytes, i.e.
	// subl $4, %esp would get us to a normal alignment

	subl $28,%esp

	// Save fastcc arguments
	movl %ecx, 16(%esp)
	movl %edx, 12(%esp)

	// FIXME (1388): it's possible we also need to save/restore some
	// SSE2 registers here, if floats-go-in-regs on x86+SSE2. Unclear.

	// FIXME (1226): main is compiled with the split-stack prologue,
	// causing it to call __morestack, so we have to jump back out
	// when RUST_GET_TASK returns zero.
	calll RUST_GET_TASK
	testl %eax,%eax
	jz .L$bail

	// The arguments to upcall_new_stack, stored into the three
	// lowest slots of our frame.

	// The size of the stack arguments to copy to the new stack,
	// and of the arguments to __morestack
	movl 40(%esp),%eax
	movl %eax,8(%esp)
	// The address of the stack arguments to the original function
	leal 48(%esp),%eax
	movl %eax,4(%esp)
	// The amount of stack needed for the original function,
	// the other argument to __morestack
	movl 36(%esp),%eax // The amount of stack needed
	movl %eax,(%esp)

	call UPCALL_NEW_STACK

	// Save the address of the new stack
	movl %eax, (%esp)

	// Grab the __morestack return pointer
	movl 32(%esp),%eax
	// Skip past the ret instruction in the parent fn
	inc  %eax

	// Restore the fastcc arguments to the original function
	movl 16(%esp), %ecx
	movl 12(%esp), %edx

	// Switch stacks
	movl (%esp),%esp
	// Re-enter the function that called us
	call *%eax

	// Now the function that called us has returned, so we need to
	// delete the old stack space

	// Switch back to the rust stack; %ebp still holds the frame
	// pointer set up at the top of this routine
	movl %ebp, %esp

	// Realign stack - remember that __morestack was called misaligned
	subl $12, %esp

	// Save the return value of the function we allocated space for
	movl %edx, 4(%esp)
	movl %eax, (%esp)

	call UPCALL_DEL_STACK

	// And restore it
	movl (%esp), %eax
	movl 4(%esp), %edx

	addl $12,%esp

	popl %ebp

	// Return, popping the two 4-byte arguments to __morestack
	retl $8

.L$bail:
	// No task: undo this frame and jump back into the calling
	// function, just past the instruction following its call to
	// __morestack (same +1 adjustment as the main path above).
	movl 32(%esp),%eax
	inc %eax

	// Put back the fastcc argument registers saved on entry, in
	// case the call to RUST_GET_TASK changed them.
	movl 16(%esp), %ecx
	movl 12(%esp), %edx

	// Drop the 28 bytes reserved by `subl $28,%esp` so that %esp
	// points at the saved %ebp. (Using 44 here would pop the calling
	// function's return address into %ebp and leave %esp 16 bytes
	// too high.)
	addl $28, %esp
	popl %ebp
	// Discard the __morestack return address (4 bytes) and its two
	// arguments (8 bytes); %esp is now back at its value on entry
	// to the calling function.
	addl $4+8,%esp

	jmpl *%eax

#if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__WIN32__)
	.cfi_endproc
#endif

#ifdef __APPLE__

// Jump-table stubs for the three runtime functions called above.
// Each stub is 5 bytes (the trailing ",5" in the section directive)
// and is initially filled with five 0xF4 (hlt) bytes.
.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5

	// Linker will replace the hlts (the ascii) with jmp
L_rust_get_task$stub:
	.indirect_symbol _rust_get_task
	.ascii	 "\364\364\364\364\364"

L_upcall_new_stack$stub:
	.indirect_symbol _upcall_new_stack
	.ascii	 "\364\364\364\364\364"

L_upcall_del_stack$stub:
	.indirect_symbol _upcall_del_stack
	.ascii	 "\364\364\364\364\364"

	.subsections_via_symbols
#endif