#!/usr/bin/env perl
# x86_64cpuid.pl

# Command line: [flavour] output.  A first argument containing a dot is
# taken to be the output file name, leaving the flavour undefined.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# The Win64 argument order is selected by a nasm/masm/mingw64 flavour or
# by an output file name ending in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the x86_64-xlate.pl translator next to this script first, then
# in a perlasm/ subdirectory of the script's directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed from here on is piped through the translator.  Stop
# right away if the pipe cannot be set up rather than printing into a
# handle that was never opened.
open STDOUT,"| \"$^X\" $xlate $flavour $output"
    or die "can't call $xlate: $!";

# Registers carrying the first four integer arguments.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

# Start of the main assembly text.  It first emits a call to
# OPENSSL_cpuid_setup into the .init section, then declares the hidden
# common symbol OPENSSL_ia32cap_P (size 16, second .comm argument 4) and
# switches to .text for the routines that follow.
print<<___;
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,16,4

.text

# OPENSSL_atomic_add: the first argument points to a 32-bit value, the
# second is the amount to add.  The sum is written back with a locked
# cmpxchg, which is retried until no other writer got in between, and the
# new value is returned sign-extended to 64 bits.
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax
.Lspin:	leaq	($arg2,%rax),%r8
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)
	jne	.Lspin		# value changed under us, %eax was reloaded
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

# OPENSSL_rdtsc: returns the time-stamp counter as one 64-bit value by
# combining the two halves that rdtsc leaves in %edx:%eax.
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc
	shl	\$32,%rdx
	or	%rdx,%rax
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

# OPENSSL_ia32_cpuid: takes one argument, a pointer addressed through %rdi.
# The 32-bit word at offset 8 from that pointer is cleared on entry and
# receives %ebx of CPUID leaf 7 when the code below queries that leaf.
# Returns the adjusted leaf 1 feature words in %rax: %edx in the low half
# and %ecx in the high half.  Adjustments made along the way:
#   - reserved bits 20 and 30 of %edx are cleared, then bit 30 is set on
#     Intel CPUs and bit 20 on Intel CPUs whose Family ID field is 15;
#   - the hyper-threading bit (28) of %edx is cleared when the core or
#     cache-sharing counts examined below do not back it up;
#   - bit 11 of %ecx is replaced by the AMD XOP flag taken from extended
#     leaf 0x80000001;
#   - AVX, FMA, XOP and the AVX2 bit of the stored leaf 7 word are cleared
#     unless OSXSAVE is set and XCR0 reports both XMM and YMM state.
# %rbx is preserved by parking it in %r8 for the duration.
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	mov	%eax,8(%rdi)		# clear 3rd word
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

	cmp	\$7,%r11d
	jb	.Lnocacheinfo

	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	mov	%ebx,8(%rdi)

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.Lnotintel
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx
	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	andl	\$0xffffffdf,8(%rdi)	# clear AVX2, ~(1<<5)
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
	or	%r9,%rax
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

# OPENSSL_cleanse: the first argument is the buffer, the second its length
# in bytes.  The buffer is filled with zeros.  Lengths below 15 are cleared
# one byte at a time; longer buffers are first advanced bytewise to an
# 8-byte boundary, then cleared in quadwords, and whatever is left over is
# finished by the bytewise loop.
.globl  OPENSSL_cleanse
.type   OPENSSL_cleanse,\@abi-omnipotent
.align  16
OPENSSL_cleanse:
	xor	%rax,%rax
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1		# lea keeps the flags set by sub
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2		# at least 8 bytes still to go?
	lea	8($arg1),$arg1		# lea keeps the flags set by test
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
___

# OPENSSL_wipe_cpu for the non-Win64 flavours: zeroes %xmm0-%xmm15 and the
# integer registers %rcx, %rdx, %rsi, %rdi and %r8-%r11, then returns the
# address 8 bytes above the stack pointer in %rax.
if (!$win64) {
    my @vector_regs  = map { "%xmm" . $_ } (0 .. 15);
    my @integer_regs = ("%rcx", "%rdx", "%rsi", "%rdi",
                        map { "%r" . $_ } (8 .. 11));

    my $text = ".globl\tOPENSSL_wipe_cpu\n"
             . ".type\tOPENSSL_wipe_cpu,\@abi-omnipotent\n"
             . ".align\t16\n"
             . "OPENSSL_wipe_cpu:\n";
    $text .= "\tpxor\t" . $_ . "," . $_ . "\n" for @vector_regs;
    $text .= "\txorq\t" . $_ . "," . $_ . "\n" for @integer_regs;
    $text .= "\tleaq\t8(%rsp),%rax\n"
           . "\tret\n"
           . ".size\tOPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu\n";

    print $text;
}
# OPENSSL_wipe_cpu for Win64: only %xmm0-%xmm5 and the integer registers
# %rcx, %rdx and %r8-%r11 are zeroed (presumably because the remaining
# registers are callee-saved under the Win64 convention -- confirm against
# the ABI).  Returns the address 8 bytes above the stack pointer in %rax.
print<<___ if ($win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
{
# Registers shared by the two bus-instrumentation routines below.
my $out="%r10";		# pointer into the output array of 32-bit words
my $cnt="%rcx";		# words still to be filled
my $max="%r11";		# return value (bus) or probe limit (bus2)
my $lasttick="%r8d";	# low 32 bits of the previous rdtsc reading
my $lastdiff="%r9d";	# previous tick difference
# Offset from %rsp of the scratch slot in which OPENSSL_instrument_bus2
# parks its count argument: 8 on Win64, -8 elsewhere (the slot below the
# stack pointer, as the variable name suggests, is the Unix red zone).
# The test must be on \$win64; a bareword "win64" is an always-true string,
# which selected offset 8 on every platform and made the Unix build store
# into its caller's frame.
my $redzone=$win64?8:-8;

print<<___;
# OPENSSL_instrument_bus: the first argument is an array of 32-bit words,
# the second the number of words.  For every word the routine reads the
# time-stamp counter, flushes the cache line of the word and adds, with a
# locked add, the tick difference since the previous reading; it then
# steps to the next word.  Returns the word count it was given.
.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus:
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg2,$max

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0
	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)
	jmp	.Loop
.align	16
.Loop:	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax
	mov	%edx,$lasttick
	mov	%eax,$lastdiff
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)
	lea	4($out),$out
	sub	\$1,$cnt
	jnz	.Loop

	mov	$max,%rax
	ret
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

# OPENSSL_instrument_bus2: same arguments as OPENSSL_instrument_bus plus a
# third one limiting the number of probes.  The output pointer advances,
# and the remaining word count drops, only when a tick difference is not
# equal to the previous one.  Returns the original word count minus what
# is left of it when the loop ends.
.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus2:
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg3,$max
	mov	$cnt,$redzone(%rsp)

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0

	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)

	rdtsc			# collect 1st diff
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
.Loop2:
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff

	sub	\$1,$max
	jz	.Ldone2

	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	cmp	$lastdiff,%eax
	mov	%eax,$lastdiff	# lastdiff = diff
	mov	\$0,%edx
	setne	%dl
	sub	%rdx,$cnt	# conditional --$cnt
	lea	($out,%rdx,4),$out	# conditional ++$out
	jnz	.Loop2

.Ldone2:
	mov	$redzone(%rsp),%rax
	sub	$cnt,%rax
	ret
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}

# OPENSSL_ia32_rdrand: issues rdrand up to 8 times, stopping at the first
# attempt that sets the carry flag.  The value left in %rax is returned,
# except that a zero value is replaced by the remaining retry count in
# %rcx.  NOTE(review): a zero return therefore signals that every attempt
# failed only if rdrand leaves zero in its destination on failure --
# confirm against the instruction reference.
print<<___;
.globl	OPENSSL_ia32_rdrand
.type	OPENSSL_ia32_rdrand,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdrand:
	mov	\$8,%ecx
.Loop_rdrand:
	rdrand	%rax
	jc	.Lbreak_rdrand
	loop	.Loop_rdrand
.Lbreak_rdrand:
	cmp	\$0,%rax
	cmove	%rcx,%rax
	ret
.size	OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
___

# Closing the pipe flushes the remaining output and waits for the
# translator.  close returns false if a write failed or the translator
# exited with a non-zero status (in which case $! is 0 and $? holds the
# status), so treat either as fatal instead of leaving a truncated file.
close STDOUT	# flush
    or die "error closing STDOUT: "
         . ($! ? "$!" : "translator exit status " . ($? >> 8));