#!/usr/bin/env perl

# Command line: [flavour] output-file.  When only one argument is given it
# contains a dot (a file name), so it is treated as the output file and the
# flavour is left undefined.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 calling convention is selected by an assembler flavour of
# nasm/masm/mingw64 or by an output file ending in .asm.
$win64 = ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/) ? 1 : 0;

# Locate the x86_64-xlate.pl translator next to this script or in a
# perlasm/ subdirectory.  $dir is the directory part of $0 (with trailing
# separator), or empty when $0 carries no directory component.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed from here on is piped through the translator.  Fail
# loudly if the pipe cannot be set up instead of silently producing nothing.
open STDOUT,"| $^X $xlate $flavour $output"
    or die "can't call $xlate: $!";

# Registers carrying the first four integer arguments.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

# Start of the main assembly heredoc.  The prologue places a call to
# OPENSSL_cpuid_setup in the .init section, declares the 8-byte common
# symbol OPENSSL_ia32cap_P, and switches to .text for the routines below.
print<<___;
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,8

.text

# OPENSSL_atomic_add: adds the second argument to the 32-bit value that
# the first argument points to, using a lock cmpxchg retry loop, and
# returns the new value sign-extended to 64 bits.
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax
.Lspin:	leaq	($arg2,%rax),%r8
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)
	jne	.Lspin
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

# OPENSSL_rdtsc: executes rdtsc and returns (%rdx << 32) | %rax in %rax.
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc
	shl	\$32,%rdx
	or	%rdx,%rax
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

# OPENSSL_ia32_cpuid: queries cpuid and returns the leaf 1 feature words
# packed as (%ecx << 32) | %edx in %rax, after the vendor-specific
# adjustments made below.  %rbx is preserved via %r8.
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@abi-omnipotent
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.Lnotintel
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx
	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
	or	%r9,%rax
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

# OPENSSL_cleanse: stores zero bytes over the buffer given by the first
# argument (pointer) and second argument (length).  Lengths below 15 are
# cleared bytewise; longer buffers are cleared bytewise until the pointer
# is 8-byte aligned, then 8 bytes at a time, then bytewise for the tail.
.globl  OPENSSL_cleanse
.type   OPENSSL_cleanse,\@abi-omnipotent
.align  16
OPENSSL_cleanse:
	xor	%rax,%rax
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2
	lea	8($arg1),$arg1
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
___

# OPENSSL_wipe_cpu: zeroes a set of SSE and general-purpose registers and
# returns %rsp+8 in %rax.  The Win64 variant clears only %xmm0-%xmm5 and
# leaves %rsi/%rdi alone; the Unix variant clears %xmm0-%xmm15 and also
# %rsi/%rdi.  Exactly one variant is emitted, chosen by $win64.
{
my @xmm_regs = $win64 ? (0..5) : (0..15);
my @gp_regs  = $win64 ? qw(rcx rdx r8 r9 r10 r11)
		      : qw(rcx rdx rsi rdi r8 r9 r10 r11);

# One instruction per register, in the same order as a hand-written list.
my $wipe = join("\n",
		map("\tpxor\t%xmm$_,%xmm$_", @xmm_regs),
		map("\txorq\t%$_,%$_", @gp_regs));

print<<___;
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
$wipe
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}
# OPENSSL_instrument_bus / OPENSSL_instrument_bus2: sample rdtsc deltas
# around clflush + locked add on the output array and accumulate the
# deltas into it.
#   OPENSSL_instrument_bus(out, cnt)        returns cnt.
#   OPENSSL_instrument_bus2(out, cnt, max)  returns cnt minus the number
#                                           of slots left unused.
{
my $out="%r10";
my $cnt="%rcx";
my $max="%r11";
my $lasttick="%r8d";
my $lastdiff="%r9d";
# Offset from %rsp of the scratch slot that keeps the original count in
# OPENSSL_instrument_bus2: 8(%rsp) on Win64 (caller-provided home space),
# -8(%rsp) elsewhere (red zone below the stack pointer).  This must test
# \$win64; a bareword "win64" is always true and would select 8(%rsp) on
# Unix, overwriting the caller's stack.
my $redzone=$win64?8:-8;

print<<___;
.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus:
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg2,$max

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0
	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)
	jmp	.Loop
.align	16
.Loop:	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax
	mov	%edx,$lasttick
	mov	%eax,$lastdiff
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)
	lea	4($out),$out
	sub	\$1,$cnt
	jnz	.Loop

	mov	$max,%rax
	ret
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus2:
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg3,$max
	mov	$cnt,$redzone(%rsp)

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0

	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)

	rdtsc			# collect 1st diff
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
.Loop2:
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff

	sub	\$1,$max
	jz	.Ldone2

	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	cmp	$lastdiff,%eax
	mov	%eax,$lastdiff	# lastdiff = diff
	mov	\$0,%edx
	setne	%dl
	sub	%rdx,$cnt	# conditional --$cnt
	lea	($out,%rdx,4),$out	# conditional ++$out
	jnz	.Loop2

.Ldone2:
	mov	$redzone(%rsp),%rax
	sub	$cnt,%rax
	ret
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}

# OPENSSL_ia32_rdrand: tries rdrand up to 8 times and returns the value in
# %rax.  If %rax ends up zero it is replaced by the remaining retry count
# in %rcx (which is itself zero once all attempts are used up).
# The .size directive must name the routine's own label; a misspelled
# symbol there is undefined at assembly time.
print<<___;
.globl	OPENSSL_ia32_rdrand
.type	OPENSSL_ia32_rdrand,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdrand:
	mov	\$8,%ecx
.Loop_rdrand:
	rdrand	%rax
	jc	.Lbreak_rdrand
	loop	.Loop_rdrand
.Lbreak_rdrand:
	cmp	\$0,%rax
	cmove	%rcx,%rax
	ret
.size	OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
___

# STDOUT is a pipe to the translator: closing it flushes the buffered
# assembly and waits for the child.  close returns false when the write
# fails or the child exits with a non-zero status, so report it rather than
# leaving a truncated or missing output file unnoticed.
close STDOUT or die "error closing STDOUT: $! (child status $?)";	# flush