crypto/bn/rsaz*: fix licensing note.
rsaz_exp.c: harmonize line endings; asm/rsaz-*.pl: minor optimizations.
This commit is contained in:
		| @@ -1,54 +1,66 @@ | |||||||
| #!/usr/bin/env perl | #!/usr/bin/env perl | ||||||
|  |  | ||||||
| #****************************************************************************** | ############################################################################## | ||||||
| #* Copyright(c) 2012, Intel Corp.                                              | #                                                                            # | ||||||
| #* Developers and authors:                                                     | #  Copyright (c) 2012, Intel Corporation                                     # | ||||||
| #* Shay Gueron (1, 2), and Vlad Krasnov (1)                                    | #                                                                            # | ||||||
| #* (1) Intel Corporation, Israel Development Center, Haifa, Israel | #  All rights reserved.                                                      # | ||||||
| #* (2) University of Haifa, Israel                                               | #                                                                            # | ||||||
| #****************************************************************************** | #  Redistribution and use in source and binary forms, with or without        # | ||||||
| #* LICENSE:                                                                 | #  modification, are permitted provided that the following conditions are    # | ||||||
| #* This submission to OpenSSL is to be made available under the OpenSSL   | #  met:                                                                      # | ||||||
| #* license, and only to the OpenSSL project, in order to allow integration     | #                                                                            # | ||||||
| #* into the publicly distributed code.  | #  *  Redistributions of source code must retain the above copyright         # | ||||||
| #* The use of this code, or portions of this code, or concepts embedded in | #     notice, this list of conditions and the following disclaimer.          # | ||||||
| #* this code, or modification of this code and/or algorithm(s) in it, or the | #                                                                            # | ||||||
| #* use of this code for any other purpose than stated above, requires special | #  *  Redistributions in binary form must reproduce the above copyright      # | ||||||
| #* licensing.                                                                   | #     notice, this list of conditions and the following disclaimer in the    # | ||||||
| #****************************************************************************** | #     documentation and/or other materials provided with the                 # | ||||||
| #* DISCLAIMER:                                                                 | #     distribution.                                                          # | ||||||
| #* THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS AND THE COPYRIGHT OWNERS      | #                                                                            # | ||||||
| #* ``AS IS''. ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED  | #  *  Neither the name of the Intel Corporation nor the names of its         # | ||||||
| #* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR  | #     contributors may be used to endorse or promote products derived from   # | ||||||
| #* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS OR THE COPYRIGHT | #     this software without specific prior written permission.               # | ||||||
| #* OWNERS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,  | #                                                                            # | ||||||
| #* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF     | #                                                                            # | ||||||
| #* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS    | #  THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY          # | ||||||
| #* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN     | #  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE         # | ||||||
| #* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)     | #  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        # | ||||||
| #* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE  | #  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR            # | ||||||
| #* POSSIBILITY OF SUCH DAMAGE.                                                 | #  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     # | ||||||
| #****************************************************************************** | #  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,       # | ||||||
| #* Reference:                                                                  | #  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR        # | ||||||
| #* [1]	S. Gueron, V. Krasnov: "Software Implementation of Modular | #  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF    # | ||||||
| #*	Exponentiation,  Using Advanced Vector Instructions Architectures", | #  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING      # | ||||||
| #*	F. Ozbudak and F. Rodriguez-Henriquez (Eds.): WAIFI 2012, LNCS 7369, | #  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS        # | ||||||
| #*	pp. 119-135, 2012. Springer-Verlag Berlin Heidelberg 2012 | #  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              # | ||||||
| #* [2]	S. Gueron: "Efficient Software Implementations of Modular | #                                                                            # | ||||||
| #*	Exponentiation", Journal of Cryptographic Engineering 2:31-43 (2012). | ############################################################################## | ||||||
| #* [3]	S. Gueron, V. Krasnov: "Speeding up Big-numbers Squaring",IEEE | # Developers and authors:                                                    # | ||||||
| #*	Proceedings of 9th International Conference on Information Technology: | # Shay Gueron (1, 2), and Vlad Krasnov (1)                                   # | ||||||
| #*	New Generations (ITNG 2012), pp.821-823 (2012) | # (1) Intel Corporation, Israel Development Center, Haifa, Israel            # | ||||||
| #* [4]	S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis | # (2) University of Haifa, Israel                                            # | ||||||
| #*	resistant 1024-bit modular exponentiation, for optimizing RSA2048 | ############################################################################## | ||||||
| #*	on AVX2 capable x86_64 platforms", | # Reference:                                                                 # | ||||||
| #*	http://rt.openssl.org/Ticket/Display.html?id=2850&user=guest&pass=guest | # [1] S. Gueron, V. Krasnov: "Software Implementation of Modular             # | ||||||
| #****************************************************************************** | #     Exponentiation,  Using Advanced Vector Instructions Architectures",    # | ||||||
|  | #     F. Ozbudak and F. Rodriguez-Henriquez (Eds.): WAIFI 2012, LNCS 7369,   # | ||||||
| # +10% improvement by <appro@openssl.org> | #     pp. 119-135, 2012. Springer-Verlag Berlin Heidelberg 2012              # | ||||||
|  | # [2] S. Gueron: "Efficient Software Implementations of Modular              # | ||||||
|  | #     Exponentiation", Journal of Cryptographic Engineering 2:31-43 (2012).  # | ||||||
|  | # [3] S. Gueron, V. Krasnov: "Speeding up Big-numbers Squaring",IEEE         # | ||||||
|  | #     Proceedings of 9th International Conference on Information Technology: # | ||||||
|  | #     New Generations (ITNG 2012), pp.821-823 (2012)                         # | ||||||
|  | # [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis    # | ||||||
|  | #     resistant 1024-bit modular exponentiation, for optimizing RSA2048      # | ||||||
|  | #     on AVX2 capable x86_64 platforms",                                     # | ||||||
|  | #     http://rt.openssl.org/Ticket/Display.html?id=2850&user=guest&pass=guest# | ||||||
|  | ############################################################################## | ||||||
|  | # | ||||||
|  | # +13% improvement over original submission by <appro@openssl.org> | ||||||
| # | # | ||||||
| # rsa2048 sign/sec	OpenSSL 1.0.1	scalar(*)	this | # rsa2048 sign/sec	OpenSSL 1.0.1	scalar(*)	this | ||||||
| # 2.3GHz Haswell	621		732/+18%	1112/+79% | # 2.3GHz Haswell	621		765/+23%	1113/+79% | ||||||
| # | # | ||||||
| # (*)	if system doesn't support AVX2, for reference purposes; | # (*)	if system doesn't support AVX2, for reference purposes; | ||||||
|  |  | ||||||
| @@ -143,24 +155,24 @@ rsaz_1024_sqr_avx2:		# 702 cycles, 14% faster than rsaz_1024_mul_avx2 | |||||||
| 	push	%r13 | 	push	%r13 | ||||||
| 	push	%r14 | 	push	%r14 | ||||||
| 	push	%r15 | 	push	%r15 | ||||||
|  | 	vzeroupper | ||||||
| ___ | ___ | ||||||
| $code.=<<___ if ($win64); | $code.=<<___ if ($win64); | ||||||
| 	lea	-0xa8(%rsp),%rsp | 	lea	-0xa8(%rsp),%rsp | ||||||
| 	movaps  %xmm6,-0xd8(%rax) | 	vmovaps	%xmm6,-0xd8(%rax) | ||||||
| 	movaps  %xmm7,-0xc8(%rax) | 	vmovaps	%xmm7,-0xc8(%rax) | ||||||
| 	movaps  %xmm8,-0xb8(%rax) | 	vmovaps	%xmm8,-0xb8(%rax) | ||||||
| 	movaps  %xmm9,-0xa8(%rax) | 	vmovaps	%xmm9,-0xa8(%rax) | ||||||
| 	movaps  %xmm10,-0x98(%rax) | 	vmovaps	%xmm10,-0x98(%rax) | ||||||
| 	movaps  %xmm11,-0x88(%rax) | 	vmovaps	%xmm11,-0x88(%rax) | ||||||
| 	movaps  %xmm12,-0x78(%rax) | 	vmovaps	%xmm12,-0x78(%rax) | ||||||
| 	movaps  %xmm13,-0x68(%rax) | 	vmovaps	%xmm13,-0x68(%rax) | ||||||
| 	movaps  %xmm14,-0x58(%rax) | 	vmovaps	%xmm14,-0x58(%rax) | ||||||
| 	movaps  %xmm15,-0x48(%rax) | 	vmovaps	%xmm15,-0x48(%rax) | ||||||
| .Lsqr_1024_body: | .Lsqr_1024_body: | ||||||
| ___ | ___ | ||||||
| $code.=<<___; | $code.=<<___; | ||||||
| 	mov	%rax,%rbp | 	mov	%rax,%rbp | ||||||
| 	vzeroall |  | ||||||
| 	mov	%rdx, $np			# reassigned argument | 	mov	%rdx, $np			# reassigned argument | ||||||
| 	sub	\$$FrameSize, %rsp | 	sub	\$$FrameSize, %rsp | ||||||
| 	mov	$np, $tmp | 	mov	$np, $tmp | ||||||
| @@ -171,6 +183,7 @@ $code.=<<___; | |||||||
| 	and	\$4095, $tmp			# see if $np crosses page | 	and	\$4095, $tmp			# see if $np crosses page | ||||||
| 	add	\$32*10, $tmp | 	add	\$32*10, $tmp | ||||||
| 	shr	\$12, $tmp | 	shr	\$12, $tmp | ||||||
|  | 	vpxor	$ACC9,$ACC9,$ACC9 | ||||||
| 	jz	.Lsqr_1024_no_n_copy | 	jz	.Lsqr_1024_no_n_copy | ||||||
|  |  | ||||||
| 	# unaligned 256-bit load that crosses page boundary can | 	# unaligned 256-bit load that crosses page boundary can | ||||||
| @@ -198,7 +211,7 @@ $code.=<<___; | |||||||
| 	vmovdqu		$ACC6, 32*6-128($np) | 	vmovdqu		$ACC6, 32*6-128($np) | ||||||
| 	vmovdqu		$ACC7, 32*7-128($np) | 	vmovdqu		$ACC7, 32*7-128($np) | ||||||
| 	vmovdqu		$ACC8, 32*8-128($np) | 	vmovdqu		$ACC8, 32*8-128($np) | ||||||
| 	vmovdqu		$ACC9, 32*9-128($np)	# $ACC9 is zero after vzeroall | 	vmovdqu		$ACC9, 32*9-128($np)	# $ACC9 is zero | ||||||
|  |  | ||||||
| .Lsqr_1024_no_n_copy: | .Lsqr_1024_no_n_copy: | ||||||
| 	and		\$-1024, %rsp | 	and		\$-1024, %rsp | ||||||
| @@ -876,17 +889,18 @@ rsaz_1024_mul_avx2: | |||||||
| 	push	%r15 | 	push	%r15 | ||||||
| ___ | ___ | ||||||
| $code.=<<___ if ($win64); | $code.=<<___ if ($win64); | ||||||
|  | 	vzeroupper | ||||||
| 	lea	-0xa8(%rsp),%rsp | 	lea	-0xa8(%rsp),%rsp | ||||||
| 	movaps  %xmm6,-0xd8(%rax) | 	vmovaps	%xmm6,-0xd8(%rax) | ||||||
| 	movaps  %xmm7,-0xc8(%rax) | 	vmovaps	%xmm7,-0xc8(%rax) | ||||||
| 	movaps  %xmm8,-0xb8(%rax) | 	vmovaps	%xmm8,-0xb8(%rax) | ||||||
| 	movaps  %xmm9,-0xa8(%rax) | 	vmovaps	%xmm9,-0xa8(%rax) | ||||||
| 	movaps  %xmm10,-0x98(%rax) | 	vmovaps	%xmm10,-0x98(%rax) | ||||||
| 	movaps  %xmm11,-0x88(%rax) | 	vmovaps	%xmm11,-0x88(%rax) | ||||||
| 	movaps  %xmm12,-0x78(%rax) | 	vmovaps	%xmm12,-0x78(%rax) | ||||||
| 	movaps  %xmm13,-0x68(%rax) | 	vmovaps	%xmm13,-0x68(%rax) | ||||||
| 	movaps  %xmm14,-0x58(%rax) | 	vmovaps	%xmm14,-0x58(%rax) | ||||||
| 	movaps  %xmm15,-0x48(%rax) | 	vmovaps	%xmm15,-0x48(%rax) | ||||||
| .Lmul_1024_body: | .Lmul_1024_body: | ||||||
| ___ | ___ | ||||||
| $code.=<<___; | $code.=<<___; | ||||||
| @@ -900,6 +914,7 @@ $code.=<<___; | |||||||
| 	# cross page boundary, swap it with $bp [meaning that caller | 	# cross page boundary, swap it with $bp [meaning that caller | ||||||
| 	# is advised to lay down $ap and $bp next to each other, so | 	# is advised to lay down $ap and $bp next to each other, so | ||||||
| 	# that only one can cross page boundary]. | 	# that only one can cross page boundary]. | ||||||
|  | 	.byte	0x67,0x67 | ||||||
| 	mov	$ap, $tmp | 	mov	$ap, $tmp | ||||||
| 	and	\$4095, $tmp | 	and	\$4095, $tmp | ||||||
| 	add	\$32*10, $tmp | 	add	\$32*10, $tmp | ||||||
| @@ -915,6 +930,7 @@ $code.=<<___; | |||||||
|  |  | ||||||
| 	and	\$4095, $tmp	# see if $np crosses page | 	and	\$4095, $tmp	# see if $np crosses page | ||||||
| 	add	\$32*10, $tmp | 	add	\$32*10, $tmp | ||||||
|  | 	.byte	0x67,0x67 | ||||||
| 	shr	\$12, $tmp | 	shr	\$12, $tmp | ||||||
| 	jz	.Lmul_1024_no_n_copy | 	jz	.Lmul_1024_no_n_copy | ||||||
|  |  | ||||||
| @@ -960,6 +976,7 @@ $code.=<<___; | |||||||
| 	vpbroadcastq ($bp), $Bi | 	vpbroadcastq ($bp), $Bi | ||||||
| 	vmovdqu	$ACC0, (%rsp)			# clear top of stack | 	vmovdqu	$ACC0, (%rsp)			# clear top of stack | ||||||
| 	xor	$r0, $r0 | 	xor	$r0, $r0 | ||||||
|  | 	.byte	0x67 | ||||||
| 	xor	$r1, $r1 | 	xor	$r1, $r1 | ||||||
| 	xor	$r2, $r2 | 	xor	$r2, $r2 | ||||||
| 	xor	$r3, $r3 | 	xor	$r3, $r3 | ||||||
| @@ -1564,22 +1581,22 @@ rsaz_1024_gather5_avx2: | |||||||
| ___ | ___ | ||||||
| $code.=<<___ if ($win64); | $code.=<<___ if ($win64); | ||||||
| 	lea	-0x88(%rsp),%rax | 	lea	-0x88(%rsp),%rax | ||||||
|  | 	vzeroupper | ||||||
| .LSEH_begin_rsaz_1024_gather5: | .LSEH_begin_rsaz_1024_gather5: | ||||||
| 	# I can't trust assembler to use specific encoding:-( | 	# I can't trust assembler to use specific encoding:-( | ||||||
| 	.byte	0x48,0x8d,0x60,0xe0		#lea	-0x20(%rax),%rsp | 	.byte	0x48,0x8d,0x60,0xe0		#lea	-0x20(%rax),%rsp | ||||||
| 	.byte	0x0f,0x29,0x70,0xe0		#movaps	%xmm6,-0x20(%rax) | 	.byte	0xc5,0xf8,0x29,0x70,0xe0	#vmovaps %xmm6,-0x20(%rax) | ||||||
| 	.byte	0x0f,0x29,0x78,0xf0		#movaps	%xmm7,-0x10(%rax) | 	.byte	0xc5,0xf8,0x29,0x78,0xf0	#vmovaps %xmm7,-0x10(%rax) | ||||||
| 	.byte	0x44,0x0f,0x29,0x00		#movaps	%xmm8,0(%rax) | 	.byte	0xc5,0x78,0x29,0x40,0x00	#vmovaps %xmm8,0(%rax) | ||||||
| 	.byte	0x44,0x0f,0x29,0x48,0x10	#movaps	%xmm9,0x10(%rax) | 	.byte	0xc5,0x78,0x29,0x48,0x10	#vmovaps %xmm9,0x10(%rax) | ||||||
| 	.byte	0x44,0x0f,0x29,0x50,0x20	#movaps	%xmm10,0x20(%rax) | 	.byte	0xc5,0x78,0x29,0x50,0x20	#vmovaps %xmm10,0x20(%rax) | ||||||
| 	.byte	0x44,0x0f,0x29,0x58,0x30	#movaps	%xmm11,0x30(%rax) | 	.byte	0xc5,0x78,0x29,0x58,0x30	#vmovaps %xmm11,0x30(%rax) | ||||||
| 	.byte	0x44,0x0f,0x29,0x60,0x40	#movaps	%xmm12,0x40(%rax) | 	.byte	0xc5,0x78,0x29,0x60,0x40	#vmovaps %xmm12,0x40(%rax) | ||||||
| 	.byte	0x44,0x0f,0x29,0x68,0x50	#movaps	%xmm13,0x50(%rax) | 	.byte	0xc5,0x78,0x29,0x68,0x50	#vmovaps %xmm13,0x50(%rax) | ||||||
| 	.byte	0x44,0x0f,0x29,0x70,0x60	#movaps	%xmm14,0x60(%rax) | 	.byte	0xc5,0x78,0x29,0x70,0x60	#vmovaps %xmm14,0x60(%rax) | ||||||
| 	.byte	0x44,0x0f,0x29,0x78,0x70	#movaps	%xmm15,0x70(%rax) | 	.byte	0xc5,0x78,0x29,0x78,0x70	#vmovaps %xmm15,0x70(%rax) | ||||||
| ___ | ___ | ||||||
| $code.=<<___; | $code.=<<___; | ||||||
| 	vzeroupper |  | ||||||
| 	lea	.Lgather_table(%rip),%r11 | 	lea	.Lgather_table(%rip),%r11 | ||||||
| 	mov	$power,%eax | 	mov	$power,%eax | ||||||
| 	and	\$3,$power | 	and	\$3,$power | ||||||
| @@ -1596,25 +1613,25 @@ $code.=<<___; | |||||||
| 	vpbroadcastb	2(%r11,%rax), %xmm14 | 	vpbroadcastb	2(%r11,%rax), %xmm14 | ||||||
| 	vpbroadcastb	1(%r11,%rax), %xmm15 | 	vpbroadcastb	1(%r11,%rax), %xmm15 | ||||||
|  |  | ||||||
| 	lea	($inp,$power),$inp | 	lea	64($inp,$power),$inp | ||||||
| 	mov	\$64,%r11			# size optimization | 	mov	\$64,%r11			# size optimization | ||||||
| 	mov	\$9,%eax | 	mov	\$9,%eax | ||||||
| 	jmp	.Loop_gather_1024 | 	jmp	.Loop_gather_1024 | ||||||
|  |  | ||||||
| .align	32 | .align	32 | ||||||
| .Loop_gather_1024: | .Loop_gather_1024: | ||||||
| 	vpand		($inp),			%xmm8,%xmm0 | 	vpand		-64($inp),		%xmm8,%xmm0 | ||||||
| 	vpand		($inp,%r11),		%xmm9,%xmm1 | 	vpand		($inp),			%xmm9,%xmm1 | ||||||
| 	vpand		($inp,%r11,2),		%xmm10,%xmm2 | 	vpand		64($inp),		%xmm10,%xmm2 | ||||||
| 	vpand		64($inp,%r11,2),	%xmm11,%xmm3 | 	vpand		($inp,%r11,2),		%xmm11,%xmm3 | ||||||
| 	 vpor					%xmm0,%xmm1,%xmm1 | 	 vpor					%xmm0,%xmm1,%xmm1 | ||||||
| 	vpand		($inp,%r11,4),		%xmm12,%xmm4 | 	vpand		64($inp,%r11,2),	%xmm12,%xmm4 | ||||||
| 	 vpor					%xmm2,%xmm3,%xmm3 | 	 vpor					%xmm2,%xmm3,%xmm3 | ||||||
| 	vpand		64($inp,%r11,4),	%xmm13,%xmm5 | 	vpand		($inp,%r11,4),		%xmm13,%xmm5 | ||||||
| 	 vpor					%xmm1,%xmm3,%xmm3 | 	 vpor					%xmm1,%xmm3,%xmm3 | ||||||
| 	vpand		-128($inp,%r11,8),	%xmm14,%xmm6 | 	vpand		64($inp,%r11,4),	%xmm14,%xmm6 | ||||||
| 	 vpor					%xmm4,%xmm5,%xmm5 | 	 vpor					%xmm4,%xmm5,%xmm5 | ||||||
| 	vpand		-64($inp,%r11,8),	%xmm15,%xmm2 | 	vpand		-128($inp,%r11,8),	%xmm15,%xmm2 | ||||||
| 	lea		($inp,%r11,8),$inp | 	lea		($inp,%r11,8),$inp | ||||||
| 	 vpor					%xmm3,%xmm5,%xmm5 | 	 vpor					%xmm3,%xmm5,%xmm5 | ||||||
| 	 vpor					%xmm2,%xmm6,%xmm6 | 	 vpor					%xmm2,%xmm6,%xmm6 | ||||||
| @@ -1798,16 +1815,16 @@ rsaz_se_handler: | |||||||
| 	.rva	.Lmul_1024_body,.Lmul_1024_epilogue | 	.rva	.Lmul_1024_body,.Lmul_1024_epilogue | ||||||
| .LSEH_info_rsaz_1024_gather5: | .LSEH_info_rsaz_1024_gather5: | ||||||
| 	.byte	0x01,0x33,0x16,0x00 | 	.byte	0x01,0x33,0x16,0x00 | ||||||
| 	.byte	0x33,0xf8,0x09,0x00	#movaps 0x90(rsp),xmm15 | 	.byte	0x36,0xf8,0x09,0x00	#vmovaps 0x90(rsp),xmm15 | ||||||
| 	.byte	0x2e,0xe8,0x08,0x00	#movaps 0x80(rsp),xmm14 | 	.byte	0x31,0xe8,0x08,0x00	#vmovaps 0x80(rsp),xmm14 | ||||||
| 	.byte	0x29,0xd8,0x07,0x00	#movaps 0x70(rsp),xmm13 | 	.byte	0x2c,0xd8,0x07,0x00	#vmovaps 0x70(rsp),xmm13 | ||||||
| 	.byte	0x24,0xc8,0x06,0x00	#movaps 0x60(rsp),xmm12 | 	.byte	0x27,0xc8,0x06,0x00	#vmovaps 0x60(rsp),xmm12 | ||||||
| 	.byte	0x1f,0xb8,0x05,0x00	#movaps 0x50(rsp),xmm11 | 	.byte	0x22,0xb8,0x05,0x00	#vmovaps 0x50(rsp),xmm11 | ||||||
| 	.byte	0x1a,0xa8,0x04,0x00	#movaps 0x40(rsp),xmm10 | 	.byte	0x1d,0xa8,0x04,0x00	#vmovaps 0x40(rsp),xmm10 | ||||||
| 	.byte	0x15,0x98,0x03,0x00	#movaps 0x30(rsp),xmm9 | 	.byte	0x18,0x98,0x03,0x00	#vmovaps 0x30(rsp),xmm9 | ||||||
| 	.byte	0x10,0x88,0x02,0x00	#movaps 0x20(rsp),xmm8 | 	.byte	0x13,0x88,0x02,0x00	#vmovaps 0x20(rsp),xmm8 | ||||||
| 	.byte	0x0c,0x78,0x01,0x00	#movaps 0x10(rsp),xmm7 | 	.byte	0x0e,0x78,0x01,0x00	#vmovaps 0x10(rsp),xmm7 | ||||||
| 	.byte	0x08,0x68,0x00,0x00	#movaps 0x00(rsp),xmm6 | 	.byte	0x09,0x68,0x00,0x00	#vmovaps 0x00(rsp),xmm6 | ||||||
| 	.byte	0x04,0x01,0x15,0x00	#sub	rsp,0xa8 | 	.byte	0x04,0x01,0x15,0x00	#sub	rsp,0xa8 | ||||||
| ___ | ___ | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,48 +1,60 @@ | |||||||
| #!/usr/bin/env perl | #!/usr/bin/env perl | ||||||
|  |  | ||||||
| #******************************************************************************# | ############################################################################## | ||||||
| #* Copyright(c) 2012, Intel Corp.                                             *# | #                                                                            # | ||||||
| #* Developers and authors:                                                    *# | #  Copyright (c) 2012, Intel Corporation                                     # | ||||||
| #* Shay Gueron (1, 2), and Vlad Krasnov (1)                                   *# | #                                                                            # | ||||||
| #* (1) Intel Architecture Group, Microprocessor and Chipset Development,      *# | #  All rights reserved.                                                      # | ||||||
| #*     Israel Development Center, Haifa, Israel                               *# | #                                                                            # | ||||||
| #* (2) University of Haifa                                                    *# | #  Redistribution and use in source and binary forms, with or without        # | ||||||
| #******************************************************************************# | #  modification, are permitted provided that the following conditions are    # | ||||||
| #* This submission to OpenSSL is to be made available under the OpenSSL       *# | #  met:                                                                      # | ||||||
| #* license, and only to the OpenSSL project, in order to allow integration    *# | #                                                                            # | ||||||
| #* into the publicly distributed code.                                        *# | #  *  Redistributions of source code must retain the above copyright         # | ||||||
| #* The use of this code, or portions of this code, or concepts embedded in    *# | #     notice, this list of conditions and the following disclaimer.          # | ||||||
| #* this code, or modification of this code and/or algorithm(s) in it, or the  *# | #                                                                            # | ||||||
| #* use of this code for any other purpose than stated above, requires special *# | #  *  Redistributions in binary form must reproduce the above copyright      # | ||||||
| #* licensing.                                                                 *# | #     notice, this list of conditions and the following disclaimer in the    # | ||||||
| #******************************************************************************# | #     documentation and/or other materials provided with the                 # | ||||||
| #******************************************************************************# | #     distribution.                                                          # | ||||||
| #* DISCLAIMER:                                                                *# | #                                                                            # | ||||||
| #* THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS AND THE COPYRIGHT OWNERS     *# | #  *  Neither the name of the Intel Corporation nor the names of its         # | ||||||
| #* ``AS IS''. ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *# | #     contributors may be used to endorse or promote products derived from   # | ||||||
| #* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *# | #     this software without specific prior written permission.               # | ||||||
| #* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS OR THE COPYRIGHT*# | #                                                                            # | ||||||
| #* OWNERS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, *# | #                                                                            # | ||||||
| #* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF    *# | #  THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY          # | ||||||
| #* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS   *# | #  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE         # | ||||||
| #* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN    *# | #  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        # | ||||||
| #* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)    *# | #  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR            # | ||||||
| #* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE *# | #  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     # | ||||||
| #* POSSIBILITY OF SUCH DAMAGE.                                                *# | #  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,       # | ||||||
| #******************************************************************************# | #  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR        # | ||||||
| #* Reference:                                                                 *# | #  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF    # | ||||||
| #* [1] S. Gueron, "Efficient Software Implementations of Modular              *# | #  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING      # | ||||||
| #*     Exponentiation", http://eprint.iacr.org/2011/239                       *# | #  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS        # | ||||||
| #* [2] S. Gueron, V. Krasnov. "Speeding up Big-Numbers Squaring".             *# | #  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              # | ||||||
| #*     IEEE Proceedings of 9th International Conference on Information        *# | #                                                                            # | ||||||
| #*     Technology: New Generations (ITNG 2012), 821-823 (2012).               *# | ############################################################################## | ||||||
| #* [3] S. Gueron, Efficient Software Implementations of Modular Exponentiation*# | # Developers and authors:                                                    # | ||||||
| #*     Journal of Cryptographic Engineering 2:31-43 (2012).                   *# | # Shay Gueron (1, 2), and Vlad Krasnov (1)                                   # | ||||||
| #* [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis    *# | # (1) Intel Architecture Group, Microprocessor and Chipset Development,      # | ||||||
| #*     resistant 512-bit and 1024-bit modular exponentiation for optimizing   *# | #     Israel Development Center, Haifa, Israel                               # | ||||||
| #*     RSA1024 and RSA2048 on x86_64 platforms",                              *# | # (2) University of Haifa                                                    # | ||||||
| #*     http://rt.openssl.org/Ticket/Display.html?id=2582&user=guest&pass=guest*# | ############################################################################## | ||||||
| ################################################################################ | # Reference:                                                                 # | ||||||
|  | # [1] S. Gueron, "Efficient Software Implementations of Modular              # | ||||||
|  | #     Exponentiation", http://eprint.iacr.org/2011/239                       # | ||||||
|  | # [2] S. Gueron, V. Krasnov. "Speeding up Big-Numbers Squaring".             # | ||||||
|  | #     IEEE Proceedings of 9th International Conference on Information        # | ||||||
|  | #     Technology: New Generations (ITNG 2012), 821-823 (2012).               # | ||||||
|  | # [3] S. Gueron, Efficient Software Implementations of Modular Exponentiation# | ||||||
|  | #     Journal of Cryptographic Engineering 2:31-43 (2012).                   # | ||||||
|  | # [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis    # | ||||||
|  | #     resistant 512-bit and 1024-bit modular exponentiation for optimizing   # | ||||||
|  | #     RSA1024 and RSA2048 on x86_64 platforms",                              # | ||||||
|  | #     http://rt.openssl.org/Ticket/Display.html?id=2582&user=guest&pass=guest# | ||||||
|  | ############################################################################## | ||||||
|  |  | ||||||
| # While original submission covers 512- and 1024-bit exponentiation, | # While original submission covers 512- and 1024-bit exponentiation, | ||||||
| # this module is limited to 512-bit version only (and as such | # this module is limited to 512-bit version only (and as such | ||||||
| @@ -1812,33 +1824,33 @@ $code.=<<___; | |||||||
| .align	32 | .align	32 | ||||||
| __rsaz_512_mulx: | __rsaz_512_mulx: | ||||||
| 	mulx	($ap), %rbx, %r8	# initial %rdx preloaded by caller | 	mulx	($ap), %rbx, %r8	# initial %rdx preloaded by caller | ||||||
| 	xor	$zero, $zero		# cf=0,of=0 | 	mov	\$-6, %rcx | ||||||
|  |  | ||||||
| 	mulx	8($ap), %rax, %r9 | 	mulx	8($ap), %rax, %r9 | ||||||
| 	movq	%rbx, 8(%rsp) | 	movq	%rbx, 8(%rsp) | ||||||
|  |  | ||||||
| 	mulx	16($ap), %rbx, %r10 | 	mulx	16($ap), %rbx, %r10 | ||||||
| 	adcx	%rax, %r8 | 	adc	%rax, %r8 | ||||||
|  |  | ||||||
| 	mulx	24($ap), %rax, %r11 | 	mulx	24($ap), %rax, %r11 | ||||||
| 	adcx	%rbx, %r9 | 	adc	%rbx, %r9 | ||||||
|  |  | ||||||
| 	.byte	0xc4,0x62,0xe3,0xf6,0xa6,0x20,0x00,0x00,0x00	# mulx	32($ap), %rbx, %r12 | 	mulx	32($ap), %rbx, %r12 | ||||||
| 	adcx	%rax, %r10 | 	adc	%rax, %r10 | ||||||
|  |  | ||||||
| 	mulx	40($ap), %rax, %r13 | 	mulx	40($ap), %rax, %r13 | ||||||
| 	adcx	%rbx, %r11 | 	adc	%rbx, %r11 | ||||||
|  |  | ||||||
| 	mulx	48($ap), %rbx, %r14 | 	mulx	48($ap), %rbx, %r14 | ||||||
| 	adcx	%rax, %r12 | 	adc	%rax, %r12 | ||||||
|  |  | ||||||
| 	mulx	56($ap), %rax, %r15 | 	mulx	56($ap), %rax, %r15 | ||||||
| 	 mov	8($bp), %rdx | 	 mov	8($bp), %rdx | ||||||
| 	adcx	%rbx, %r13 | 	adc	%rbx, %r13 | ||||||
| 	adcx	%rax, %r14 | 	adc	%rax, %r14 | ||||||
| 	adcx	$zero, %r15		# cf=0 | 	adc	\$0, %r15 | ||||||
|  |  | ||||||
| 	mov	\$-6, %rcx | 	xor	$zero, $zero		# cf=0,of=0 | ||||||
| 	jmp	.Loop_mulx | 	jmp	.Loop_mulx | ||||||
|  |  | ||||||
| .align	32 | .align	32 | ||||||
|   | |||||||
| @@ -1,32 +1,44 @@ | |||||||
| /****************************************************************************** | /***************************************************************************** | ||||||
| * Copyright(c) 2012, Intel Corp.                                              | *                                                                            * | ||||||
| * Developers and authors:                                                     | *  Copyright (c) 2012, Intel Corporation                                     * | ||||||
| * Shay Gueron (1, 2), and Vlad Krasnov (1)                                    | *                                                                            * | ||||||
| * (1) Intel Corporation, Israel Development Center, Haifa, Israel                                | *  All rights reserved.                                                      * | ||||||
| * (2) University of Haifa, Israel                                               | *                                                                            * | ||||||
|  | *  Redistribution and use in source and binary forms, with or without        * | ||||||
|  | *  modification, are permitted provided that the following conditions are    * | ||||||
|  | *  met:                                                                      * | ||||||
|  | *                                                                            * | ||||||
|  | *  *  Redistributions of source code must retain the above copyright         * | ||||||
|  | *     notice, this list of conditions and the following disclaimer.          * | ||||||
|  | *                                                                            * | ||||||
|  | *  *  Redistributions in binary form must reproduce the above copyright      * | ||||||
|  | *     notice, this list of conditions and the following disclaimer in the    * | ||||||
|  | *     documentation and/or other materials provided with the                 * | ||||||
|  | *     distribution.                                                          * | ||||||
|  | *                                                                            * | ||||||
|  | *  *  Neither the name of the Intel Corporation nor the names of its         * | ||||||
|  | *     contributors may be used to endorse or promote products derived from   * | ||||||
|  | *     this software without specific prior written permission.               * | ||||||
|  | *                                                                            * | ||||||
|  | *                                                                            * | ||||||
|  | *  THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY            * | ||||||
|  | *  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE         * | ||||||
|  | *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        * | ||||||
|  | *  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR            * | ||||||
|  | *  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     * | ||||||
|  | *  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,       * | ||||||
|  | *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR        * | ||||||
|  | *  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF    * | ||||||
|  | *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING      * | ||||||
|  | *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS        * | ||||||
|  | *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              * | ||||||
|  | *                                                                            * | ||||||
| ****************************************************************************** | ****************************************************************************** | ||||||
| * LICENSE:                                                                 | * Developers and authors:                                                    * | ||||||
| * This submission to OpenSSL is to be made available under the OpenSSL   | * Shay Gueron (1, 2), and Vlad Krasnov (1)                                   * | ||||||
| * license, and only to the OpenSSL project, in order to allow integration     | * (1) Intel Corporation, Israel Development Center, Haifa, Israel            * | ||||||
| * into the publicly distributed code.  | * (2) University of Haifa, Israel                                            * | ||||||
| * The use of this code, or portions of this code, or concepts embedded in | *****************************************************************************/ | ||||||
| * this code, or modification of this code and/or algorithm(s) in it, or the |  | ||||||
| * use of this code for any other purpose than stated above, requires special |  | ||||||
| * licensing.                                                                   |  | ||||||
| ****************************************************************************** |  | ||||||
| * DISCLAIMER:                                                                 |  | ||||||
| * THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS AND THE COPYRIGHT OWNERS      |  | ||||||
| * ``AS IS''. ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED  |  | ||||||
| * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR  |  | ||||||
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS OR THE COPYRIGHT |  | ||||||
| * OWNERS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,  |  | ||||||
| * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF     |  | ||||||
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS    |  | ||||||
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN     |  | ||||||
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)     |  | ||||||
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE  |  | ||||||
| * POSSIBILITY OF SUCH DAMAGE.                                                 |  | ||||||
| ******************************************************************************/ |  | ||||||
|  |  | ||||||
| #include "rsaz_exp.h" | #include "rsaz_exp.h" | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Andy Polyakov
					Andy Polyakov