diff options
Diffstat (limited to '')
-rw-r--r-- | mpi/Makefile.am | 1 | ||||
-rw-r--r-- | mpi/Makefile.in | 15 | ||||
-rw-r--r-- | mpi/config.links | 6 | ||||
-rw-r--r-- | mpi/i586/README | 26 | ||||
-rw-r--r-- | mpi/i586/distfiles | 8 | ||||
-rw-r--r-- | mpi/i586/mpih-add1.S | 134 | ||||
-rw-r--r-- | mpi/i586/mpih-mul1.S | 89 | ||||
-rw-r--r-- | mpi/i586/mpih-mul2.S | 94 | ||||
-rw-r--r-- | mpi/i586/mpih-mul3.S | 94 | ||||
-rw-r--r-- | mpi/i586/mpih-shift.S | 426 | ||||
-rw-r--r-- | mpi/i586/mpih-sub1.S | 143 | ||||
-rw-r--r-- | mpi/mpi-inv.c | 103 | ||||
-rw-r--r-- | mpi/mpi-mpow.c | 119 |
13 files changed, 1247 insertions, 11 deletions
diff --git a/mpi/Makefile.am b/mpi/Makefile.am index 1c32e1312..2801a7519 100644 --- a/mpi/Makefile.am +++ b/mpi/Makefile.am @@ -24,6 +24,7 @@ libmpi_a_SOURCES = longlong.h \ mpi-inv.c \ mpi-mul.c \ mpi-pow.c \ + mpi-mpow.c \ mpi-scan.c \ mpicoder.c \ mpih-cmp.c \ diff --git a/mpi/Makefile.in b/mpi/Makefile.in index aae7160c0..bcbbc6416 100644 --- a/mpi/Makefile.in +++ b/mpi/Makefile.in @@ -106,6 +106,7 @@ libmpi_a_SOURCES = longlong.h \ mpi-inv.c \ mpi-mul.c \ mpi-pow.c \ + mpi-mpow.c \ mpi-scan.c \ mpicoder.c \ mpih-cmp.c \ @@ -138,13 +139,13 @@ LIBS = @LIBS@ libmpi_a_DEPENDENCIES = mpih-mul1.o mpih-mul2.o mpih-mul3.o mpih-add1.o \ mpih-sub1.o mpih-shift.o libmpi_a_OBJECTS = mpi-add.o mpi-bit.o mpi-cmp.o mpi-div.o mpi-gcd.o \ -mpi-inv.o mpi-mul.o mpi-pow.o mpi-scan.o mpicoder.o mpih-cmp.o \ -mpih-add.o mpih-sub.o mpih-div.o mpih-mul.o mpiutil.o +mpi-inv.o mpi-mul.o mpi-pow.o mpi-mpow.o mpi-scan.o mpicoder.o \ +mpih-cmp.o mpih-add.o mpih-sub.o mpih-div.o mpih-mul.o mpiutil.o AR = ar CFLAGS = @CFLAGS@ COMPILE = $(CC) $(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS) LINK = $(CC) $(CFLAGS) $(LDFLAGS) -o $@ -DIST_COMMON = Makefile.am Makefile.in +DIST_COMMON = ChangeLog Makefile.am Makefile.in DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) @@ -152,10 +153,10 @@ DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) TAR = tar GZIP = --best DEP_FILES = .deps/mpi-add.P .deps/mpi-bit.P .deps/mpi-cmp.P \ -.deps/mpi-div.P .deps/mpi-gcd.P .deps/mpi-inv.P .deps/mpi-mul.P \ -.deps/mpi-pow.P .deps/mpi-scan.P .deps/mpicoder.P .deps/mpih-add.P \ -.deps/mpih-cmp.P .deps/mpih-div.P .deps/mpih-mul.P .deps/mpih-sub.P \ -.deps/mpiutil.P +.deps/mpi-div.P .deps/mpi-gcd.P .deps/mpi-inv.P .deps/mpi-mpow.P \ +.deps/mpi-mul.P .deps/mpi-pow.P .deps/mpi-scan.P .deps/mpicoder.P \ +.deps/mpih-add.P .deps/mpih-cmp.P .deps/mpih-div.P .deps/mpih-mul.P \ +.deps/mpih-sub.P .deps/mpiutil.P SOURCES = $(libmpi_a_SOURCES) OBJECTS = $(libmpi_a_OBJECTS) diff --git a/mpi/config.links b/mpi/config.links index 923e18b0d..cf370e054 100644 --- a/mpi/config.links +++ b/mpi/config.links @@ -10,7 +10,7 @@ test -d ./mpi || mkdir ./mpi echo '/* created by config.links - do not edit */' >./mpi/asm-syntax.h case "${target}" in - i[345]86*-*-linuxaout* | i[345]86*-*-linuxoldld* | i[345]86*-*-*bsd*) + i[34]86*-*-linuxaout* | i[34]86*-*-linuxoldld* | i[34]86*-*-*bsd*) echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h path="i386" @@ -20,14 +20,14 @@ case "${target}" in cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h path="i586 i386" ;; - i[3456]86*-*-*) + i[34]86*-*-*) echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h path="i386" ;; i[56]86*-*-* | pentium-*-* | pentiumpro-*-*) echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h - cat $srcdir/mpi/i586/syntax.h >>./mpi/asm-syntax.h + cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h path="i586 i386" ;; alpha*-*-*) diff --git a/mpi/i586/README b/mpi/i586/README new file mode 100644 index 000000000..d73b08268 --- /dev/null +++ b/mpi/i586/README @@ -0,0 +1,26 @@ +This directory contains mpn functions optimized for Intel Pentium +processors. + +RELEVANT OPTIMIZATION ISSUES + +1. Pentium doesn't allocate cache lines on writes, unlike most other modern +processors. Since the functions in the mpn class do array writes, we have to +handle allocating the destination cache lines by reading a word from it in the +loops, to achieve the best performance. + +2. Pairing of memory operations requires that the two issued operations refer +to different cache banks. The simplest way to insure this is to read/write +two words from the same object. If we make operations on different objects, +they might or might not be to the same cache bank. + +STATUS + +1. mpn_lshift and mpn_rshift run at about 6 cycles/limb, but the Pentium +documentation indicates that they should take only 43/8 = 5.375 cycles/limb, +or 5 cycles/limb asymptotically. + +2. mpn_add_n and mpn_sub_n run at asymptotically 2 cycles/limb. Due to loop +overhead and other delays (cache refill?), they run at or near 2.5 cycles/limb. + +3. mpn_mul_1, mpn_addmul_1, mpn_submul_1 all run 1 cycle faster than they +should... diff --git a/mpi/i586/distfiles b/mpi/i586/distfiles new file mode 100644 index 000000000..951480fde --- /dev/null +++ b/mpi/i586/distfiles @@ -0,0 +1,8 @@ +mpih-add1.S +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S +mpih-shift.S +mpih-sub1.S +README + diff --git a/mpi/i586/mpih-add1.S b/mpi/i586/mpih-add1.S new file mode 100644 index 000000000..e9883285d --- /dev/null +++ b/mpi/i586/mpih-add1.S @@ -0,0 +1,134 @@ +/* i80586 add_n -- Add two limb vectors of the same length > 0 and store + * sum in a third limb vector. + * + * Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc. + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * mpihelp_add_n( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_ptr_t s2_ptr, (sp + 12) + * mpi_size_t size) (sp + 16) + */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(mpihelp_add_n) +C_SYMBOL_NAME(mpihelp_add_n:) + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s1_ptr */ + movl 28(%esp),%ebp /* s2_ptr */ + movl 32(%esp),%ecx /* size */ + + movl (%ebp),%ebx + + decl %ecx + movl %ecx,%edx + shrl $3,%ecx + andl $7,%edx + testl %ecx,%ecx /* zero carry flag */ + jz Lend + pushl %edx + + ALIGN (3) +Loop: movl 28(%edi),%eax /* fetch destination cache line */ + leal 32(%edi),%edi + +L1: movl (%esi),%eax + movl 4(%esi),%edx + adcl %ebx,%eax + movl 4(%ebp),%ebx + adcl %ebx,%edx + movl 8(%ebp),%ebx + movl %eax,-32(%edi) + movl %edx,-28(%edi) + +L2: movl 8(%esi),%eax + movl 12(%esi),%edx + adcl %ebx,%eax + movl 12(%ebp),%ebx + adcl %ebx,%edx + movl 16(%ebp),%ebx + movl %eax,-24(%edi) + movl %edx,-20(%edi) + +L3: movl 16(%esi),%eax + movl 20(%esi),%edx + adcl %ebx,%eax + movl 20(%ebp),%ebx + adcl %ebx,%edx + movl 24(%ebp),%ebx + movl %eax,-16(%edi) + movl %edx,-12(%edi) + +L4: movl 24(%esi),%eax + movl 28(%esi),%edx + adcl %ebx,%eax + movl 28(%ebp),%ebx + adcl %ebx,%edx + movl 32(%ebp),%ebx + movl %eax,-8(%edi) + movl %edx,-4(%edi) + + leal 32(%esi),%esi + leal 32(%ebp),%ebp + decl %ecx + jnz Loop + + popl %edx +Lend: + decl %edx /* test %edx w/o clobbering carry */ + js Lend2 + incl %edx +Loop2: + leal 4(%edi),%edi + movl (%esi),%eax + adcl %ebx,%eax + movl 4(%ebp),%ebx + movl %eax,-4(%edi) + leal 4(%esi),%esi + leal 4(%ebp),%ebp + decl %edx + jnz Loop2 +Lend2: + movl (%esi),%eax + adcl %ebx,%eax + movl %eax,(%edi) + + sbbl %eax,%eax + negl %eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + + diff --git a/mpi/i586/mpih-mul1.S b/mpi/i586/mpih-mul1.S new file mode 100644 index 000000000..c0bedec0a --- /dev/null +++ b/mpi/i586/mpih-mul1.S @@ -0,0 +1,89 @@ +/* i80586 mul_1 -- Multiply a limb vector with a limb and store + * the result in a second limb vector. + * Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * mpihelp_mul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + */ + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(mpihelp_mul_1) +C_SYMBOL_NAME(mpihelp_mul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) + INSN2(xor,l ,R(ebx),R(ebx)) + ALIGN (3) + +Loop: INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + + INSN1(mul,l ,R(s2_limb)) + + INSN2(add,l ,R(ebx),R(eax)) + + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) + INSN1(inc,l ,R(size)) + + INSN2(mov,l ,R(ebx),R(edx)) + INSN1(jnz, ,Loop) + + INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),R(ebx)) + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret + diff --git a/mpi/i586/mpih-mul2.S b/mpi/i586/mpih-mul2.S new file mode 100644 index 000000000..6b5646239 --- /dev/null +++ b/mpi/i586/mpih-mul2.S @@ -0,0 +1,94 @@ +/* i80586 addmul_1 -- Multiply a limb vector with a limb and add + * the result to a second limb vector. + * Copyright (c) 1997 by Werner Koch (dd9jn) + * Copyright (C) 1992, 1994 Free Software Foundation, Inc. + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * mpihelp_addmul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + */ + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(mpihelp_addmul_1) +C_SYMBOL_NAME(mpihelp_addmul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) + INSN2(xor,l ,R(ebx),R(ebx)) + ALIGN (3) + +Loop: INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + + INSN1(mul,l ,R(s2_limb)) + + INSN2(add,l ,R(eax),R(ebx)) + INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4)) + + INSN2(adc,l ,R(edx),$0) + INSN2(add,l ,R(ebx),R(eax)) + + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) + INSN1(inc,l ,R(size)) + + INSN2(mov,l ,R(ebx),R(edx)) + INSN1(jnz, ,Loop) + + INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),R(ebx)) + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret + diff --git a/mpi/i586/mpih-mul3.S b/mpi/i586/mpih-mul3.S new file mode 100644 index 000000000..69b7f4672 --- /dev/null +++ b/mpi/i586/mpih-mul3.S @@ -0,0 +1,94 @@ +/* i80586 submul_1 -- Multiply a limb vector with a limb and add + * the result to a second limb vector. + * Copyright (c) 1997 by Werner Koch (dd9jn) + * Copyright (C) 1992, 1994 Free Software Foundation, Inc. + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * mpihelp_submul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + */ + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(mpihelp_submul_1) +C_SYMBOL_NAME(mpihelp_submul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) + INSN2(xor,l ,R(ebx),R(ebx)) + ALIGN (3) + +Loop: INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + + INSN1(mul,l ,R(s2_limb)) + + INSN2(add,l ,R(eax),R(ebx)) + INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4)) + + INSN2(adc,l ,R(edx),$0) + INSN2(sub,l ,R(ebx),R(eax)) + + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) + INSN1(inc,l ,R(size)) + + INSN2(mov,l ,R(ebx),R(edx)) + INSN1(jnz, ,Loop) + + INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),R(ebx)) + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret + diff --git a/mpi/i586/mpih-shift.S b/mpi/i586/mpih-shift.S new file mode 100644 index 000000000..9f1563810 --- /dev/null +++ b/mpi/i586/mpih-shift.S @@ -0,0 +1,426 @@ +/* i80586 rshift, lshift + * Copyright (c) 1997 by Werner Koch (dd9jn) + * Copyright (C) 1992, 1994 Free Software Foundation, Inc. + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * mpihelp_lshift( mpi_ptr_t wp, (sp + 4) + * mpi_ptr_t up, (sp + 8) + * mpi_size_t usize, (sp + 12) + * unsigned cnt) (sp + 16) + */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(mpihelp_lshift) +C_SYMBOL_NAME(mpihelp_lshift:) + + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s_ptr */ + movl 28(%esp),%ebp /* size */ + movl 32(%esp),%ecx /* cnt */ + +/* We can use faster code for shift-by-1 under certain conditions. */ + cmp $1,%ecx + jne Lnormal + leal 4(%esi),%eax + cmpl %edi,%eax + jnc Lspecial /* jump if s_ptr + 1 >= res_ptr */ + leal (%esi,%ebp,4),%eax + cmpl %eax,%edi + jnc Lspecial /* jump if res_ptr >= s_ptr + size */ + +Lnormal: + leal -4(%edi,%ebp,4),%edi + leal -4(%esi,%ebp,4),%esi + + movl (%esi),%edx + subl $4,%esi + xorl %eax,%eax + shldl %cl,%edx,%eax /* compute carry limb */ + pushl %eax /* push carry limb onto stack */ + + decl %ebp + pushl %ebp + shrl $3,%ebp + jz Lend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +Loop: movl -28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl -4(%esi),%edx + shldl %cl,%eax,%ebx + shldl %cl,%edx,%eax + movl %ebx,(%edi) + movl %eax,-4(%edi) + + movl -8(%esi),%ebx + movl -12(%esi),%eax + shldl %cl,%ebx,%edx + shldl %cl,%eax,%ebx + movl %edx,-8(%edi) + movl %ebx,-12(%edi) + + movl -16(%esi),%edx + movl -20(%esi),%ebx + shldl %cl,%edx,%eax + shldl %cl,%ebx,%edx + movl %eax,-16(%edi) + movl %edx,-20(%edi) + + movl -24(%esi),%eax + movl -28(%esi),%edx + shldl %cl,%eax,%ebx + shldl %cl,%edx,%eax + movl %ebx,-24(%edi) + movl %eax,-28(%edi) + + subl $32,%esi + subl $32,%edi + decl %ebp + jnz Loop + +Lend: popl %ebp + andl $7,%ebp + jz Lend2 +Loop2: movl (%esi),%eax + shldl %cl,%eax,%edx + movl %edx,(%edi) + movl %eax,%edx + subl $4,%esi + subl $4,%edi + decl %ebp + jnz Loop2 + +Lend2: shll %cl,%edx /* compute least significant limb */ + movl %edx,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +/* We loop from least significant end of the arrays, which is only + permissable if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. +*/ + +Lspecial: + movl (%esi),%edx + addl $4,%esi + + decl %ebp + pushl %ebp + shrl $3,%ebp + + addl %edx,%edx + incl %ebp + decl %ebp + jz LLend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +LLoop: movl 28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl 4(%esi),%edx + adcl %eax,%eax + movl %ebx,(%edi) + adcl %edx,%edx + movl %eax,4(%edi) + + movl 8(%esi),%ebx + movl 12(%esi),%eax + adcl %ebx,%ebx + movl %edx,8(%edi) + adcl %eax,%eax + movl %ebx,12(%edi) + + movl 16(%esi),%edx + movl 20(%esi),%ebx + adcl %edx,%edx + movl %eax,16(%edi) + adcl %ebx,%ebx + movl %edx,20(%edi) + + movl 24(%esi),%eax + movl 28(%esi),%edx + adcl %eax,%eax + movl %ebx,24(%edi) + adcl %edx,%edx + movl %eax,28(%edi) + + leal 32(%esi),%esi /* use leal not to clobber carry */ + leal 32(%edi),%edi + decl %ebp + jnz LLoop + +LLend: popl %ebp + sbbl %eax,%eax /* save carry in %eax */ + andl $7,%ebp + jz LLend2 + addl %eax,%eax /* restore carry from eax */ +LLoop2: movl %edx,%ebx + movl (%esi),%edx + adcl %edx,%edx + movl %ebx,(%edi) + + leal 4(%esi),%esi /* use leal not to clobber carry */ + leal 4(%edi),%edi + decl %ebp + jnz LLoop2 + + jmp LL1 +LLend2: addl %eax,%eax /* restore carry from eax */ +LL1: movl %edx,(%edi) /* store last limb */ + + sbbl %eax,%eax + negl %eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + + + + +/******************* + * mpi_limb_t + * mpihelp_rshift( mpi_ptr_t wp, (sp + 4) + * mpi_ptr_t up, (sp + 8) + * mpi_size_t usize, (sp + 12) + * unsigned cnt) (sp + 16) + */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(mpihelp_rshift) +C_SYMBOL_NAME(mpihelp_rshift:) + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s_ptr */ + movl 28(%esp),%ebp /* size */ + movl 32(%esp),%ecx /* cnt */ + +/* We can use faster code for shift-by-1 under certain conditions. */ + cmp $1,%ecx + jne Rnormal + leal 4(%edi),%eax + cmpl %esi,%eax + jnc Rspecial /* jump if res_ptr + 1 >= s_ptr */ + leal (%edi,%ebp,4),%eax + cmpl %eax,%esi + jnc Rspecial /* jump if s_ptr >= res_ptr + size */ + +Rnormal: + movl (%esi),%edx + addl $4,%esi + xorl %eax,%eax + shrdl %cl,%edx,%eax /* compute carry limb */ + pushl %eax /* push carry limb onto stack */ + + decl %ebp + pushl %ebp + shrl $3,%ebp + jz Rend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +Roop: movl 28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl 4(%esi),%edx + shrdl %cl,%eax,%ebx + shrdl %cl,%edx,%eax + movl %ebx,(%edi) + movl %eax,4(%edi) + + movl 8(%esi),%ebx + movl 12(%esi),%eax + shrdl %cl,%ebx,%edx + shrdl %cl,%eax,%ebx + movl %edx,8(%edi) + movl %ebx,12(%edi) + + movl 16(%esi),%edx + movl 20(%esi),%ebx + shrdl %cl,%edx,%eax + shrdl %cl,%ebx,%edx + movl %eax,16(%edi) + movl %edx,20(%edi) + + movl 24(%esi),%eax + movl 28(%esi),%edx + shrdl %cl,%eax,%ebx + shrdl %cl,%edx,%eax + movl %ebx,24(%edi) + movl %eax,28(%edi) + + addl $32,%esi + addl $32,%edi + decl %ebp + jnz Roop + +Rend: popl %ebp + andl $7,%ebp + jz Rend2 +Roop2: movl (%esi),%eax + shrdl %cl,%eax,%edx /* compute result limb */ + movl %edx,(%edi) + movl %eax,%edx + addl $4,%esi + addl $4,%edi + decl %ebp + jnz Roop2 + +Rend2: shrl %cl,%edx /* compute most significant limb */ + movl %edx,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +/* We loop from least significant end of the arrays, which is only + permissable if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. +*/ + +Rspecial: + leal -4(%edi,%ebp,4),%edi + leal -4(%esi,%ebp,4),%esi + + movl (%esi),%edx + subl $4,%esi + + decl %ebp + pushl %ebp + shrl $3,%ebp + + shrl $1,%edx + incl %ebp + decl %ebp + jz RLend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +RLoop: movl -28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl -4(%esi),%edx + rcrl $1,%eax + movl %ebx,(%edi) + rcrl $1,%edx + movl %eax,-4(%edi) + + movl -8(%esi),%ebx + movl -12(%esi),%eax + rcrl $1,%ebx + movl %edx,-8(%edi) + rcrl $1,%eax + movl %ebx,-12(%edi) + + movl -16(%esi),%edx + movl -20(%esi),%ebx + rcrl $1,%edx + movl %eax,-16(%edi) + rcrl $1,%ebx + movl %edx,-20(%edi) + + movl -24(%esi),%eax + movl -28(%esi),%edx + rcrl $1,%eax + movl %ebx,-24(%edi) + rcrl $1,%edx + movl %eax,-28(%edi) + + leal -32(%esi),%esi /* use leal not to clobber carry */ + leal -32(%edi),%edi + decl %ebp + jnz RLoop + +RLend: popl %ebp + sbbl %eax,%eax /* save carry in %eax */ + andl $7,%ebp + jz RLend2 + addl %eax,%eax /* restore carry from eax */ +RLoop2: movl %edx,%ebx + movl (%esi),%edx + rcrl $1,%edx + movl %ebx,(%edi) + + leal -4(%esi),%esi /* use leal not to clobber carry */ + leal -4(%edi),%edi + decl %ebp + jnz RLoop2 + + jmp RL1 +RLend2: addl %eax,%eax /* restore carry from eax */ +RL1: movl %edx,(%edi) /* store last limb */ + + movl $0,%eax + rcrl $1,%eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + diff --git a/mpi/i586/mpih-sub1.S b/mpi/i586/mpih-sub1.S new file mode 100644 index 000000000..1f5c0bfdd --- /dev/null +++ b/mpi/i586/mpih-sub1.S @@ -0,0 +1,143 @@ +/* i80586 sub_n -- Sub two limb vectors of the same length > 0 and store + * sum in a third limb vector. + * Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * mpihelp_sub_n( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_ptr_t s2_ptr, (sp + 12) + * mpi_size_t size) (sp + 16) + */ + + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(mpihelp_sub_n) +C_SYMBOL_NAME(mpihelp_sub_n:) + + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s1_ptr */ + movl 28(%esp),%ebp /* s2_ptr */ + movl 32(%esp),%ecx /* size */ + + movl (%ebp),%ebx + + decl %ecx + movl %ecx,%edx + shrl $3,%ecx + andl $7,%edx + testl %ecx,%ecx /* zero carry flag */ + jz Lend + pushl %edx + + ALIGN (3) +Loop: movl 28(%edi),%eax /* fetch destination cache line */ + leal 32(%edi),%edi + +L1: movl (%esi),%eax + movl 4(%esi),%edx + sbbl %ebx,%eax + movl 4(%ebp),%ebx + sbbl %ebx,%edx + movl 8(%ebp),%ebx + movl %eax,-32(%edi) + movl %edx,-28(%edi) + +L2: movl 8(%esi),%eax + movl 12(%esi),%edx + sbbl %ebx,%eax + movl 12(%ebp),%ebx + sbbl %ebx,%edx + movl 16(%ebp),%ebx + movl %eax,-24(%edi) + movl %edx,-20(%edi) + +L3: movl 16(%esi),%eax + movl 20(%esi),%edx + sbbl %ebx,%eax + movl 20(%ebp),%ebx + sbbl %ebx,%edx + movl 24(%ebp),%ebx + movl %eax,-16(%edi) + movl %edx,-12(%edi) + +L4: movl 24(%esi),%eax + movl 28(%esi),%edx + sbbl %ebx,%eax + movl 28(%ebp),%ebx + sbbl %ebx,%edx + movl 32(%ebp),%ebx + movl %eax,-8(%edi) + movl %edx,-4(%edi) + + leal 32(%esi),%esi + leal 32(%ebp),%ebp + decl %ecx + jnz Loop + + popl %edx +Lend: + decl %edx /* test %edx w/o clobbering carry */ + js Lend2 + incl %edx +Loop2: + leal 4(%edi),%edi + movl (%esi),%eax + sbbl %ebx,%eax + movl 4(%ebp),%ebx + movl %eax,-4(%edi) + leal 4(%esi),%esi + leal 4(%ebp),%ebp + decl %edx + jnz Loop2 +Lend2: + movl (%esi),%eax + sbbl %ebx,%eax + movl %eax,(%edi) + + sbbl %eax,%eax + negl %eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + diff --git a/mpi/mpi-inv.c b/mpi/mpi-inv.c index 28cde00b6..53ef356b3 100644 --- a/mpi/mpi-inv.c +++ b/mpi/mpi-inv.c @@ -76,7 +76,7 @@ mpi_invm( MPI x, MPI a, MPI n ) mpi_free(t3); mpi_free(u); mpi_free(v); - #else + #elif 0 /* Extended Euclid's algorithm (See TAOPC Vol II, 4.5.2, Alg X) * modified according to Michael Penk's solution for Exercice 35 */ @@ -156,6 +156,107 @@ mpi_invm( MPI x, MPI a, MPI n ) mpi_free(t1); mpi_free(t2); mpi_free(t3); + #else + /* Extended Euclid's algorithm (See TAOPC Vol II, 4.5.2, Alg X) + * modified according to Michael Penk's solution for Exercice 35 + * with further enhancement */ + MPI u, v, u1, u2=NULL, u3, v1, v2=NULL, v3, t1, t2=NULL, t3; + unsigned k; + int sign; + int odd ; + + u = mpi_copy(a); + v = mpi_copy(n); + for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) { + mpi_rshift(u, u, 1); + mpi_rshift(v, v, 1); + } + odd = mpi_test_bit(v,0); + + u1 = mpi_alloc_set_ui(1); + if( !odd ) + u2 = mpi_alloc_set_ui(0); + u3 = mpi_copy(u); + v1 = mpi_copy(v); + if( !odd ) { + v2 = mpi_alloc( mpi_get_nlimbs(u) ); + mpi_sub( v2, u1, u ); /* U is used as const 1 */ + } + v3 = mpi_copy(v); + if( mpi_test_bit(u, 0) ) { /* u is odd */ + t1 = mpi_alloc_set_ui(0); + if( !odd ) { + t2 = mpi_alloc_set_ui(1); t2->sign = 1; + } + t3 = mpi_copy(v); t3->sign = !t3->sign; + goto Y4; + } + else { + t1 = mpi_alloc_set_ui(1); + if( !odd ) + t2 = mpi_alloc_set_ui(0); + t3 = mpi_copy(u); + } + do { + do { + if( !odd ) { + if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */ + mpi_add(t1, t1, v); + mpi_sub(t2, t2, u); + } + mpi_rshift(t1, t1, 1); + mpi_rshift(t2, t2, 1); + mpi_rshift(t3, t3, 1); + } + else { + if( mpi_test_bit(t1, 0) ) + mpi_add(t1, t1, v); + mpi_rshift(t1, t1, 1); + mpi_rshift(t3, t3, 1); + } + Y4: + } while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */ + + if( !t3->sign ) { + mpi_set(u1, t1); + if( !odd ) + mpi_set(u2, t2); + mpi_set(u3, t3); + } + else { + mpi_sub(v1, v, t1); + sign = u->sign; u->sign = !u->sign; + if( !odd ) + mpi_sub(v2, u, t2); + u->sign = sign; + sign = t3->sign; t3->sign = !t3->sign; + mpi_set(v3, t3); + t3->sign = sign; + } + mpi_sub(t1, u1, v1); + if( !odd ) + mpi_sub(t2, u2, v2); + mpi_sub(t3, u3, v3); + if( t1->sign ) { + mpi_add(t1, t1, v); + if( !odd ) + mpi_sub(t2, t2, u); + } + } while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */ + /* mpi_lshift( u3, k ); */ + mpi_set(x, u1); + + mpi_free(u1); + mpi_free(v1); + mpi_free(t1); + if( !odd ) { + mpi_free(u2); + mpi_free(v2); + mpi_free(t2); + } + mpi_free(u3); + mpi_free(v3); + mpi_free(t3); #endif } diff --git a/mpi/mpi-mpow.c b/mpi/mpi-mpow.c new file mode 100644 index 000000000..5ac3c6399 --- /dev/null +++ b/mpi/mpi-mpow.c @@ -0,0 +1,119 @@ +/* mpi-mpow.c - MPI functions + * Copyright (c) 1998 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" +#include "longlong.h" +#include <assert.h> + +static int +build_index( MPI *exparray, int k, int i, int t ) +{ + int j, bitno; + int index = 0; + + bitno = t-i; + for(j=k-1; j >= 0; j-- ) { + index <<= 1; + if( mpi_test_bit( exparray[j], bitno ) ) + index |= 1; + } + /*log_debug("t=%d i=%d index=%d\n", t, i, index );*/ + return index; +} + +/**************** + * RES = (BASE[0] ^ EXP[0]) * (BASE[1] ^ EXP[1]) * ... * mod M + */ +void +mpi_mulpowm( MPI res, MPI *basearray, MPI *exparray, MPI m) +{ + int k; /* number of elements */ + int t; /* bit size of largest exponent */ + int i, j, idx; + MPI *G; /* table with precomputed values of size 2^k */ + MPI tmp; + + for(k=0; basearray[k]; k++ ) + ; + assert(k); + for(t=0, i=0; (tmp=exparray[i]); i++ ) { + /*log_mpidump("exp: ", tmp );*/ + j = mpi_get_nbits(tmp); + if( j > t ) + t = j; + } + /*log_mpidump("mod: ", m );*/ + assert(i==k); + assert(t); + assert( k < 10 ); + + G = m_alloc_clear( (1<<k) * sizeof *G ); + #if 0 + /* do the precomputation */ + G[0] = mpi_alloc_set_ui( 1 ); + for(i=1; i < (1<<k); i++ ) { + for(j=0; j < k; j++ ) { + if( (i & (1<<j) ) ) { + if( !G[i] ) + G[i] = mpi_copy( basearray[j] ); + else + mpi_mulm( G[i], G[i], basearray[j], m ); + } + } + if( !G[i] ) + G[i] = mpi_alloc(0); + } + #endif + /* and calculate */ + tmp = mpi_alloc( mpi_get_nlimbs(m)+1 ); + mpi_set_ui( res, 1 ); + for(i = 1; i <= t; i++ ) { + mpi_mulm(tmp, res, res, m ); + idx = build_index( exparray, k, i, t ); + assert( idx >= 0 && idx < (1<<k) ); + if( !G[idx] ) { + if( !idx ) + G[0] = mpi_alloc_set_ui( 1 ); + else { + for(j=0; j < k; j++ ) { + if( (idx & (1<<j) ) ) { + if( !G[idx] ) + G[idx] = mpi_copy( basearray[j] ); + else + mpi_mulm( G[idx], G[idx], basearray[j], m ); + } + } + if( !G[idx] ) + G[idx] = mpi_alloc(0); + } + } + mpi_mulm(res, tmp, G[idx], m ); + } + + /* cleanup */ + m_free(tmp); + for(i=0; i < (1<<k); i++ ) + mpi_free(G[i]); + m_free(G); +} + |