13 files changed, 1247 insertions, 11 deletions
diff --git a/mpi/Makefile.am b/mpi/Makefile.am
index 1c32e1312..2801a7519 100644
--- a/mpi/Makefile.am
+++ b/mpi/Makefile.am
@@ -24,6 +24,7 @@ libmpi_a_SOURCES = longlong.h	  \
 	      mpi-inv.c      \
 	      mpi-mul.c      \
 	      mpi-pow.c      \
+	      mpi-mpow.c     \
 	      mpi-scan.c     \
 	      mpicoder.c     \
 	      mpih-cmp.c     \
diff --git a/mpi/Makefile.in b/mpi/Makefile.in
index aae7160c0..bcbbc6416 100644
--- a/mpi/Makefile.in
+++ b/mpi/Makefile.in
@@ -106,6 +106,7 @@ libmpi_a_SOURCES = longlong.h	  \
 	      mpi-inv.c      \
 	      mpi-mul.c      \
 	      mpi-pow.c      \
+	      mpi-mpow.c     \
 	      mpi-scan.c     \
 	      mpicoder.c     \
 	      mpih-cmp.c     \
@@ -138,13 +139,13 @@ LIBS = @LIBS@
 libmpi_a_DEPENDENCIES =  mpih-mul1.o mpih-mul2.o mpih-mul3.o mpih-add1.o \
 mpih-sub1.o mpih-shift.o
 libmpi_a_OBJECTS =  mpi-add.o mpi-bit.o mpi-cmp.o mpi-div.o mpi-gcd.o \
-mpi-inv.o mpi-mul.o mpi-pow.o mpi-scan.o mpicoder.o mpih-cmp.o \
-mpih-add.o mpih-sub.o mpih-div.o mpih-mul.o mpiutil.o
+mpi-inv.o mpi-mul.o mpi-pow.o mpi-mpow.o mpi-scan.o mpicoder.o \
+mpih-cmp.o mpih-add.o mpih-sub.o mpih-div.o mpih-mul.o mpiutil.o
 AR = ar
 CFLAGS = @CFLAGS@
 COMPILE = $(CC) $(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS)
 LINK = $(CC) $(CFLAGS) $(LDFLAGS) -o $@
-DIST_COMMON =  Makefile.am Makefile.in
+DIST_COMMON =  ChangeLog Makefile.am Makefile.in
 
 
 DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST)
@@ -152,10 +153,10 @@ DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST)
 TAR = tar
 GZIP = --best
 DEP_FILES =  .deps/mpi-add.P .deps/mpi-bit.P .deps/mpi-cmp.P \
-.deps/mpi-div.P .deps/mpi-gcd.P .deps/mpi-inv.P .deps/mpi-mul.P \
-.deps/mpi-pow.P .deps/mpi-scan.P .deps/mpicoder.P .deps/mpih-add.P \
-.deps/mpih-cmp.P .deps/mpih-div.P .deps/mpih-mul.P .deps/mpih-sub.P \
-.deps/mpiutil.P
+.deps/mpi-div.P .deps/mpi-gcd.P .deps/mpi-inv.P .deps/mpi-mpow.P \
+.deps/mpi-mul.P .deps/mpi-pow.P .deps/mpi-scan.P .deps/mpicoder.P \
+.deps/mpih-add.P .deps/mpih-cmp.P .deps/mpih-div.P .deps/mpih-mul.P \
+.deps/mpih-sub.P .deps/mpiutil.P
 SOURCES = $(libmpi_a_SOURCES)
 OBJECTS = $(libmpi_a_OBJECTS)
 
diff --git a/mpi/config.links b/mpi/config.links
index 923e18b0d..cf370e054 100644
--- a/mpi/config.links
+++ b/mpi/config.links
@@ -10,7 +10,7 @@ test -d ./mpi || mkdir ./mpi
 echo '/* created by config.links - do not edit */' >./mpi/asm-syntax.h
 
 case "${target}" in
-    i[345]86*-*-linuxaout* | i[345]86*-*-linuxoldld* | i[345]86*-*-*bsd*)
+    i[34]86*-*-linuxaout* | i[34]86*-*-linuxoldld* | i[34]86*-*-*bsd*)
 	echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/i386/syntax.h	    >>./mpi/asm-syntax.h
 	path="i386"
@@ -20,14 +20,14 @@ case "${target}" in
 	cat  $srcdir/mpi/i386/syntax.h	    >>./mpi/asm-syntax.h
 	path="i586 i386"
 	;;
-    i[3456]86*-*-*)
+    i[34]86*-*-*)
 	echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/i386/syntax.h	    >>./mpi/asm-syntax.h
 	path="i386"
 	;;
     i[56]86*-*-* | pentium-*-* | pentiumpro-*-*)
 	echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
-	cat  $srcdir/mpi/i586/syntax.h	    >>./mpi/asm-syntax.h
+	cat  $srcdir/mpi/i386/syntax.h	    >>./mpi/asm-syntax.h
 	path="i586 i386"
 	;;
     alpha*-*-*)
diff --git a/mpi/i586/README b/mpi/i586/README
new file mode 100644
index 000000000..d73b08268
--- /dev/null
+++ b/mpi/i586/README
@@ -0,0 +1,26 @@
+This directory contains mpn functions optimized for Intel Pentium
+processors.
+
+RELEVANT OPTIMIZATION ISSUES
+
+1. Pentium doesn't allocate cache lines on writes, unlike most other modern
+processors.  Since the functions in the mpn class do array writes, we have to
+handle allocating the destination cache lines by reading a word from it in the
+loops, to achieve the best performance.
+
+2. Pairing of memory operations requires that the two issued operations refer
+to different cache banks.  The simplest way to ensure this is to read/write
+two words from the same object.  If we make operations on different objects,
+they might or might not be to the same cache bank.
+
+STATUS
+
+1. mpn_lshift and mpn_rshift run at about 6 cycles/limb, but the Pentium
+documentation indicates that they should take only 43/8 = 5.375 cycles/limb,
+or 5 cycles/limb asymptotically.
+
+2. mpn_add_n and mpn_sub_n run at asymptotically 2 cycles/limb.  Due to loop
+overhead and other delays (cache refill?), they run at or near 2.5 cycles/limb.
+
+3. mpn_mul_1, mpn_addmul_1, mpn_submul_1 all run 1 cycle faster than they
+should...
diff --git a/mpi/i586/distfiles b/mpi/i586/distfiles
new file mode 100644
index 000000000..951480fde
--- /dev/null
+++ b/mpi/i586/distfiles
@@ -0,0 +1,8 @@
+mpih-add1.S
+mpih-mul1.S
+mpih-mul2.S
+mpih-mul3.S
+mpih-shift.S
+mpih-sub1.S
+README
+
diff --git a/mpi/i586/mpih-add1.S b/mpi/i586/mpih-add1.S
new file mode 100644
index 000000000..e9883285d
--- /dev/null
+++ b/mpi/i586/mpih-add1.S
@@ -0,0 +1,134 @@
+/* i80586 add_n -- Add two limb vectors of the same length > 0 and store
+ *		   sum in a third limb vector.
+ *
+ *	Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ *  mpi_limb_t
+ *  mpihelp_add_n( mpi_ptr_t res_ptr,	(sp + 4)
+ *		   mpi_ptr_t s1_ptr,	(sp + 8)
+ *		   mpi_ptr_t s2_ptr,	(sp + 12)
+ *		   mpi_size_t size)	(sp + 16)
+ *  Stores s1 + s2 (size limbs each) at res_ptr and returns the carry.
+ */
+.text
+	ALIGN (3)
+	.globl C_SYMBOL_NAME(mpihelp_add_n)
+C_SYMBOL_NAME(mpihelp_add_n:)
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+	pushl	%ebp			/* 4 regs saved: args start at 20(%esp) */
+
+	movl	20(%esp),%edi		/* res_ptr */
+	movl	24(%esp),%esi		/* s1_ptr */
+	movl	28(%esp),%ebp		/* s2_ptr */
+	movl	32(%esp),%ecx		/* size */
+
+	movl	(%ebp),%ebx		/* preload first s2 limb */
+
+	decl	%ecx			/* last limb is handled at Lend2 */
+	movl	%ecx,%edx
+	shrl	$3,%ecx			/* %ecx = number of 8-limb blocks */
+	andl	$7,%edx			/* %edx = limbs left for Loop2 */
+	testl	%ecx,%ecx		/* zero carry flag */
+	jz	Lend
+	pushl	%edx			/* %edx is scratch inside Loop */
+
+	ALIGN (3)
+Loop:	movl	28(%edi),%eax		/* fetch destination cache line */
+	leal	32(%edi),%edi		/* leal leaves the carry flag alone */
+
+L1:	movl	(%esi),%eax
+	movl	4(%esi),%edx
+	adcl	%ebx,%eax
+	movl	4(%ebp),%ebx
+	adcl	%ebx,%edx
+	movl	8(%ebp),%ebx
+	movl	%eax,-32(%edi)
+	movl	%edx,-28(%edi)
+
+L2:	movl	8(%esi),%eax
+	movl	12(%esi),%edx
+	adcl	%ebx,%eax
+	movl	12(%ebp),%ebx
+	adcl	%ebx,%edx
+	movl	16(%ebp),%ebx
+	movl	%eax,-24(%edi)
+	movl	%edx,-20(%edi)
+
+L3:	movl	16(%esi),%eax
+	movl	20(%esi),%edx
+	adcl	%ebx,%eax
+	movl	20(%ebp),%ebx
+	adcl	%ebx,%edx
+	movl	24(%ebp),%ebx
+	movl	%eax,-16(%edi)
+	movl	%edx,-12(%edi)
+
+L4:	movl	24(%esi),%eax
+	movl	28(%esi),%edx
+	adcl	%ebx,%eax
+	movl	28(%ebp),%ebx
+	adcl	%ebx,%edx
+	movl	32(%ebp),%ebx		/* preload s2 limb for next round */
+	movl	%eax,-8(%edi)
+	movl	%edx,-4(%edi)
+
+	leal	32(%esi),%esi
+	leal	32(%ebp),%ebp
+	decl	%ecx			/* decl does not touch the carry flag */
+	jnz	Loop
+
+	popl	%edx			/* restore leftover limb count */
+Lend:
+	decl	%edx			/* test %edx w/o clobbering carry */
+	js	Lend2
+	incl	%edx
+Loop2:
+	leal	4(%edi),%edi
+	movl	(%esi),%eax
+	adcl	%ebx,%eax
+	movl	4(%ebp),%ebx
+	movl	%eax,-4(%edi)
+	leal	4(%esi),%esi
+	leal	4(%ebp),%ebp
+	decl	%edx
+	jnz	Loop2
+Lend2:
+	movl	(%esi),%eax		/* most significant limb */
+	adcl	%ebx,%eax
+	movl	%eax,(%edi)
+
+	sbbl	%eax,%eax		/* %eax = 0 or -1 depending on carry */
+	negl	%eax			/* return carry as 0 or 1 */
+
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+
diff --git a/mpi/i586/mpih-mul1.S b/mpi/i586/mpih-mul1.S
new file mode 100644
index 000000000..c0bedec0a
--- /dev/null
+++ b/mpi/i586/mpih-mul1.S
@@ -0,0 +1,89 @@
+/* i80586 mul_1 -- Multiply a limb vector with a limb and store
+ *			 the result in a second limb vector.
+ *	Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_mul_1( mpi_ptr_t res_ptr,	(sp + 4)
+ *		  mpi_ptr_t s1_ptr,	(sp + 8)
+ *		  mpi_size_t s1_size,	(sp + 12)
+ *		  mpi_limb_t s2_limb)	(sp + 16)
+ * Stores s1 * s2_limb at res_ptr and returns the carry (high) limb.
+ */
+#define res_ptr edi
+#define s1_ptr	esi
+#define size	ecx
+#define s2_limb ebp
+
+	TEXT
+	ALIGN (3)
+	GLOBL	C_SYMBOL_NAME(mpihelp_mul_1)
+C_SYMBOL_NAME(mpihelp_mul_1:)
+
+	INSN1(push,l	,R(edi))
+	INSN1(push,l	,R(esi))
+	INSN1(push,l	,R(ebx))
+	INSN1(push,l	,R(ebp))	/* 4 regs saved: args start at sp+20 */
+
+	INSN2(mov,l	,R(res_ptr),MEM_DISP(esp,20))
+	INSN2(mov,l	,R(s1_ptr),MEM_DISP(esp,24))
+	INSN2(mov,l	,R(size),MEM_DISP(esp,28))
+	INSN2(mov,l	,R(s2_limb),MEM_DISP(esp,32))
+
+	INSN2(lea,l	,R(res_ptr),MEM_INDEX(res_ptr,size,4))	/* end of res */
+	INSN2(lea,l	,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))	/* end of s1 */
+	INSN1(neg,l	,R(size))	/* negative index, counts up to zero */
+	INSN2(xor,l	,R(ebx),R(ebx))	/* carry limb = 0, carry flag = 0 */
+	ALIGN (3)
+
+Loop:	INSN2(adc,l	,R(ebx),$0)	/* fold carry of previous add in */
+	INSN2(mov,l	,R(eax),MEM_INDEX(s1_ptr,size,4))
+
+	INSN1(mul,l	,R(s2_limb))	/* edx:eax = s1 limb * s2_limb */
+
+	INSN2(add,l	,R(ebx),R(eax))	/* low product + carry limb */
+
+	INSN2(mov,l	,MEM_INDEX(res_ptr,size,4),R(ebx))
+	INSN1(inc,l	,R(size))	/* inc keeps the carry flag */
+
+	INSN2(mov,l	,R(ebx),R(edx))	/* high product is next carry limb */
+	INSN1(jnz,	,Loop)
+
+	INSN2(adc,l	,R(ebx),$0)
+	INSN2(mov,l	,R(eax),R(ebx))	/* return the final carry limb */
+	INSN1(pop,l	,R(ebp))
+	INSN1(pop,l	,R(ebx))
+	INSN1(pop,l	,R(esi))
+	INSN1(pop,l	,R(edi))
+	ret
+
diff --git a/mpi/i586/mpih-mul2.S b/mpi/i586/mpih-mul2.S
new file mode 100644
index 000000000..6b5646239
--- /dev/null
+++ b/mpi/i586/mpih-mul2.S
@@ -0,0 +1,94 @@
+/* i80586 addmul_1 -- Multiply a limb vector with a limb and add
+ *		      the result to a second limb vector.
+ *	Copyright (c) 1997 by Werner Koch (dd9jn)
+ *	Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_addmul_1( mpi_ptr_t res_ptr,      (sp + 4)
+ *		     mpi_ptr_t s1_ptr,	     (sp + 8)
+ *		     mpi_size_t s1_size,     (sp + 12)
+ *		     mpi_limb_t s2_limb)     (sp + 16)
+ * Adds s1 * s2_limb to the limbs at res_ptr and returns the carry limb.
+ */
+#define res_ptr edi
+#define s1_ptr	esi
+#define size	ecx
+#define s2_limb ebp
+
+	TEXT
+	ALIGN (3)
+	GLOBL	C_SYMBOL_NAME(mpihelp_addmul_1)
+C_SYMBOL_NAME(mpihelp_addmul_1:)
+
+	INSN1(push,l	,R(edi))
+	INSN1(push,l	,R(esi))
+	INSN1(push,l	,R(ebx))
+	INSN1(push,l	,R(ebp))	/* 4 regs saved: args start at sp+20 */
+
+	INSN2(mov,l	,R(res_ptr),MEM_DISP(esp,20))
+	INSN2(mov,l	,R(s1_ptr),MEM_DISP(esp,24))
+	INSN2(mov,l	,R(size),MEM_DISP(esp,28))
+	INSN2(mov,l	,R(s2_limb),MEM_DISP(esp,32))
+
+	INSN2(lea,l	,R(res_ptr),MEM_INDEX(res_ptr,size,4))	/* end of res */
+	INSN2(lea,l	,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))	/* end of s1 */
+	INSN1(neg,l	,R(size))	/* negative index, counts up to zero */
+	INSN2(xor,l	,R(ebx),R(ebx))	/* carry limb = 0, carry flag = 0 */
+	ALIGN (3)
+
+Loop:	INSN2(adc,l	,R(ebx),$0)	/* fold carry of previous add in */
+	INSN2(mov,l	,R(eax),MEM_INDEX(s1_ptr,size,4))
+
+	INSN1(mul,l	,R(s2_limb))	/* edx:eax = s1 limb * s2_limb */
+
+	INSN2(add,l	,R(eax),R(ebx))	/* low product + carry limb */
+	INSN2(mov,l	,R(ebx),MEM_INDEX(res_ptr,size,4))	/* old res limb */
+
+	INSN2(adc,l	,R(edx),$0)	/* carry goes into the high product */
+	INSN2(add,l	,R(ebx),R(eax))	/* res limb + product */
+
+	INSN2(mov,l	,MEM_INDEX(res_ptr,size,4),R(ebx))
+	INSN1(inc,l	,R(size))	/* inc keeps the carry flag */
+
+	INSN2(mov,l	,R(ebx),R(edx))	/* high product is next carry limb */
+	INSN1(jnz,	,Loop)
+
+	INSN2(adc,l	,R(ebx),$0)
+	INSN2(mov,l	,R(eax),R(ebx))	/* return the final carry limb */
+	INSN1(pop,l	,R(ebp))
+	INSN1(pop,l	,R(ebx))
+	INSN1(pop,l	,R(esi))
+	INSN1(pop,l	,R(edi))
+	ret
+
diff --git a/mpi/i586/mpih-mul3.S b/mpi/i586/mpih-mul3.S
new file mode 100644
index 000000000..69b7f4672
--- /dev/null
+++ b/mpi/i586/mpih-mul3.S
@@ -0,0 +1,94 @@
+/* i80586 submul_1 -- Multiply a limb vector with a limb and subtract
+ *		      the result from a second limb vector.
+ *	Copyright (c) 1997 by Werner Koch (dd9jn)
+ *	Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_submul_1( mpi_ptr_t res_ptr,      (sp + 4)
+ *		     mpi_ptr_t s1_ptr,	     (sp + 8)
+ *		     mpi_size_t s1_size,     (sp + 12)
+ *		     mpi_limb_t s2_limb)     (sp + 16)
+ * Subtracts s1 * s2_limb from the limbs at res_ptr; returns the borrow limb.
+ */
+#define res_ptr edi
+#define s1_ptr	esi
+#define size	ecx
+#define s2_limb ebp
+
+	TEXT
+	ALIGN (3)
+	GLOBL	C_SYMBOL_NAME(mpihelp_submul_1)
+C_SYMBOL_NAME(mpihelp_submul_1:)
+
+	INSN1(push,l	,R(edi))
+	INSN1(push,l	,R(esi))
+	INSN1(push,l	,R(ebx))
+	INSN1(push,l	,R(ebp))	/* 4 regs saved: args start at sp+20 */
+
+	INSN2(mov,l	,R(res_ptr),MEM_DISP(esp,20))
+	INSN2(mov,l	,R(s1_ptr),MEM_DISP(esp,24))
+	INSN2(mov,l	,R(size),MEM_DISP(esp,28))
+	INSN2(mov,l	,R(s2_limb),MEM_DISP(esp,32))
+
+	INSN2(lea,l	,R(res_ptr),MEM_INDEX(res_ptr,size,4))	/* end of res */
+	INSN2(lea,l	,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))	/* end of s1 */
+	INSN1(neg,l	,R(size))	/* negative index, counts up to zero */
+	INSN2(xor,l	,R(ebx),R(ebx))	/* borrow limb = 0, carry flag = 0 */
+	ALIGN (3)
+
+Loop:	INSN2(adc,l	,R(ebx),$0)	/* fold borrow of previous sub in */
+	INSN2(mov,l	,R(eax),MEM_INDEX(s1_ptr,size,4))
+
+	INSN1(mul,l	,R(s2_limb))	/* edx:eax = s1 limb * s2_limb */
+
+	INSN2(add,l	,R(eax),R(ebx))	/* low product + borrow limb */
+	INSN2(mov,l	,R(ebx),MEM_INDEX(res_ptr,size,4))	/* old res limb */
+
+	INSN2(adc,l	,R(edx),$0)	/* carry goes into the high product */
+	INSN2(sub,l	,R(ebx),R(eax))	/* res limb - product */
+
+	INSN2(mov,l	,MEM_INDEX(res_ptr,size,4),R(ebx))
+	INSN1(inc,l	,R(size))	/* inc keeps the carry flag */
+
+	INSN2(mov,l	,R(ebx),R(edx))	/* high product is next borrow limb */
+	INSN1(jnz,	,Loop)
+
+	INSN2(adc,l	,R(ebx),$0)
+	INSN2(mov,l	,R(eax),R(ebx))	/* return the final borrow limb */
+	INSN1(pop,l	,R(ebp))
+	INSN1(pop,l	,R(ebx))
+	INSN1(pop,l	,R(esi))
+	INSN1(pop,l	,R(edi))
+	ret
+
diff --git a/mpi/i586/mpih-shift.S b/mpi/i586/mpih-shift.S
new file mode 100644
index 000000000..9f1563810
--- /dev/null
+++ b/mpi/i586/mpih-shift.S
@@ -0,0 +1,426 @@
+/* i80586   rshift, lshift
+ *	Copyright (c) 1997 by Werner Koch (dd9jn)
+ *	Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_lshift( mpi_ptr_t wp,	(sp + 4)
+ *		   mpi_ptr_t up,	(sp + 8)
+ *		   mpi_size_t usize,	(sp + 12)
+ *		   unsigned cnt)	(sp + 16)
+ * Shifts the usize limbs at up left by cnt bits into wp and returns the
+ * bits shifted out of the most significant limb. */
+.text
+	ALIGN (3)
+	.globl C_SYMBOL_NAME(mpihelp_lshift)
+C_SYMBOL_NAME(mpihelp_lshift:)
+
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+	pushl	%ebp
+
+	movl	20(%esp),%edi		/* res_ptr */
+	movl	24(%esp),%esi		/* s_ptr */
+	movl	28(%esp),%ebp		/* size */
+	movl	32(%esp),%ecx		/* cnt */
+
+/* We can use faster code for shift-by-1 under certain conditions.  */
+	cmp	$1,%ecx
+	jne	Lnormal
+	leal	4(%esi),%eax
+	cmpl	%edi,%eax
+	jnc	Lspecial		/* jump if s_ptr + 1 >= res_ptr */
+	leal	(%esi,%ebp,4),%eax
+	cmpl	%eax,%edi
+	jnc	Lspecial		/* jump if res_ptr >= s_ptr + size */
+
+Lnormal:
+	leal	-4(%edi,%ebp,4),%edi	/* point at the most significant limbs */
+	leal	-4(%esi,%ebp,4),%esi
+
+	movl	(%esi),%edx
+	subl	$4,%esi
+	xorl	%eax,%eax
+	shldl	%cl,%edx,%eax		/* compute carry limb */
+	pushl	%eax			/* push carry limb onto stack */
+
+	decl	%ebp
+	pushl	%ebp			/* keep size-1 for the Loop2 count */
+	shrl	$3,%ebp			/* number of 8-limb blocks */
+	jz	Lend
+
+	movl	(%edi),%eax		/* fetch destination cache line */
+
+	ALIGN	(2)
+Loop:	movl	-28(%edi),%eax		/* fetch destination cache line */
+	movl	%edx,%ebx
+
+	movl	(%esi),%eax
+	movl	-4(%esi),%edx
+	shldl	%cl,%eax,%ebx
+	shldl	%cl,%edx,%eax
+	movl	%ebx,(%edi)
+	movl	%eax,-4(%edi)
+
+	movl	-8(%esi),%ebx
+	movl	-12(%esi),%eax
+	shldl	%cl,%ebx,%edx
+	shldl	%cl,%eax,%ebx
+	movl	%edx,-8(%edi)
+	movl	%ebx,-12(%edi)
+
+	movl	-16(%esi),%edx
+	movl	-20(%esi),%ebx
+	shldl	%cl,%edx,%eax
+	shldl	%cl,%ebx,%edx
+	movl	%eax,-16(%edi)
+	movl	%edx,-20(%edi)
+
+	movl	-24(%esi),%eax
+	movl	-28(%esi),%edx
+	shldl	%cl,%eax,%ebx
+	shldl	%cl,%edx,%eax
+	movl	%ebx,-24(%edi)
+	movl	%eax,-28(%edi)
+
+	subl	$32,%esi
+	subl	$32,%edi
+	decl	%ebp
+	jnz	Loop
+
+Lend:	popl	%ebp
+	andl	$7,%ebp			/* limbs left over after the blocks */
+	jz	Lend2
+Loop2:	movl	(%esi),%eax
+	shldl	%cl,%eax,%edx
+	movl	%edx,(%edi)
+	movl	%eax,%edx
+	subl	$4,%esi
+	subl	$4,%edi
+	decl	%ebp
+	jnz	Loop2
+
+Lend2:	shll	%cl,%edx		/* compute least significant limb */
+	movl	%edx,(%edi)		/* store it */
+
+	popl	%eax			/* pop carry limb */
+
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+/* We loop from the least significant end of the arrays, which is only
+   permissible for the pointer layouts accepted by the checks at function
+   entry, since the function is documented to work for overlapping source
+   and destination.  The shift by one is done by adding with carry.  */
+
+Lspecial:
+	movl	(%esi),%edx
+	addl	$4,%esi
+
+	decl	%ebp
+	pushl	%ebp			/* keep size-1 for the LLoop2 count */
+	shrl	$3,%ebp			/* number of 8-limb blocks */
+
+	addl	%edx,%edx		/* shift first limb, top bit to carry */
+	incl	%ebp
+	decl	%ebp			/* test %ebp w/o clobbering carry */
+	jz	LLend
+
+	movl	(%edi),%eax		/* fetch destination cache line */
+
+	ALIGN	(2)
+LLoop:	movl	28(%edi),%eax		/* fetch destination cache line */
+	movl	%edx,%ebx
+
+	movl	(%esi),%eax
+	movl	4(%esi),%edx
+	adcl	%eax,%eax
+	movl	%ebx,(%edi)
+	adcl	%edx,%edx
+	movl	%eax,4(%edi)
+
+	movl	8(%esi),%ebx
+	movl	12(%esi),%eax
+	adcl	%ebx,%ebx
+	movl	%edx,8(%edi)
+	adcl	%eax,%eax
+	movl	%ebx,12(%edi)
+
+	movl	16(%esi),%edx
+	movl	20(%esi),%ebx
+	adcl	%edx,%edx
+	movl	%eax,16(%edi)
+	adcl	%ebx,%ebx
+	movl	%edx,20(%edi)
+
+	movl	24(%esi),%eax
+	movl	28(%esi),%edx
+	adcl	%eax,%eax
+	movl	%ebx,24(%edi)
+	adcl	%edx,%edx
+	movl	%eax,28(%edi)
+
+	leal	32(%esi),%esi		/* use leal not to clobber carry */
+	leal	32(%edi),%edi
+	decl	%ebp
+	jnz	LLoop
+
+LLend:	popl	%ebp
+	sbbl	%eax,%eax		/* save carry in %eax */
+	andl	$7,%ebp			/* limbs left over after the blocks */
+	jz	LLend2
+	addl	%eax,%eax		/* restore carry from eax */
+LLoop2: movl	%edx,%ebx
+	movl	(%esi),%edx
+	adcl	%edx,%edx
+	movl	%ebx,(%edi)
+
+	leal	4(%esi),%esi		/* use leal not to clobber carry */
+	leal	4(%edi),%edi
+	decl	%ebp
+	jnz	LLoop2
+
+	jmp	LL1
+LLend2: addl	%eax,%eax		/* restore carry from eax */
+LL1:	movl	%edx,(%edi)		/* store last limb */
+
+	sbbl	%eax,%eax
+	negl	%eax			/* return shifted-out bit as 0 or 1 */
+
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_rshift( mpi_ptr_t wp,	(sp + 4)
+ *		   mpi_ptr_t up,	(sp + 8)
+ *		   mpi_size_t usize,	(sp + 12)
+ *		   unsigned cnt)	(sp + 16)
+ * Shifts the usize limbs at up right by cnt bits into wp and returns the
+ * shifted-out bits in the most significant bits of the result. */
+.text
+	ALIGN (3)
+	.globl C_SYMBOL_NAME(mpihelp_rshift)
+C_SYMBOL_NAME(mpihelp_rshift:)
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+	pushl	%ebp
+
+	movl	20(%esp),%edi		/* res_ptr */
+	movl	24(%esp),%esi		/* s_ptr */
+	movl	28(%esp),%ebp		/* size */
+	movl	32(%esp),%ecx		/* cnt */
+
+/* We can use faster code for shift-by-1 under certain conditions.  */
+	cmp	$1,%ecx
+	jne	Rnormal
+	leal	4(%edi),%eax
+	cmpl	%esi,%eax
+	jnc	Rspecial		/* jump if res_ptr + 1 >= s_ptr */
+	leal	(%edi,%ebp,4),%eax
+	cmpl	%eax,%esi
+	jnc	Rspecial		/* jump if s_ptr >= res_ptr + size */
+
+Rnormal:
+	movl	(%esi),%edx
+	addl	$4,%esi
+	xorl	%eax,%eax
+	shrdl	%cl,%edx,%eax		/* compute carry limb */
+	pushl	%eax			/* push carry limb onto stack */
+
+	decl	%ebp
+	pushl	%ebp			/* keep size-1 for the Roop2 count */
+	shrl	$3,%ebp			/* number of 8-limb blocks */
+	jz	Rend
+
+	movl	(%edi),%eax		/* fetch destination cache line */
+
+	ALIGN	(2)
+Roop:	movl	28(%edi),%eax		/* fetch destination cache line */
+	movl	%edx,%ebx
+
+	movl	(%esi),%eax
+	movl	4(%esi),%edx
+	shrdl	%cl,%eax,%ebx
+	shrdl	%cl,%edx,%eax
+	movl	%ebx,(%edi)
+	movl	%eax,4(%edi)
+
+	movl	8(%esi),%ebx
+	movl	12(%esi),%eax
+	shrdl	%cl,%ebx,%edx
+	shrdl	%cl,%eax,%ebx
+	movl	%edx,8(%edi)
+	movl	%ebx,12(%edi)
+
+	movl	16(%esi),%edx
+	movl	20(%esi),%ebx
+	shrdl	%cl,%edx,%eax
+	shrdl	%cl,%ebx,%edx
+	movl	%eax,16(%edi)
+	movl	%edx,20(%edi)
+
+	movl	24(%esi),%eax
+	movl	28(%esi),%edx
+	shrdl	%cl,%eax,%ebx
+	shrdl	%cl,%edx,%eax
+	movl	%ebx,24(%edi)
+	movl	%eax,28(%edi)
+
+	addl	$32,%esi
+	addl	$32,%edi
+	decl	%ebp
+	jnz	Roop
+
+Rend:	popl	%ebp
+	andl	$7,%ebp			/* limbs left over after the blocks */
+	jz	Rend2
+Roop2:	movl	(%esi),%eax
+	shrdl	%cl,%eax,%edx		/* compute result limb */
+	movl	%edx,(%edi)
+	movl	%eax,%edx
+	addl	$4,%esi
+	addl	$4,%edi
+	decl	%ebp
+	jnz	Roop2
+
+Rend2:	shrl	%cl,%edx		/* compute most significant limb */
+	movl	%edx,(%edi)		/* store it */
+
+	popl	%eax			/* pop carry limb */
+
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+/* We loop from the most significant end of the arrays, which is only
+   permissible for the pointer layouts accepted by the checks at function
+   entry, since the function is documented to work for overlapping source
+   and destination.  The shift by one is done by rotating through carry.  */
+
+Rspecial:
+	leal	-4(%edi,%ebp,4),%edi	/* point at the most significant limbs */
+	leal	-4(%esi,%ebp,4),%esi
+
+	movl	(%esi),%edx
+	subl	$4,%esi
+
+	decl	%ebp
+	pushl	%ebp			/* keep size-1 for the RLoop2 count */
+	shrl	$3,%ebp			/* number of 8-limb blocks */
+
+	shrl	$1,%edx			/* shift top limb, low bit to carry */
+	incl	%ebp
+	decl	%ebp			/* test %ebp w/o clobbering carry */
+	jz	RLend
+
+	movl	(%edi),%eax		/* fetch destination cache line */
+
+	ALIGN	(2)
+RLoop:	movl	-28(%edi),%eax		/* fetch destination cache line */
+	movl	%edx,%ebx
+
+	movl	(%esi),%eax
+	movl	-4(%esi),%edx
+	rcrl	$1,%eax
+	movl	%ebx,(%edi)
+	rcrl	$1,%edx
+	movl	%eax,-4(%edi)
+
+	movl	-8(%esi),%ebx
+	movl	-12(%esi),%eax
+	rcrl	$1,%ebx
+	movl	%edx,-8(%edi)
+	rcrl	$1,%eax
+	movl	%ebx,-12(%edi)
+
+	movl	-16(%esi),%edx
+	movl	-20(%esi),%ebx
+	rcrl	$1,%edx
+	movl	%eax,-16(%edi)
+	rcrl	$1,%ebx
+	movl	%edx,-20(%edi)
+
+	movl	-24(%esi),%eax
+	movl	-28(%esi),%edx
+	rcrl	$1,%eax
+	movl	%ebx,-24(%edi)
+	rcrl	$1,%edx
+	movl	%eax,-28(%edi)
+
+	leal	-32(%esi),%esi		/* use leal not to clobber carry */
+	leal	-32(%edi),%edi
+	decl	%ebp
+	jnz	RLoop
+
+RLend:	popl	%ebp
+	sbbl	%eax,%eax		/* save carry in %eax */
+	andl	$7,%ebp			/* limbs left over after the blocks */
+	jz	RLend2
+	addl	%eax,%eax		/* restore carry from eax */
+RLoop2: movl	%edx,%ebx
+	movl	(%esi),%edx
+	rcrl	$1,%edx
+	movl	%ebx,(%edi)
+
+	leal	-4(%esi),%esi		/* use leal not to clobber carry */
+	leal	-4(%edi),%edi
+	decl	%ebp
+	jnz	RLoop2
+
+	jmp	RL1
+RLend2: addl	%eax,%eax		/* restore carry from eax */
+RL1:	movl	%edx,(%edi)		/* store last limb */
+
+	movl	$0,%eax			/* movl leaves the carry flag alone */
+	rcrl	$1,%eax			/* shifted-out bit becomes the top bit */
+
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
diff --git a/mpi/i586/mpih-sub1.S b/mpi/i586/mpih-sub1.S
new file mode 100644
index 000000000..1f5c0bfdd
--- /dev/null
+++ b/mpi/i586/mpih-sub1.S
@@ -0,0 +1,143 @@
+/* i80586 sub_n -- Subtract two limb vectors of the same length > 0 and store
+ *		   difference in a third limb vector.
+ *	Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+ *	Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ *  mpi_limb_t
+ *  mpihelp_sub_n( mpi_ptr_t res_ptr,	(sp + 4)
+ *		   mpi_ptr_t s1_ptr,	(sp + 8)
+ *		   mpi_ptr_t s2_ptr,	(sp + 12)
+ *		   mpi_size_t size)	(sp + 16)
+ *  Stores s1 - s2 (size limbs each) at res_ptr and returns the borrow.
+ */
+
+.text
+	ALIGN (3)
+	.globl C_SYMBOL_NAME(mpihelp_sub_n)
+C_SYMBOL_NAME(mpihelp_sub_n:)
+
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+	pushl	%ebp			/* 4 regs saved: args start at 20(%esp) */
+
+	movl	20(%esp),%edi		/* res_ptr */
+	movl	24(%esp),%esi		/* s1_ptr */
+	movl	28(%esp),%ebp		/* s2_ptr */
+	movl	32(%esp),%ecx		/* size */
+
+	movl	(%ebp),%ebx		/* preload first s2 limb */
+
+	decl	%ecx			/* last limb is handled at Lend2 */
+	movl	%ecx,%edx
+	shrl	$3,%ecx			/* %ecx = number of 8-limb blocks */
+	andl	$7,%edx			/* %edx = limbs left for Loop2 */
+	testl	%ecx,%ecx		/* zero carry flag */
+	jz	Lend
+	pushl	%edx			/* %edx is scratch inside Loop */
+
+	ALIGN (3)
+Loop:	movl	28(%edi),%eax		/* fetch destination cache line */
+	leal	32(%edi),%edi		/* leal leaves the carry flag alone */
+
+L1:	movl	(%esi),%eax
+	movl	4(%esi),%edx
+	sbbl	%ebx,%eax
+	movl	4(%ebp),%ebx
+	sbbl	%ebx,%edx
+	movl	8(%ebp),%ebx
+	movl	%eax,-32(%edi)
+	movl	%edx,-28(%edi)
+
+L2:	movl	8(%esi),%eax
+	movl	12(%esi),%edx
+	sbbl	%ebx,%eax
+	movl	12(%ebp),%ebx
+	sbbl	%ebx,%edx
+	movl	16(%ebp),%ebx
+	movl	%eax,-24(%edi)
+	movl	%edx,-20(%edi)
+
+L3:	movl	16(%esi),%eax
+	movl	20(%esi),%edx
+	sbbl	%ebx,%eax
+	movl	20(%ebp),%ebx
+	sbbl	%ebx,%edx
+	movl	24(%ebp),%ebx
+	movl	%eax,-16(%edi)
+	movl	%edx,-12(%edi)
+
+L4:	movl	24(%esi),%eax
+	movl	28(%esi),%edx
+	sbbl	%ebx,%eax
+	movl	28(%ebp),%ebx
+	sbbl	%ebx,%edx
+	movl	32(%ebp),%ebx		/* preload s2 limb for next round */
+	movl	%eax,-8(%edi)
+	movl	%edx,-4(%edi)
+
+	leal	32(%esi),%esi
+	leal	32(%ebp),%ebp
+	decl	%ecx			/* decl does not touch the carry flag */
+	jnz	Loop
+
+	popl	%edx			/* restore leftover limb count */
+Lend:
+	decl	%edx			/* test %edx w/o clobbering carry */
+	js	Lend2
+	incl	%edx
+Loop2:
+	leal	4(%edi),%edi
+	movl	(%esi),%eax
+	sbbl	%ebx,%eax
+	movl	4(%ebp),%ebx
+	movl	%eax,-4(%edi)
+	leal	4(%esi),%esi
+	leal	4(%ebp),%ebp
+	decl	%edx
+	jnz	Loop2
+Lend2:
+	movl	(%esi),%eax		/* most significant limb */
+	sbbl	%ebx,%eax
+	movl	%eax,(%edi)
+
+	sbbl	%eax,%eax		/* %eax = 0 or -1 depending on borrow */
+	negl	%eax			/* return borrow as 0 or 1 */
+
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
diff --git a/mpi/mpi-inv.c b/mpi/mpi-inv.c
index 28cde00b6..53ef356b3 100644
--- a/mpi/mpi-inv.c
+++ b/mpi/mpi-inv.c
@@ -76,7 +76,7 @@ mpi_invm( MPI x, MPI a, MPI n )
     mpi_free(t3);
     mpi_free(u);
     mpi_free(v);
-  #else
+  #elif 0
     /* Extended Euclid's algorithm (See TAOPC Vol II, 4.5.2, Alg X)
      * modified according to Michael Penk's solution for Exercice 35 */
 
@@ -156,6 +156,107 @@ mpi_invm( MPI x, MPI a, MPI n )
     mpi_free(t1);
     mpi_free(t2);
     mpi_free(t3);
+  #else
+    /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
+     * modified according to Michael Penk's solution for Exercise 35
+     * with further enhancement: U2/V2/T2 are not needed if V is odd */
+    MPI u, v, u1, u2=NULL, u3, v1, v2=NULL, v3, t1, t2=NULL, t3;
+    unsigned k;
+    int sign;
+    int odd ;
+
+    u = mpi_copy(a);
+    v = mpi_copy(n);
+    for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) {
+	mpi_rshift(u, u, 1);
+	mpi_rshift(v, v, 1);
+    }
+    odd = mpi_test_bit(v,0);
+
+    u1 = mpi_alloc_set_ui(1);
+    if( !odd )
+	u2 = mpi_alloc_set_ui(0);
+    u3 = mpi_copy(u);
+    v1 = mpi_copy(v);
+    if( !odd ) {
+	v2 = mpi_alloc( mpi_get_nlimbs(u) );
+	mpi_sub( v2, u1, u ); /* U1 is used as const 1 */
+    }
+    v3 = mpi_copy(v);
+    if( mpi_test_bit(u, 0) ) { /* u is odd */
+	t1 = mpi_alloc_set_ui(0);
+	if( !odd ) {
+	    t2 = mpi_alloc_set_ui(1); t2->sign = 1;
+	}
+	t3 = mpi_copy(v); t3->sign = !t3->sign;
+	goto Y4;
+    }
+    else {
+	t1 = mpi_alloc_set_ui(1);
+	if( !odd )
+	    t2 = mpi_alloc_set_ui(0);
+	t3 = mpi_copy(u);
+    }
+    do {
+	do {
+	    if( !odd ) {
+		if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */
+		    mpi_add(t1, t1, v);
+		    mpi_sub(t2, t2, u);
+		}
+		mpi_rshift(t1, t1, 1);
+		mpi_rshift(t2, t2, 1);
+		mpi_rshift(t3, t3, 1);
+	    }
+	    else {
+		if( mpi_test_bit(t1, 0) )
+		    mpi_add(t1, t1, v);
+		mpi_rshift(t1, t1, 1);
+		mpi_rshift(t3, t3, 1);
+	    }
+	  Y4: ; /* a label must be followed by a statement */
+	} while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */
+
+	if( !t3->sign ) {
+	    mpi_set(u1, t1);
+	    if( !odd )
+		mpi_set(u2, t2);
+	    mpi_set(u3, t3);
+	}
+	else {
+	    mpi_sub(v1, v, t1);
+	    sign = u->sign; u->sign = !u->sign;
+	    if( !odd )
+		mpi_sub(v2, u, t2);
+	    u->sign = sign;
+	    sign = t3->sign; t3->sign = !t3->sign;
+	    mpi_set(v3, t3);
+	    t3->sign = sign;
+	}
+	mpi_sub(t1, u1, v1);
+	if( !odd )
+	    mpi_sub(t2, u2, v2);
+	mpi_sub(t3, u3, v3);
+	if( t1->sign ) {
+	    mpi_add(t1, t1, v);
+	    if( !odd )
+		mpi_sub(t2, t2, u);
+	}
+    } while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
+    /* mpi_lshift( u3, k ); */
+    mpi_set(x, u1);
+
+    /* Release all temporaries.  U and V are private copies of A and N
+     * made above, so they have to be freed here as well. */
+    mpi_free(u);
+    mpi_free(v);
+    mpi_free(u1); mpi_free(v1); mpi_free(t1);
+    if( !odd ) {
+	mpi_free(u2);
+	mpi_free(v2);
+	mpi_free(t2);
+    }
+    mpi_free(u3); mpi_free(v3); mpi_free(t3);
   #endif
 }
 
diff --git a/mpi/mpi-mpow.c b/mpi/mpi-mpow.c
new file mode 100644
index 000000000..5ac3c6399
--- /dev/null
+++ b/mpi/mpi-mpow.c
@@ -0,0 +1,119 @@
+/* mpi-mpow.c  -  MPI functions
+ *	Copyright (c) 1998 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+#include <assert.h>
+
+static int
+build_index( MPI *exparray, int k, int i, int t )
+{
+    /* Collect bit number (T-I) of each of the K exponents into one
+     * integer.  EXPARRAY[0] contributes bit 0 of the result,
+     * EXPARRAY[1] bit 1 and so on. */
+    const int bitno = t - i;
+    int pos = k;
+    int mask = 0;
+
+    while( pos-- > 0 ) {
+	mask = (mask << 1) | (mpi_test_bit( exparray[pos], bitno ) ? 1 : 0);
+    }
+    return mask;
+}
+
+/****************
+ * RES = (BASE[0] ^ EXP[0]) *  (BASE[1] ^ EXP[1]) * ... * mod M
+ *
+ * BASEARRAY and EXPARRAY are NULL terminated arrays with the same number
+ * of elements K (0 < K < 10); the largest exponent must have a non-zero
+ * bit length.  These preconditions are enforced with assert().
+ *
+ * All exponents are scanned in parallel, starting at the most significant
+ * bit of the largest one.  For every bit position RES is squared and then
+ * multiplied by G[idx], the product mod M of those bases whose exponent
+ * has the current bit set.  G is filled in lazily, so only the products
+ * which are really needed get computed.
+ */
+void
+mpi_mulpowm( MPI res, MPI *basearray, MPI *exparray, MPI m)
+{
+    int k;	/* number of elements */
+    int t;	/* bit size of largest exponent */
+    int i, j, idx;
+    MPI *G;	/* table with precomputed values of size 2^k */
+    MPI tmp;
+
+    /* count the bases */
+    for(k=0; basearray[k]; k++ )
+	;
+    assert(k);
+    /* find the bit length of the largest exponent */
+    for(t=0, i=0; (tmp=exparray[i]); i++ ) {
+	j = mpi_get_nbits(tmp);
+	if( j > t )
+	    t = j;
+    }
+    assert(i==k);
+    assert(t);
+    assert( k < 10 );
+
+    /* A NULL entry in the table means "not yet computed". */
+    G = m_alloc_clear( (1<<k) * sizeof *G );
+
+    /* and calculate */
+    tmp =  mpi_alloc( mpi_get_nlimbs(m)+1 );
+    mpi_set_ui( res, 1 );
+    for(i = 1; i <= t; i++ ) {
+	/* square for the next (lower) bit position */
+	mpi_mulm(tmp, res, res, m );
+	idx = build_index( exparray, k, i, t );
+	assert( idx >= 0 && idx < (1<<k) );
+	if( !G[idx] ) {
+	    if( !idx )
+		 G[0] = mpi_alloc_set_ui( 1 );
+	    else {
+		for(j=0; j < k; j++ ) {
+		    if( (idx & (1<<j) ) ) {
+			if( !G[idx] )
+			    G[idx] = mpi_copy( basearray[j] );
+			else
+			    mpi_mulm( G[idx], G[idx], basearray[j], m );
+		    }
+		}
+		if( !G[idx] )
+		    G[idx] = mpi_alloc(0);
+	    }
+	}
+	/* multiply in the bases selected by this bit position */
+	mpi_mulm(res, tmp, G[idx], m );
+    }
+
+    /* cleanup */
+    /* TMP was created by mpi_alloc() and therefore has to be released
+     * with mpi_free(), exactly like the entries of the table. */
+    mpi_free(tmp);
+    for(i=0; i < (1<<k); i++ )
+	mpi_free(G[i]);
+    m_free(G);
+}
+