diff options
Diffstat (limited to 'mpi')
-rw-r--r-- | mpi/ChangeLog | 12 | ||||
-rw-r--r-- | mpi/mpi-internal.h | 17 | ||||
-rw-r--r-- | mpi/mpi-mpow.c | 129 | ||||
-rw-r--r-- | mpi/mpi-pow.c | 19 | ||||
-rw-r--r-- | mpi/mpih-mul.c | 118 |
5 files changed, 137 insertions, 158 deletions
diff --git a/mpi/ChangeLog b/mpi/ChangeLog index 80b2d8de5..ee1cd06fe 100644 --- a/mpi/ChangeLog +++ b/mpi/ChangeLog @@ -1,3 +1,15 @@ +Thu Jan 13 19:31:58 CET 2000 Werner Koch <[email protected]> + + * mpi-internal.h (karatsuba_ctx): New. + * mpih-mul.c (mpihelp_release_karatsuba_ctx): New. + (mpihelp_mul_karatsuba_case): New. + (mpihelp_mul): Split to make use of the new functions. + * mpi-pow.c (mpi_powm): Make use of the new split function + to avoid multiple allocation of temporary memory during the + karatsuba operations. + + * mpi-mpow.c: Removed the unused Barrett code. + Sun Dec 19 15:22:26 CET 1999 Werner Koch <[email protected]> * power/ : Converted more comments to C comments because some AS diff --git a/mpi/mpi-internal.h b/mpi/mpi-internal.h index 035d33cb3..2b521c952 100644 --- a/mpi/mpi-internal.h +++ b/mpi/mpi-internal.h @@ -186,6 +186,17 @@ mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, int mpihelp_cmp( mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size ); /*-- mpihelp-mul.c --*/ + +struct karatsuba_ctx { + struct karatsuba_ctx *next; + mpi_ptr_t tspace; + mpi_size_t tspace_size; + mpi_ptr_t tp; + mpi_size_t tp_size; +}; + +void mpihelp_release_karatsuba_ctx( struct karatsuba_ctx *ctx ); + mpi_limb_t mpihelp_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); mpi_limb_t mpihelp_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, @@ -198,6 +209,12 @@ void mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size ); void mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace); +void mpihelp_mul_karatsuba_case( mpi_ptr_t prodp, + mpi_ptr_t up, mpi_size_t usize, + mpi_ptr_t vp, mpi_size_t vsize, + struct karatsuba_ctx *ctx ); + + /*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/ mpi_limb_t mpihelp_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); diff --git a/mpi/mpi-mpow.c b/mpi/mpi-mpow.c index a8c561dd1..001802191 
100644 --- a/mpi/mpi-mpow.c +++ b/mpi/mpi-mpow.c @@ -1,5 +1,5 @@ /* mpi-mpow.c - MPI functions - * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. * * This file is part of GnuPG. * @@ -25,22 +25,6 @@ #include "longlong.h" #include <assert.h> -/* Barrett is slower than the classical way. It can be tweaked by - * using partial multiplications - */ -/*#define USE_BARRETT*/ - - - -#ifdef USE_BARRETT -static void barrett_mulm( MPI w, MPI u, MPI v, MPI m, MPI y, int k, MPI r1, MPI r2 ); -static MPI init_barrett( MPI m, int *k, MPI *r1, MPI *r2 ); -static int calc_barrett( MPI r, MPI x, MPI m, MPI y, int k, MPI r1, MPI r2 ); -#else -#define barrett_mulm( w, u, v, m, y, k, r1, r2 ) mpi_mulm( (w), (u), (v), (m) ) -#endif - - static int build_index( MPI *exparray, int k, int i, int t ) { @@ -53,7 +37,6 @@ build_index( MPI *exparray, int k, int i, int t ) if( mpi_test_bit( exparray[j], bitno ) ) index |= 1; } - /*log_debug("t=%d i=%d index=%d\n", t, i, index );*/ return index; } @@ -68,35 +51,25 @@ mpi_mulpowm( MPI res, MPI *basearray, MPI *exparray, MPI m) int i, j, idx; MPI *G; /* table with precomputed values of size 2^k */ MPI tmp; - #ifdef USE_BARRETT - MPI barrett_y, barrett_r1, barrett_r2; - int barrett_k; - #endif for(k=0; basearray[k]; k++ ) ; assert(k); for(t=0, i=0; (tmp=exparray[i]); i++ ) { - /*log_mpidump("exp: ", tmp );*/ j = mpi_get_nbits(tmp); if( j > t ) t = j; } - /*log_mpidump("mod: ", m );*/ assert(i==k); assert(t); assert( k < 10 ); G = m_alloc_clear( (1<<k) * sizeof *G ); - #ifdef USE_BARRETT - barrett_y = init_barrett( m, &barrett_k, &barrett_r1, &barrett_r2 ); - #endif /* and calculate */ tmp = mpi_alloc( mpi_get_nlimbs(m)+1 ); mpi_set_ui( res, 1 ); for(i = 1; i <= t; i++ ) { - barrett_mulm(tmp, res, res, m, barrett_y, barrett_k, - barrett_r1, barrett_r2 ); + mpi_mulm(tmp, res, res, m ); idx = build_index( exparray, k, i, t ); assert( idx >= 0 && idx < (1<<k) ); if( !G[idx] ) { 
@@ -108,115 +81,21 @@ mpi_mulpowm( MPI res, MPI *basearray, MPI *exparray, MPI m) if( !G[idx] ) G[idx] = mpi_copy( basearray[j] ); else - barrett_mulm( G[idx], G[idx], basearray[j], - m, barrett_y, barrett_k, barrett_r1, barrett_r2 ); + mpi_mulm( G[idx], G[idx], basearray[j], m ); } } if( !G[idx] ) G[idx] = mpi_alloc(0); } } - barrett_mulm(res, tmp, G[idx], m, barrett_y, barrett_k, barrett_r1, barrett_r2 ); + mpi_mulm(res, tmp, G[idx], m ); } /* cleanup */ mpi_free(tmp); - #ifdef USE_BARRETT - mpi_free(barrett_y); - mpi_free(barrett_r1); - mpi_free(barrett_r2); - #endif for(i=0; i < (1<<k); i++ ) mpi_free(G[i]); m_free(G); } - -#ifdef USE_BARRETT -static void -barrett_mulm( MPI w, MPI u, MPI v, MPI m, MPI y, int k, MPI r1, MPI r2 ) -{ - mpi_mul(w, u, v); - if( calc_barrett( w, w, m, y, k, r1, r2 ) ) - mpi_fdiv_r( w, w, m ); -} - -/**************** - * Barrett precalculation: y = floor(b^(2k) / m) - */ -static MPI -init_barrett( MPI m, int *k, MPI *r1, MPI *r2 ) -{ - MPI tmp; - - mpi_normalize( m ); - *k = mpi_get_nlimbs( m ); - tmp = mpi_alloc( *k + 1 ); - mpi_set_ui( tmp, 1 ); - mpi_lshift_limbs( tmp, 2 * *k ); - mpi_fdiv_q( tmp, tmp, m ); - *r1 = mpi_alloc( 2* *k + 1 ); - *r2 = mpi_alloc( 2* *k + 1 ); - return tmp; -} - -/**************** - * Barrett reduction: We assume that these conditions are met: - * Given x =(x_2k-1 ...x_0)_b - * m =(m_k-1 ....m_0)_b with m_k-1 != 0 - * Output r = x mod m - * Before using this function init_barret must be used to calucalte y and k. - * Returns: false = no error - * true = can't perform barret reduction - */ -static int -calc_barrett( MPI r, MPI x, MPI m, MPI y, int k, MPI r1, MPI r2 ) -{ - int xx = k > 3 ? k-3:0; - - mpi_normalize( x ); - if( mpi_get_nlimbs(x) > 2*k ) - return 1; /* can't do it */ - - /* 1. 
q1 = floor( x / b^k-1) - * q2 = q1 * y - * q3 = floor( q2 / b^k+1 ) - * Actually, we don't need qx, we can work direct on r2 - */ - mpi_set( r2, x ); - mpi_rshift_limbs( r2, k-1 ); - mpi_mul( r2, r2, y ); - mpi_rshift_limbs( r2, k+1 ); - - /* 2. r1 = x mod b^k+1 - * r2 = q3 * m mod b^k+1 - * r = r1 - r2 - * 3. if r < 0 then r = r + b^k+1 - */ - mpi_set( r1, x ); - if( r1->nlimbs > k+1 ) /* quick modulo operation */ - r1->nlimbs = k+1; - mpi_mul( r2, r2, m ); - if( r2->nlimbs > k+1 ) /* quick modulo operation */ - r2->nlimbs = k+1; - mpi_sub( r, r1, r2 ); - - if( mpi_is_neg( r ) ) { - MPI tmp; - - tmp = mpi_alloc( k + 2 ); - mpi_set_ui( tmp, 1 ); - mpi_lshift_limbs( tmp, k+1 ); - mpi_add( r, r, tmp ); - mpi_free(tmp); - } - - /* 4. while r >= m do r = r - m */ - while( mpi_cmp( r, m ) >= 0 ) - mpi_sub( r, r, m ); - - return 0; -} -#endif /* USE_BARRETT */ - diff --git a/mpi/mpi-pow.c b/mpi/mpi-pow.c index e8d55f9b9..fbd2cb8ef 100644 --- a/mpi/mpi-pow.c +++ b/mpi/mpi-pow.c @@ -1,6 +1,6 @@ /* mpi-pow.c - MPI functions * Copyright (C) 1998 Free Software Foundation, Inc. - * Copyright (C) 1994, 1996 Free Software Foundation, Inc. + * Copyright (C) 1994, 1996, 2000 Free Software Foundation, Inc. * * This file is part of GnuPG. * @@ -30,6 +30,7 @@ #include <config.h> #include <stdio.h> #include <stdlib.h> +#include <string.h> #include "mpi-internal.h" #include "longlong.h" #include <assert.h> @@ -159,7 +160,9 @@ mpi_powm( MPI res, MPI base, MPI exp, MPI mod) int c; mpi_limb_t e; mpi_limb_t carry_limb; + struct karatsuba_ctx karactx; + memset( &karactx, 0, sizeof karactx ); negative_result = (ep[0] & 1) && base->sign; i = esize - 1; @@ -177,6 +180,7 @@ mpi_powm( MPI res, MPI base, MPI exp, MPI mod) * by RP (==RES->d), and with 50% probability in the area originally * pointed to by XP. 
*/ + for(;;) { while( c ) { mpi_ptr_t tp; @@ -194,7 +198,6 @@ mpi_powm( MPI res, MPI base, MPI exp, MPI mod) mpi_free_limb_space( tspace ); tsize = 2 * rsize; tspace = mpi_alloc_limb_space( tsize, 0 ); - } mpih_sqr_n( xp, rp, rsize, tspace ); } @@ -209,7 +212,15 @@ mpi_powm( MPI res, MPI base, MPI exp, MPI mod) rsize = xsize; if( (mpi_limb_signed_t)e < 0 ) { - mpihelp_mul( xp, rp, rsize, bp, bsize ); + /*mpihelp_mul( xp, rp, rsize, bp, bsize );*/ + if( bsize < KARATSUBA_THRESHOLD ) { + mpihelp_mul( xp, rp, rsize, bp, bsize ); + } + else { + mpihelp_mul_karatsuba_case( + xp, rp, rsize, bp, bsize, &karactx ); + } + xsize = rsize + bsize; if( xsize > msize ) { mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize); @@ -258,6 +269,8 @@ mpi_powm( MPI res, MPI base, MPI exp, MPI mod) if( mod_shift_cnt ) mpihelp_rshift( rp, rp, rsize, mod_shift_cnt); MPN_NORMALIZE (rp, rsize); + + mpihelp_release_karatsuba_ctx( &karactx ); } if( negative_result && rsize ) { diff --git a/mpi/mpih-mul.c b/mpi/mpih-mul.c index 7707c0e30..3422f6541 100644 --- a/mpi/mpih-mul.c +++ b/mpi/mpih-mul.c @@ -1,5 +1,5 @@ /* mpihelp-mul.c - MPI helper functions - * Copyright (C) 1994, 1996, 1998, 1999 Free Software Foundation, Inc. + * Copyright (C) 1994, 1996, 1998, 1999, 2000 Free Software Foundation, Inc. * * This file is part of GnuPG. 
* @@ -29,6 +29,7 @@ #include <config.h> #include <stdio.h> #include <stdlib.h> +#include <string.h> #include "mpi-internal.h" #include "longlong.h" @@ -372,6 +373,88 @@ mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) } + +void +mpihelp_mul_karatsuba_case( mpi_ptr_t prodp, + mpi_ptr_t up, mpi_size_t usize, + mpi_ptr_t vp, mpi_size_t vsize, + struct karatsuba_ctx *ctx ) +{ + mpi_limb_t cy; + + if( !ctx->tspace || ctx->tspace_size < vsize ) { + if( ctx->tspace ) + mpi_free_limb_space( ctx->tspace ); + ctx->tspace = mpi_alloc_limb_space( 2 * vsize, + m_is_secure( up ) || m_is_secure( vp ) ); + ctx->tspace_size = vsize; + } + + MPN_MUL_N_RECURSE( prodp, up, vp, vsize, ctx->tspace ); + + prodp += vsize; + up += vsize; + usize -= vsize; + if( usize >= vsize ) { + if( !ctx->tp || ctx->tp_size < vsize ) { + if( ctx->tp ) + mpi_free_limb_space( ctx->tp ); + ctx->tp = mpi_alloc_limb_space( 2 * vsize, m_is_secure( up ) + || m_is_secure( vp ) ); + ctx->tp_size = vsize; + } + + do { + MPN_MUL_N_RECURSE( ctx->tp, up, vp, vsize, ctx->tspace ); + cy = mpihelp_add_n( prodp, prodp, ctx->tp, vsize ); + mpihelp_add_1( prodp + vsize, ctx->tp + vsize, vsize, cy ); + prodp += vsize; + up += vsize; + usize -= vsize; + } while( usize >= vsize ); + } + + if( usize ) { + #warning Must test this CODE!!! 
+ g10_log_debug("this code path is not yet tested\n"); + if( usize < KARATSUBA_THRESHOLD ) { + mpihelp_mul( ctx->tspace, vp, vsize, up, usize ); + } + else { + if( !ctx->next ) { + ctx->next = m_alloc_clear( sizeof *ctx ); + } + mpihelp_mul_karatsuba_case( ctx->tspace, + vp, vsize, + up, usize, + ctx->next ); + } + + cy = mpihelp_add_n( prodp, prodp, ctx->tspace, vsize); + mpihelp_add_1( prodp + vsize, ctx->tspace + vsize, usize, cy ); + } +} + + +void +mpihelp_release_karatsuba_ctx( struct karatsuba_ctx *ctx ) +{ + struct karatsuba_ctx *ctx2; + + if( ctx->tp ) + mpi_free_limb_space( ctx->tp ); + if( ctx->tspace ) + mpi_free_limb_space( ctx->tspace ); + for( ctx=ctx->next; ctx; ctx = ctx2 ) { + ctx2 = ctx->next; + if( ctx->tp ) + mpi_free_limb_space( ctx->tp ); + if( ctx->tspace ) + mpi_free_limb_space( ctx->tspace ); + m_free( ctx ); + } +} + /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) * and v (pointed to by VP, with VSIZE limbs), and store the result at * PRODP. 
USIZE + VSIZE limbs are always stored, but if the input @@ -393,7 +476,7 @@ mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, { mpi_ptr_t prod_endp = prodp + usize + vsize - 1; mpi_limb_t cy; - mpi_ptr_t tspace; + struct karatsuba_ctx ctx; if( vsize < KARATSUBA_THRESHOLD ) { mpi_size_t i; @@ -437,34 +520,9 @@ mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, return cy; } - tspace = mpi_alloc_limb_space( 2 * vsize, - m_is_secure( up ) || m_is_secure( vp ) ); - MPN_MUL_N_RECURSE( prodp, up, vp, vsize, tspace ); - - prodp += vsize; - up += vsize; - usize -= vsize; - if( usize >= vsize ) { - mpi_ptr_t tp = mpi_alloc_limb_space( 2 * vsize, m_is_secure( up ) - || m_is_secure( vp ) ); - do { - MPN_MUL_N_RECURSE( tp, up, vp, vsize, tspace ); - cy = mpihelp_add_n( prodp, prodp, tp, vsize ); - mpihelp_add_1( prodp + vsize, tp + vsize, vsize, cy ); - prodp += vsize; - up += vsize; - usize -= vsize; - } while( usize >= vsize ); - mpi_free_limb_space( tp ); - } - - if( usize ) { - mpihelp_mul( tspace, vp, vsize, up, usize ); - cy = mpihelp_add_n( prodp, prodp, tspace, vsize); - mpihelp_add_1( prodp + vsize, tspace + vsize, usize, cy ); - } - - mpi_free_limb_space( tspace ); + memset( &ctx, 0, sizeof ctx ); + mpihelp_mul_karatsuba_case( prodp, up, usize, vp, vsize, &ctx ); + mpihelp_release_karatsuba_ctx( &ctx ); return *prod_endp; } |