author     Werner Koch <[email protected]>  2008-03-22 17:01:37 +0000
committer  Werner Koch <[email protected]>  2008-03-22 17:01:37 +0000
commit     537cada38ede5174f8500d0b81459d87b3da16ac (patch)
tree       6ebef3677b121c9154f2e912ccab9da654cec469
parent     * parse-packet.c (parse_key): Parse a secret key encrypted with (diff)
Improved AES performance.
-rw-r--r--  NEWS                      |   8
-rw-r--r--  cipher/ChangeLog          |  10
-rw-r--r--  cipher/algorithms.h       |   8
-rw-r--r--  cipher/cipher.c           | 301
-rw-r--r--  cipher/rijndael.c         | 290
-rw-r--r--  configure.ac              |   2
-rw-r--r--  doc/ChangeLog             |   6
-rw-r--r--  doc/README.W32            |   5
-rw-r--r--  scripts/ChangeLog         |   4
-rw-r--r--  scripts/w32installer.nsi  |   2
10 files changed, 393 insertions, 243 deletions
diff --git a/NEWS b/NEWS
index 8ab4e0896..03e9ae6dc 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,11 @@
+Noteworthy changes in version 1.4.9 (unreleased)
+------------------------------------------------
+
+ * Improved AES encryption performance by more than 20% (on ia32).
+ Decryption is also a bit faster.
+
+
+
Noteworthy changes in version 1.4.8 (2007-12-20)
------------------------------------------------
diff --git a/cipher/ChangeLog b/cipher/ChangeLog
index b01d0a6ef..ce3f04349 100644
--- a/cipher/ChangeLog
+++ b/cipher/ChangeLog
@@ -1,3 +1,13 @@
+2008-03-22 Werner Koch <[email protected]>
+
+ * cipher.c (struct cipher_handle_s): Make sure IV is u32
+ aligned. Change all users of IV.
+ (do_cfb_encrypt): Optimize and use bulk code for AES.
+ (do_cfb_decrypt): Ditto.
+ * rijndael.c (do_encrypt): Remove.
+ (do_encrypt_aligned, do_encrypt): New. Taken from libgcrypt.
+ (rijndael_cfb_enc, rijndael_cfb_dec): New.
+
2007-12-12 Werner Koch <[email protected]>
* pubkey.c (pubkey_encrypt, pubkey_decrypt): Allow type 20 keys.
diff --git a/cipher/algorithms.h b/cipher/algorithms.h
index 5c972f291..9deefb4d8 100644
--- a/cipher/algorithms.h
+++ b/cipher/algorithms.h
@@ -118,8 +118,6 @@ twofish_get_info( int algo, size_t *keylen,
void (**decryptf)( void *c, byte *outbuf, const byte *inbuf )
);
-/* this is just a kludge for the time we have not yet changed the cipher
- * stuff to the scheme we use for random and digests */
const char *
rijndael_get_info( int algo, size_t *keylen,
size_t *blocksize, size_t *contextsize,
@@ -127,6 +125,12 @@ rijndael_get_info( int algo, size_t *keylen,
void (**encryptf)(void *c, byte *outbuf, const byte *inbuf),
void (**decryptf)(void *c, byte *outbuf, const byte *inbuf)
);
+void rijndael_cfb_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ unsigned int nblocks);
+void rijndael_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ unsigned int nblocks);
const char *
idea_get_info( int algo, size_t *keylen,
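
[Editor's note: the two new prototypes above follow an nblocks-based calling
convention: whole 16-byte blocks only, with the IV updated in place.  A
minimal self-contained sketch of that shape, with a stub standing in for the
real rijndael code (all names here are hypothetical, not GnuPG symbols):

#include <stdio.h>
#include <string.h>

/* Stub with the same shape as rijndael_cfb_enc: opaque context,
   in-place IV, separate in/out buffers, and a count of whole
   16-byte blocks.  The "cipher" here is just the raw IV XOR. */
static void
stub_cfb_enc (void *context, unsigned char *iv,
              void *outbuf_arg, const void *inbuf_arg,
              unsigned int nblocks)
{
  unsigned char *out = outbuf_arg;
  const unsigned char *in = inbuf_arg;
  unsigned int i;
  (void)context;

  for ( ; nblocks; nblocks--)
    for (i = 0; i < 16; i++)
      *out++ = (iv[i] ^= *in++);
}

int main (void)
{
  unsigned char iv[16] = {0};
  unsigned char in[32] = "thirty-two bytes of sample data";
  unsigned char out[32];

  stub_cfb_enc (NULL, iv, out, in, sizeof in / 16);
  printf ("%02x ... %02x\n", out[0], out[31]);
  return 0;
}

Note how the partial-block tail stays the caller's problem; cipher.c handles
it separately, as the hunks below show.]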
diff --git a/cipher/cipher.c b/cipher/cipher.c
index a6489d9f5..9d9c82293 100644
--- a/cipher/cipher.c
+++ b/cipher/cipher.c
@@ -1,6 +1,6 @@
/* cipher.c - cipher dispatcher
* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
- * 2007 Free Software Foundation, Inc.
+ * 2007, 2008 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
@@ -52,17 +52,26 @@ static struct cipher_table_s cipher_table[TABLE_SIZE];
static int disabled_algos[TABLE_SIZE];
-struct cipher_handle_s {
- int algo;
- int mode;
- size_t blocksize;
- byte iv[MAX_BLOCKSIZE]; /* (this should be ulong aligned) */
- byte lastiv[MAX_BLOCKSIZE];
- int unused; /* in IV */
- int (*setkey)( void *c, const byte *key, unsigned keylen );
- void (*encrypt)( void *c, byte *outbuf, const byte *inbuf );
- void (*decrypt)( void *c, byte *outbuf, const byte *inbuf );
- PROPERLY_ALIGNED_TYPE context;
+struct cipher_handle_s
+{
+ int algo;
+ int mode;
+ size_t blocksize;
+
+ /* The initialization vector. To help code optimization we make
+ sure that it is aligned on an unsigned long and u32 boundary. */
+ union {
+ unsigned long dummy_ul_iv;
+ u32 dummy_u32_iv;
+ unsigned char iv[MAX_BLOCKSIZE];
+ } u_iv;
+
+ byte lastiv[MAX_BLOCKSIZE];
+ int unused; /* in IV */
+ int (*setkey)( void *c, const byte *key, unsigned keylen );
+ void (*encrypt)( void *c, byte *outbuf, const byte *inbuf );
+ void (*decrypt)( void *c, byte *outbuf, const byte *inbuf );
+ PROPERLY_ALIGNED_TYPE context;
};
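
[Editor's note: the union wrapping of the IV is the whole point of this hunk.
Overlaying the byte array with unsigned long and u32 members forces the
compiler to align it for word-sized loads, which the bulk CFB code relies
on.  A self-contained sketch of the same trick; the MAX_BLOCKSIZE value is
assumed for illustration:

#include <stdio.h>
#include <stdint.h>

#define MAX_BLOCKSIZE 16   /* assumed for this sketch */

struct handle
{
  int algo;
  /* Overlaying iv with wider members aligns it for word access. */
  union {
    unsigned long dummy_ul_iv;
    uint32_t dummy_u32_iv;
    unsigned char iv[MAX_BLOCKSIZE];
  } u_iv;
};

int main (void)
{
  struct handle h;
  printf ("iv aligned for unsigned long: %s\n",
          ((uintptr_t)h.u_iv.iv % sizeof (unsigned long)) == 0
          ? "yes" : "no");
  return 0;
}

A union is the portable pre-C11 way to get this guarantee; the old code only
had a comment saying the IV "should be ulong aligned".]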
@@ -459,14 +468,14 @@ cipher_setkey( CIPHER_HANDLE c, byte *key, unsigned keylen )
void
cipher_setiv( CIPHER_HANDLE c, const byte *iv, unsigned ivlen )
{
- memset( c->iv, 0, c->blocksize );
+ memset( c->u_iv.iv, 0, c->blocksize );
if( iv ) {
if( ivlen != c->blocksize )
log_info("WARNING: cipher_setiv: ivlen=%u blklen=%u\n",
ivlen, (unsigned)c->blocksize );
if( ivlen > c->blocksize )
ivlen = c->blocksize;
- memcpy( c->iv, iv, ivlen );
+ memcpy( c->u_iv.iv, iv, ivlen );
}
c->unused = 0;
}
@@ -507,10 +516,10 @@ do_cbc_encrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nblocks )
/* fixme: the xor should works on words and not on
* bytes. Maybe it is a good idea to enhance the cipher backend
* API to allow for CBC handling in the backend */
- for(ivp=c->iv,i=0; i < blocksize; i++ )
+ for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
outbuf[i] = inbuf[i] ^ *ivp++;
(*c->encrypt)( &c->context.c, outbuf, outbuf );
- memcpy(c->iv, outbuf, blocksize );
+ memcpy(c->u_iv.iv, outbuf, blocksize );
inbuf += c->blocksize;
outbuf += c->blocksize;
}
@@ -530,9 +539,9 @@ do_cbc_decrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nblocks )
* for this here because it is not used otherwise */
memcpy(c->lastiv, inbuf, blocksize );
(*c->decrypt)( &c->context.c, outbuf, inbuf );
- for(ivp=c->iv,i=0; i < blocksize; i++ )
+ for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
outbuf[i] ^= *ivp++;
- memcpy(c->iv, c->lastiv, blocksize );
+ memcpy(c->u_iv.iv, c->lastiv, blocksize );
inbuf += c->blocksize;
outbuf += c->blocksize;
}
@@ -542,119 +551,181 @@ do_cbc_decrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nblocks )
static void
do_cfb_encrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nbytes )
{
- byte *ivp;
- size_t blocksize = c->blocksize;
+ byte *ivp;
+ size_t blocksize = c->blocksize;
+ size_t blocksize_x_2 = blocksize + blocksize;
- if( nbytes <= c->unused ) {
- /* short enough to be encoded by the remaining XOR mask */
- /* XOR the input with the IV and store input into IV */
- for(ivp=c->iv+c->blocksize - c->unused; nbytes; nbytes--, c->unused-- )
+ if ( nbytes <= c->unused )
+ {
+ /* Short enough to be encoded by the remaining XOR mask. XOR
+ the input with the IV and store input into IV. */
+ for (ivp=c->u_iv.iv+c->blocksize - c->unused; nbytes;
+ nbytes--, c->unused-- )
*outbuf++ = (*ivp++ ^= *inbuf++);
return;
}
-
- if( c->unused ) {
- /* XOR the input with the IV and store input into IV */
- nbytes -= c->unused;
- for(ivp=c->iv+blocksize - c->unused; c->unused; c->unused-- )
- *outbuf++ = (*ivp++ ^= *inbuf++);
+
+ if ( c->unused )
+ {
+ /* XOR the input with the IV and store input into IV. */
+ nbytes -= c->unused;
+ for (ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- )
+ *outbuf++ = (*ivp++ ^= *inbuf++);
}
- /* Now we can process complete blocks. */
-#if 0
- /* Experimental code. We may only use this for standard CFB
- because for Phil's mode we need to save the IV of before the
- last encryption - we don't want to do this in tghe fasf CFB
- encryption routine. */
- if (c->algo == CIPHER_ALGO_AES
- && nbytes >= blocksize
- && c->mode != CIPHER_MODE_PHILS_CFB) {
- size_t n;
-
- memcpy( c->lastiv, c->iv, blocksize );
- n = (nbytes / blocksize) * blocksize;
- rijndael_cfb_encrypt (&c->context.c, c->iv, outbuf, inbuf, n);
- inbuf += n;
- outbuf += n;
- nbytes -= n;
+ /* Now we can process complete blocks. We use a loop as long as we
+ have at least 2 blocks and use conditions for the rest. This
+ also allows us to use a bulk encryption function if available. */
+#ifdef USE_AES
+ if (nbytes >= blocksize_x_2
+ && (c->algo == CIPHER_ALGO_AES
+ || c->algo == CIPHER_ALGO_AES256
+ || c->algo == CIPHER_ALGO_AES192))
+ {
+ unsigned int nblocks = nbytes / blocksize;
+ rijndael_cfb_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
+ outbuf += nblocks * blocksize;
+ inbuf += nblocks * blocksize;
+ nbytes -= nblocks * blocksize;
}
-#endif
- while( nbytes >= blocksize ) {
- int i;
- /* encrypt the IV (and save the current one) */
- memcpy( c->lastiv, c->iv, blocksize );
- (*c->encrypt)( &c->context.c, c->iv, c->iv );
- /* XOR the input with the IV and store input into IV */
- for(ivp=c->iv,i=0; i < blocksize; i++ )
- *outbuf++ = (*ivp++ ^= *inbuf++);
- nbytes -= blocksize;
+ else
+#endif /*USE_AES*/
+ {
+ while ( nbytes >= blocksize_x_2 )
+ {
+ int i;
+ /* Encrypt the IV. */
+ c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+ /* XOR the input with the IV and store input into IV. */
+ for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
+ *outbuf++ = (*ivp++ ^= *inbuf++);
+ nbytes -= blocksize;
+ }
}
- if( nbytes ) { /* process the remaining bytes */
- /* encrypt the IV (and save the current one) */
- memcpy( c->lastiv, c->iv, blocksize );
- (*c->encrypt)( &c->context.c, c->iv, c->iv );
- c->unused = blocksize;
- /* and apply the xor */
- c->unused -= nbytes;
- for(ivp=c->iv; nbytes; nbytes-- )
- *outbuf++ = (*ivp++ ^= *inbuf++);
+
+ if ( nbytes >= blocksize )
+ {
+ int i;
+ /* Save the current IV and then encrypt the IV. */
+ memcpy( c->lastiv, c->u_iv.iv, blocksize );
+ c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+ /* XOR the input with the IV and store input into IV */
+ for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
+ *outbuf++ = (*ivp++ ^= *inbuf++);
+ nbytes -= blocksize;
+ }
+ if ( nbytes )
+ {
+ /* Save the current IV and then encrypt the IV. */
+ memcpy (c->lastiv, c->u_iv.iv, blocksize );
+ c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+ c->unused = blocksize;
+ /* Apply the XOR. */
+ c->unused -= nbytes;
+ for(ivp=c->u_iv.iv; nbytes; nbytes-- )
+ *outbuf++ = (*ivp++ ^= *inbuf++);
}
}
+
static void
do_cfb_decrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nbytes )
{
- byte *ivp;
- ulong temp;
- size_t blocksize = c->blocksize;
-
- if( nbytes <= c->unused ) {
- /* short enough to be encoded by the remaining XOR mask */
- /* XOR the input with the IV and store input into IV */
- for(ivp=c->iv+blocksize - c->unused; nbytes; nbytes--,c->unused--){
- temp = *inbuf++;
- *outbuf++ = *ivp ^ temp;
- *ivp++ = temp;
- }
- return;
+ unsigned char *ivp;
+ unsigned long temp;
+ int i;
+ size_t blocksize = c->blocksize;
+ size_t blocksize_x_2 = blocksize + blocksize;
+
+ if (nbytes <= c->unused)
+ {
+ /* Short enough to be encoded by the remaining XOR mask. */
+ /* XOR the input with the IV and store input into IV. */
+ for (ivp=c->u_iv.iv+blocksize - c->unused;
+ nbytes;
+ nbytes--, c->unused--)
+ {
+ temp = *inbuf++;
+ *outbuf++ = *ivp ^ temp;
+ *ivp++ = temp;
+ }
+ return;
}
-
- if( c->unused ) {
- /* XOR the input with the IV and store input into IV */
- nbytes -= c->unused;
- for(ivp=c->iv+blocksize - c->unused; c->unused; c->unused-- ) {
- temp = *inbuf++;
- *outbuf++ = *ivp ^ temp;
- *ivp++ = temp;
- }
+
+ if (c->unused)
+ {
+ /* XOR the input with the IV and store input into IV. */
+ nbytes -= c->unused;
+ for (ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- )
+ {
+ temp = *inbuf++;
+ *outbuf++ = *ivp ^ temp;
+ *ivp++ = temp;
+ }
+ }
+
+ /* Now we can process complete blocks. We use a loop as long as we
+ have at least 2 blocks and use conditions for the rest. This
+ also allows us to use a bulk encryption function if available. */
+#ifdef USE_AES
+ if (nbytes >= blocksize_x_2
+ && (c->algo == CIPHER_ALGO_AES
+ || c->algo == CIPHER_ALGO_AES256
+ || c->algo == CIPHER_ALGO_AES192))
+ {
+ unsigned int nblocks = nbytes / blocksize;
+ rijndael_cfb_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
+ outbuf += nblocks * blocksize;
+ inbuf += nblocks * blocksize;
+ nbytes -= nblocks * blocksize;
+ }
+ else
+#endif /*USE_AES*/
+ {
+ while (nbytes >= blocksize_x_2 )
+ {
+ /* Encrypt the IV. */
+ c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+ /* XOR the input with the IV and store input into IV. */
+ for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
+ {
+ temp = *inbuf++;
+ *outbuf++ = *ivp ^ temp;
+ *ivp++ = temp;
+ }
+ nbytes -= blocksize;
+ }
}
- /* now we can process complete blocks */
- while( nbytes >= blocksize ) {
- int i;
- /* encrypt the IV (and save the current one) */
- memcpy( c->lastiv, c->iv, blocksize );
- (*c->encrypt)( &c->context.c, c->iv, c->iv );
- /* XOR the input with the IV and store input into IV */
- for(ivp=c->iv,i=0; i < blocksize; i++ ) {
- temp = *inbuf++;
- *outbuf++ = *ivp ^ temp;
- *ivp++ = temp;
- }
- nbytes -= blocksize;
+ if (nbytes >= blocksize )
+ {
+ /* Save the current IV and then encrypt the IV. */
+ memcpy ( c->lastiv, c->u_iv.iv, blocksize);
+ c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+ /* XOR the input with the IV and store input into IV */
+ for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
+ {
+ temp = *inbuf++;
+ *outbuf++ = *ivp ^ temp;
+ *ivp++ = temp;
+ }
+ nbytes -= blocksize;
}
- if( nbytes ) { /* process the remaining bytes */
- /* encrypt the IV (and save the current one) */
- memcpy( c->lastiv, c->iv, blocksize );
- (*c->encrypt)( &c->context.c, c->iv, c->iv );
- c->unused = blocksize;
- /* and apply the xor */
- c->unused -= nbytes;
- for(ivp=c->iv; nbytes; nbytes-- ) {
- temp = *inbuf++;
- *outbuf++ = *ivp ^ temp;
- *ivp++ = temp;
- }
+
+ if (nbytes)
+ {
+ /* Save the current IV and then encrypt the IV. */
+ memcpy ( c->lastiv, c->u_iv.iv, blocksize );
+ c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+ c->unused = blocksize;
+ /* Apply the XOR. */
+ c->unused -= nbytes;
+ for (ivp=c->u_iv.iv; nbytes; nbytes-- )
+ {
+ temp = *inbuf++;
+ *outbuf++ = *ivp ^ temp;
+ *ivp++ = temp;
+ }
}
}
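
[Editor's note: to make the control flow above easier to follow, recall that
in CFB mode both directions run the block cipher in its *encrypt* direction
over the IV, XOR the result with the input, and feed the ciphertext back as
the next IV, which is why do_cfb_decrypt never calls c->decrypt.  A
self-contained round-trip sketch of that pattern, with a toy block function
standing in for AES:

#include <stdio.h>
#include <string.h>

#define BLK 16

/* Toy stand-in for the block cipher's encrypt primitive.  CFB only
   ever uses the encrypt direction, even when decrypting. */
static void
toy_encrypt (unsigned char *out, const unsigned char *in)
{
  int i;
  for (i = 0; i < BLK; i++)
    out[i] = in[i] ^ 0x5a;      /* placeholder, not a real cipher */
}

/* Whole-block CFB, mirroring the pattern above: encrypt the IV, XOR
   with the input, and the ciphertext becomes the next IV. */
static void
cfb_enc (unsigned char *iv, unsigned char *out,
         const unsigned char *in, unsigned int nblocks)
{
  int i;

  for ( ; nblocks; nblocks--)
    {
      toy_encrypt (iv, iv);
      for (i = 0; i < BLK; i++)
        *out++ = (iv[i] ^= *in++);
    }
}

static void
cfb_dec (unsigned char *iv, unsigned char *out,
         const unsigned char *in, unsigned int nblocks)
{
  unsigned char temp;
  int i;

  for ( ; nblocks; nblocks--)
    {
      toy_encrypt (iv, iv);
      for (i = 0; i < BLK; i++)
        {
          temp = *in++;         /* keep the ciphertext byte ... */
          *out++ = iv[i] ^ temp;
          iv[i] = temp;         /* ... it is the next IV */
        }
    }
}

int main (void)
{
  unsigned char iv1[BLK] = {0}, iv2[BLK] = {0};
  unsigned char pt[2*BLK] = "two blocks of sample text......";
  unsigned char ct[2*BLK], back[2*BLK];

  cfb_enc (iv1, ct, pt, 2);
  cfb_dec (iv2, back, ct, 2);
  printf ("round trip %s\n",
          memcmp (pt, back, sizeof pt) ? "failed" : "ok");
  return 0;
}

The decrypt loop must buffer each ciphertext byte in temp before overwriting
the IV slot, because input and IV storage would otherwise clobber each other
when the caller decrypts in place.]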
@@ -732,8 +803,8 @@ void
cipher_sync( CIPHER_HANDLE c )
{
if( c->mode == CIPHER_MODE_PHILS_CFB && c->unused ) {
- memmove(c->iv + c->unused, c->iv, c->blocksize - c->unused );
- memcpy(c->iv, c->lastiv + c->blocksize - c->unused, c->unused);
+ memmove(c->u_iv.iv + c->unused, c->u_iv.iv, c->blocksize - c->unused );
+ memcpy(c->u_iv.iv, c->lastiv + c->blocksize - c->unused, c->unused);
c->unused = 0;
}
}
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 644b75c42..a67cbc56f 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -1,5 +1,5 @@
/* Rijndael (AES) for GnuPG
- * Copyright (C) 2000, 2001 Free Software Foundation, Inc.
+ * Copyright (C) 2000, 2001, 2008 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
@@ -1866,142 +1866,125 @@ prepare_decryption( RIJNDAEL_context *ctx )
-/* Encrypt one block. A and B may be the same. */
+/* Encrypt one block. A and B need to be aligned on a 4 byte
+ boundary. A and B may be the same. */
static void
-do_encrypt (const RIJNDAEL_context *ctx, byte *b, const byte *a)
+do_encrypt_aligned (const RIJNDAEL_context *ctx,
+ unsigned char *b, const unsigned char *a)
{
- int r;
- byte temp[4][4];
- int ROUNDS = ctx->ROUNDS;
#define rk (ctx->keySched)
+ int ROUNDS = ctx->ROUNDS;
+ int r;
+ union
+ {
+ u32 tempu32[4]; /* Force correct alignment. */
+ byte temp[4][4];
+ } u;
- *((u32*)temp[0]) = *((u32*)(a )) ^ *((u32*)rk[0][0]);
- *((u32*)temp[1]) = *((u32*)(a+ 4)) ^ *((u32*)rk[0][1]);
- *((u32*)temp[2]) = *((u32*)(a+ 8)) ^ *((u32*)rk[0][2]);
- *((u32*)temp[3]) = *((u32*)(a+12)) ^ *((u32*)rk[0][3]);
- *((u32*)(b )) = *((u32*)T1[temp[0][0]])
- ^ *((u32*)T2[temp[1][1]])
- ^ *((u32*)T3[temp[2][2]])
- ^ *((u32*)T4[temp[3][3]]);
- *((u32*)(b + 4)) = *((u32*)T1[temp[1][0]])
- ^ *((u32*)T2[temp[2][1]])
- ^ *((u32*)T3[temp[3][2]])
- ^ *((u32*)T4[temp[0][3]]);
- *((u32*)(b + 8)) = *((u32*)T1[temp[2][0]])
- ^ *((u32*)T2[temp[3][1]])
- ^ *((u32*)T3[temp[0][2]])
- ^ *((u32*)T4[temp[1][3]]);
- *((u32*)(b +12)) = *((u32*)T1[temp[3][0]])
- ^ *((u32*)T2[temp[0][1]])
- ^ *((u32*)T3[temp[1][2]])
- ^ *((u32*)T4[temp[2][3]]);
- for (r = 1; r < ROUNDS-1; r++) {
- *((u32*)temp[0]) = *((u32*)(b )) ^ *((u32*)rk[r][0]);
- *((u32*)temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[r][1]);
- *((u32*)temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[r][2]);
- *((u32*)temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[r][3]);
-
- *((u32*)(b )) = *((u32*)T1[temp[0][0]])
- ^ *((u32*)T2[temp[1][1]])
- ^ *((u32*)T3[temp[2][2]])
- ^ *((u32*)T4[temp[3][3]]);
- *((u32*)(b + 4)) = *((u32*)T1[temp[1][0]])
- ^ *((u32*)T2[temp[2][1]])
- ^ *((u32*)T3[temp[3][2]])
- ^ *((u32*)T4[temp[0][3]]);
- *((u32*)(b + 8)) = *((u32*)T1[temp[2][0]])
- ^ *((u32*)T2[temp[3][1]])
- ^ *((u32*)T3[temp[0][2]])
- ^ *((u32*)T4[temp[1][3]]);
- *((u32*)(b +12)) = *((u32*)T1[temp[3][0]])
- ^ *((u32*)T2[temp[0][1]])
- ^ *((u32*)T3[temp[1][2]])
- ^ *((u32*)T4[temp[2][3]]);
+ *((u32*)u.temp[0]) = *((u32*)(a )) ^ *((u32*)rk[0][0]);
+ *((u32*)u.temp[1]) = *((u32*)(a+ 4)) ^ *((u32*)rk[0][1]);
+ *((u32*)u.temp[2]) = *((u32*)(a+ 8)) ^ *((u32*)rk[0][2]);
+ *((u32*)u.temp[3]) = *((u32*)(a+12)) ^ *((u32*)rk[0][3]);
+ *((u32*)(b )) = (*((u32*)T1[u.temp[0][0]])
+ ^ *((u32*)T2[u.temp[1][1]])
+ ^ *((u32*)T3[u.temp[2][2]])
+ ^ *((u32*)T4[u.temp[3][3]]));
+ *((u32*)(b + 4)) = (*((u32*)T1[u.temp[1][0]])
+ ^ *((u32*)T2[u.temp[2][1]])
+ ^ *((u32*)T3[u.temp[3][2]])
+ ^ *((u32*)T4[u.temp[0][3]]));
+ *((u32*)(b + 8)) = (*((u32*)T1[u.temp[2][0]])
+ ^ *((u32*)T2[u.temp[3][1]])
+ ^ *((u32*)T3[u.temp[0][2]])
+ ^ *((u32*)T4[u.temp[1][3]]));
+ *((u32*)(b +12)) = (*((u32*)T1[u.temp[3][0]])
+ ^ *((u32*)T2[u.temp[0][1]])
+ ^ *((u32*)T3[u.temp[1][2]])
+ ^ *((u32*)T4[u.temp[2][3]]));
+
+ for (r = 1; r < ROUNDS-1; r++)
+ {
+ *((u32*)u.temp[0]) = *((u32*)(b )) ^ *((u32*)rk[r][0]);
+ *((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[r][1]);
+ *((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[r][2]);
+ *((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[r][3]);
+
+ *((u32*)(b )) = (*((u32*)T1[u.temp[0][0]])
+ ^ *((u32*)T2[u.temp[1][1]])
+ ^ *((u32*)T3[u.temp[2][2]])
+ ^ *((u32*)T4[u.temp[3][3]]));
+ *((u32*)(b + 4)) = (*((u32*)T1[u.temp[1][0]])
+ ^ *((u32*)T2[u.temp[2][1]])
+ ^ *((u32*)T3[u.temp[3][2]])
+ ^ *((u32*)T4[u.temp[0][3]]));
+ *((u32*)(b + 8)) = (*((u32*)T1[u.temp[2][0]])
+ ^ *((u32*)T2[u.temp[3][1]])
+ ^ *((u32*)T3[u.temp[0][2]])
+ ^ *((u32*)T4[u.temp[1][3]]));
+ *((u32*)(b +12)) = (*((u32*)T1[u.temp[3][0]])
+ ^ *((u32*)T2[u.temp[0][1]])
+ ^ *((u32*)T3[u.temp[1][2]])
+ ^ *((u32*)T4[u.temp[2][3]]));
}
- /* last round is special */
- *((u32*)temp[0]) = *((u32*)(b )) ^ *((u32*)rk[ROUNDS-1][0]);
- *((u32*)temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[ROUNDS-1][1]);
- *((u32*)temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[ROUNDS-1][2]);
- *((u32*)temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[ROUNDS-1][3]);
- b[ 0] = T1[temp[0][0]][1];
- b[ 1] = T1[temp[1][1]][1];
- b[ 2] = T1[temp[2][2]][1];
- b[ 3] = T1[temp[3][3]][1];
- b[ 4] = T1[temp[1][0]][1];
- b[ 5] = T1[temp[2][1]][1];
- b[ 6] = T1[temp[3][2]][1];
- b[ 7] = T1[temp[0][3]][1];
- b[ 8] = T1[temp[2][0]][1];
- b[ 9] = T1[temp[3][1]][1];
- b[10] = T1[temp[0][2]][1];
- b[11] = T1[temp[1][3]][1];
- b[12] = T1[temp[3][0]][1];
- b[13] = T1[temp[0][1]][1];
- b[14] = T1[temp[1][2]][1];
- b[15] = T1[temp[2][3]][1];
- *((u32*)(b )) ^= *((u32*)rk[ROUNDS][0]);
- *((u32*)(b+ 4)) ^= *((u32*)rk[ROUNDS][1]);
- *((u32*)(b+ 8)) ^= *((u32*)rk[ROUNDS][2]);
- *((u32*)(b+12)) ^= *((u32*)rk[ROUNDS][3]);
+
+ /* Last round is special. */
+ *((u32*)u.temp[0]) = *((u32*)(b )) ^ *((u32*)rk[ROUNDS-1][0]);
+ *((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[ROUNDS-1][1]);
+ *((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[ROUNDS-1][2]);
+ *((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[ROUNDS-1][3]);
+ b[ 0] = T1[u.temp[0][0]][1];
+ b[ 1] = T1[u.temp[1][1]][1];
+ b[ 2] = T1[u.temp[2][2]][1];
+ b[ 3] = T1[u.temp[3][3]][1];
+ b[ 4] = T1[u.temp[1][0]][1];
+ b[ 5] = T1[u.temp[2][1]][1];
+ b[ 6] = T1[u.temp[3][2]][1];
+ b[ 7] = T1[u.temp[0][3]][1];
+ b[ 8] = T1[u.temp[2][0]][1];
+ b[ 9] = T1[u.temp[3][1]][1];
+ b[10] = T1[u.temp[0][2]][1];
+ b[11] = T1[u.temp[1][3]][1];
+ b[12] = T1[u.temp[3][0]][1];
+ b[13] = T1[u.temp[0][1]][1];
+ b[14] = T1[u.temp[1][2]][1];
+ b[15] = T1[u.temp[2][3]][1];
+ *((u32*)(b )) ^= *((u32*)rk[ROUNDS][0]);
+ *((u32*)(b+ 4)) ^= *((u32*)rk[ROUNDS][1]);
+ *((u32*)(b+ 8)) ^= *((u32*)rk[ROUNDS][2]);
+ *((u32*)(b+12)) ^= *((u32*)rk[ROUNDS][3]);
#undef rk
}
+
static void
-rijndael_encrypt (void *ctx, byte *b, const byte *a)
+do_encrypt (const RIJNDAEL_context *ctx,
+ unsigned char *bx, const unsigned char *ax)
{
- do_encrypt (ctx, b, a);
- burn_stack (16 + 2*sizeof(int));
+ /* BX and AX are not necessarily aligned correctly. Thus we need to
+ copy them here. */
+ union
+ {
+ u32 dummy[4];
+ byte a[16];
+ } a;
+ union
+ {
+ u32 dummy[4];
+ byte b[16];
+ } b;
+
+ memcpy (a.a, ax, 16);
+ do_encrypt_aligned (ctx, b.b, a.a);
+ memcpy (bx, b.b, 16);
}
-#if 0
-/* Experimental code. Needs to be generalized and we might want to
- have variants for all possible sizes of the largest scalar type.
- Also need to make sure that INBUF and OUTBUF are properlu
- aligned. */
-void
-rijndael_cfb_encrypt (void *ctx, byte *iv,
- byte *outbuf, const byte *inbuf, size_t nbytes)
+
+static void
+rijndael_encrypt (void *ctx, byte *b, const byte *a)
{
-/* if ( ((unsigned long)inbuf & 3) || ((unsigned long)outbuf & 3) ) */
-/* { */
- /* Not properly aligned, use the slow version. Actually the
- compiler might even optimize it this pretty well if the
- target CPU has relaxed alignment requirements. Thus it is
- questionable whether we should at all go into the hassles of
- doing alignment wise optimizations by ourself. A quick test
- with gcc 4.0 on ia32 did showed any advantages. */
- byte *ivp;
- int i;
-
- while (nbytes >= 16)
- {
- do_encrypt (ctx, iv, iv);
- for (i=0, ivp = iv; i < 16; i++)
- *outbuf++ = (*ivp++ ^= *inbuf++);
- nbytes -= 16;
- }
-/* } */
-/* else */
-/* { */
-/* u32 *ivp; */
-/* u32 *ob = (u32*)outbuf; */
-/* const u32 *ib = (const u32*)inbuf; */
-
-/* while (nbytes >= 16) */
-/* { */
-/* do_encrypt (ctx, iv, iv); */
-/* ivp = iv; */
-/* *ob++ = (*ivp++ ^= *ib++); */
-/* *ob++ = (*ivp++ ^= *ib++); */
-/* *ob++ = (*ivp++ ^= *ib++); */
-/* *ob++ = (*ivp ^= *ib++); */
-/* nbytes -= 16; */
-/* } */
-/* } */
- burn_stack (16 + 2*sizeof(int));
+ do_encrypt (ctx, b, a);
+ burn_stack (16 + 2*sizeof(int));
}
-#endif
-
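
[Editor's note: the new do_encrypt wrapper above illustrates a common
portability pattern: the fast core assumes 4-byte aligned buffers, and
callers with arbitrary pointers are bounced through union-backed stack
copies.  A small self-contained sketch of the pattern; the transform is a
placeholder, not AES:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Placeholder transform requiring 4-byte aligned buffers; stands in
   for do_encrypt_aligned. */
static void
process_aligned (uint32_t *b, const uint32_t *a)
{
  int i;
  for (i = 0; i < 4; i++)
    b[i] = a[i] ^ 0xdeadbeefu;
}

/* Wrapper for arbitrary (possibly unaligned) pointers: bounce the
   data through union-backed stack copies, as do_encrypt does. */
static void
process_any (unsigned char *bx, const unsigned char *ax)
{
  union { uint32_t w[4]; unsigned char b[16]; } a, b;

  memcpy (a.b, ax, 16);         /* ax may be unaligned */
  process_aligned (b.w, a.w);
  memcpy (bx, b.b, 16);         /* bx may be unaligned */
}

int main (void)
{
  unsigned char in[17], out[17];

  memset (in, 0x42, sizeof in);
  process_any (out + 1, in + 1);  /* deliberately misaligned */
  printf ("%02x\n", out[1]);
  return 0;
}

The two memcpy calls cost a little, which is why cipher.c now aligns its IV
up front and the bulk CFB paths call the aligned core directly.]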
@@ -2097,6 +2080,67 @@ rijndael_decrypt (void *ctx, byte *b, const byte *a)
do_decrypt (ctx, b, a);
burn_stack (16+2*sizeof(int));
}
+
+
+
+/* Bulk encryption of complete blocks in CFB mode. Caller needs to
+ make sure that IV is aligned on an unsigned long boundary. This
+ function is only intended for the bulk encryption feature of
+ cipher.c. */
+void
+rijndael_cfb_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ unsigned int nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char *ivp;
+ int i;
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* Encrypt the IV. */
+ do_encrypt_aligned (ctx, iv, iv);
+ /* XOR the input with the IV and store input into IV. */
+ for (ivp=iv,i=0; i < 16; i++ )
+ *outbuf++ = (*ivp++ ^= *inbuf++);
+ }
+
+ burn_stack (16 + 2*sizeof(int));
+}
+
+/* Bulk decryption of complete blocks in CFB mode. Caller needs to
+ make sure that IV is aligned on an unsigned long boundary. This
+ function is only intended for the bulk encryption feature of
+ cipher.c. */
+void
+rijndael_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ unsigned int nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char *ivp;
+ unsigned char temp;
+ int i;
+
+ for ( ;nblocks; nblocks-- )
+ {
+ do_encrypt_aligned (ctx, iv, iv);
+ for (ivp=iv,i=0; i < 16; i++ )
+ {
+ temp = *inbuf++;
+ *outbuf++ = *ivp ^ temp;
+ *ivp++ = temp;
+ }
+ }
+
+ burn_stack (16 + 2*sizeof(int));
+}
+
+
/* Test a single encryption and decryption with each key size. */
diff --git a/configure.ac b/configure.ac
index 9974ca1e4..340f9eb33 100644
--- a/configure.ac
+++ b/configure.ac
@@ -25,7 +25,7 @@ min_automake_version="1.9.3"
# Remember to change the version number immediately *after* a release.
# Set my_issvn to "yes" for non-released code. Remember to run an
# "svn up" and "autogen.sh --force" right before creating a distribution.
-m4_define([my_version], [1.4.9rc1])
+m4_define([my_version], [1.4.9])
m4_define([my_issvn], [yes])
m4_define([svn_revision], m4_esyscmd([echo $((svn info 2>/dev/null \
diff --git a/doc/ChangeLog b/doc/ChangeLog
index d7bf91b2b..80dd2bc8e 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,9 @@
+2007-12-21 Werner Koch <[email protected]>
+
+ * README.W32: Note that Vista is supported and that at least NT-4
+ is required. It might still work on older systems, but I don't
+ know for sure.
+
2007-12-12 Werner Koch <[email protected]>
* gpg.texi, specify-user-id.texi: Update from gnupg-2.
diff --git a/doc/README.W32 b/doc/README.W32
index b734cb32b..8abfc24a7 100644
--- a/doc/README.W32
+++ b/doc/README.W32
@@ -1,7 +1,8 @@
README.W32 -*- text -*-
-This is a binary package with GnuPG for MS-Windows 95, 98, WNT, W2000
-and XP. See the file README for generic instructions and usage hints.
+This is a binary package with GnuPG for MS-Windows NT-4, W2000, XP and
+Vista. A native version for 64 bit is not available. See the file
+README for generic instructions and usage hints.
A FAQ comes with this package and a probably more recent one can be
found online at http://www.gnupg.org/faq.html. See
diff --git a/scripts/ChangeLog b/scripts/ChangeLog
index 92d5999ec..a6b536f12 100644
--- a/scripts/ChangeLog
+++ b/scripts/ChangeLog
@@ -1,3 +1,7 @@
+2008-01-30 Werner Koch <[email protected]>
+
+ * w32installer.nsi: Set the OutPath back.
+
2007-12-12 Werner Koch <[email protected]>
* config.sub, config.guess: Update to version 2007-11-19.
diff --git a/scripts/w32installer.nsi b/scripts/w32installer.nsi
index 327182d5b..841850c20 100644
--- a/scripts/w32installer.nsi
+++ b/scripts/w32installer.nsi
@@ -351,6 +351,8 @@ Section "-Finish"
WriteRegStr HKCU "Software\GNU\GnuPG" "Lang" $R3
;;
+ # Set the OutPath back so that the README file can be displayed.
+ SetOutPath "$INSTDIR"
SectionEnd ; "-Finish"