author    Werner Koch <[email protected]>  2005-08-11 16:57:29 +0000
committer Werner Koch <[email protected]>  2005-08-11 16:57:29 +0000
commit    0a3eda24ee244534ab3081257e9777f1d67de058 (patch)
tree      414b7e229c41c35177d2e3dc642671dab0a37a18
parent    * configure.ac: Remove hardcoded -I and -L for /usr/local on FreeBSD. (diff)
Experimental code to improve AES performance. Got about 25% on ia32.
Diffstat
-rw-r--r--  cipher/ChangeLog    6
-rw-r--r--  cipher/cipher.c    20
-rw-r--r--  cipher/rijndael.c  51
3 files changed, 76 insertions(+), 1 deletion(-)
diff --git a/cipher/ChangeLog b/cipher/ChangeLog
index b5732524d..f0e7efb0f 100644
--- a/cipher/ChangeLog
+++ b/cipher/ChangeLog
@@ -1,3 +1,9 @@
+2005-08-11  Werner Koch  <[email protected]>
+
+	* rijndael.c (rijndael_cfb_encrypt): Experimental code to improve
+	AES performance.  Got about 25% on ia32.
+	* cipher.c (do_cfb_encrypt): Ditto.
+
 2005-06-07  David Shaw  <[email protected]>

 	* random.c: Fix prototype of the fast random gatherer.  Noted by
diff --git a/cipher/cipher.c b/cipher/cipher.c
index 591ce208e..311919fe1 100644
--- a/cipher/cipher.c
+++ b/cipher/cipher.c
@@ -536,7 +536,25 @@ do_cfb_encrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nbytes )
         *outbuf++ = (*ivp++ ^= *inbuf++);
     }

-    /* now we can process complete blocks */
+    /* Now we can process complete blocks. */
+#if 0
+    /* Experimental code.  We may only use this for standard CFB,
+       because for Phil's mode we need to save the IV before the
+       last encryption - we don't want to do that in the fast CFB
+       encryption routine. */
+    if (c->algo == CIPHER_ALGO_AES
+        && nbytes >= blocksize
+        && c->mode != CIPHER_MODE_PHILS_CFB) {
+        size_t n;
+
+        memcpy( c->lastiv, c->iv, blocksize );
+        n = (nbytes / blocksize) * blocksize;
+        rijndael_cfb_encrypt (&c->context.c, c->iv, outbuf, inbuf, n);
+        inbuf += n;
+        outbuf += n;
+        nbytes -= n;
+    }
+#endif
     while( nbytes >= blocksize ) {
         int i;
         /* encrypt the IV (and save the current one) */
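
The comment in the hunk above notes that the bulk path may only be used for standard CFB: Phil's CFB mode has to keep the IV from just before the last block encryption (for OpenPGP's resynchronization), and a bulk routine would not record it. The following sketch illustrates that constraint; it is a minimal stand-alone example, not GnuPG code, and block_encrypt_fn, cfb_encrypt_blocks and save_lastiv are hypothetical names:

#include <stddef.h>
#include <string.h>

#define BLOCKSIZE 16  /* AES block size in bytes. */

/* Hypothetical stand-in for the cipher's single-block encrypt
   function; OUT and IN may point to the same buffer. */
typedef void (*block_encrypt_fn) (void *ctx, unsigned char *out,
                                  const unsigned char *in);

/* Per-block CFB encryption.  When SAVE_LASTIV is nonzero (Phil's
   CFB), the IV from before each block encryption is kept in LASTIV
   so the caller can later resynchronize. */
static void
cfb_encrypt_blocks (void *ctx, block_encrypt_fn encrypt,
                    unsigned char *iv, unsigned char *lastiv,
                    int save_lastiv, unsigned char *outbuf,
                    const unsigned char *inbuf, size_t nbytes)
{
  while (nbytes >= BLOCKSIZE)
    {
      size_t i;

      if (save_lastiv)
        memcpy (lastiv, iv, BLOCKSIZE); /* Save pre-encryption IV. */
      encrypt (ctx, iv, iv);            /* IV := E(IV).            */
      for (i = 0; i < BLOCKSIZE; i++)   /* C := P xor E(IV); the   */
        outbuf[i] = iv[i] ^= inbuf[i];  /* ciphertext feeds back.  */
      outbuf += BLOCKSIZE;
      inbuf  += BLOCKSIZE;
      nbytes -= BLOCKSIZE;
    }
}

With save_lastiv set, lastiv always holds the IV from before the most recent encryption - exactly the bookkeeping a bulk rijndael_cfb_encrypt skips, which is why the fast path is gated on c->mode != CIPHER_MODE_PHILS_CFB.
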
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index adf276531..e52e01e96 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -1955,6 +1955,57 @@ rijndael_encrypt (void *ctx, byte *b, const byte *a)
   burn_stack (16 + 2*sizeof(int));
 }

+#if 0
+/* Experimental code.  Needs to be generalized and we might want to
+   have variants for all possible sizes of the largest scalar type.
+   Also need to make sure that INBUF and OUTBUF are properly
+   aligned. */
+void
+rijndael_cfb_encrypt (void *ctx, byte *iv,
+                      byte *outbuf, const byte *inbuf, size_t nbytes)
+{
+/*   if ( ((unsigned long)inbuf & 3) || ((unsigned long)outbuf & 3) ) */
+/*     { */
+      /* Not properly aligned, use the slow version.  Actually the
+         compiler might even optimize this pretty well if the
+         target CPU has relaxed alignment requirements.  Thus it is
+         questionable whether we should go into the hassle of doing
+         alignment-wise optimizations ourselves at all.  A quick test
+         with gcc 4.0 on ia32 did not show any advantages. */
+      byte *ivp;
+      int i;
+
+      while (nbytes >= 16)
+        {
+          do_encrypt (ctx, iv, iv);
+          for (i=0, ivp = iv; i < 16; i++)
+            *outbuf++ = (*ivp++ ^= *inbuf++);
+          nbytes -= 16;
+        }
+/*     } */
+/*   else */
+/*     { */
+/*       u32 *ivp; */
+/*       u32 *ob = (u32*)outbuf; */
+/*       const u32 *ib = (const u32*)inbuf; */
+
+/*       while (nbytes >= 16) */
+/*         { */
+/*           do_encrypt (ctx, iv, iv); */
+/*           ivp = iv; */
+/*           *ob++ = (*ivp++ ^= *ib++); */
+/*           *ob++ = (*ivp++ ^= *ib++); */
+/*           *ob++ = (*ivp++ ^= *ib++); */
+/*           *ob++ = (*ivp ^= *ib++); */
+/*           nbytes -= 16; */
+/*         } */
+/*     } */
+  burn_stack (16 + 2*sizeof(int));
+}
+#endif
+
+
+
 /* Decrypt one block.  a and b may be the same. */
 static void
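
The commented-out else branch above sketches a word-at-a-time variant behind an alignment check. Below is a minimal stand-alone rendering of that idea, assuming nothing from GnuPG: do_encrypt_block and cfb_encrypt_words are hypothetical names, and the memcpy-based word access is a deliberate substitution for the raw u32 pointer casts, which would violate C's strict-aliasing rules:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical stand-in for the cipher's single-block encrypt
   function; OUT and IN may point to the same buffer. */
extern void do_encrypt_block (void *ctx, uint8_t *out, const uint8_t *in);

void
cfb_encrypt_words (void *ctx, uint8_t *iv,
                   uint8_t *outbuf, const uint8_t *inbuf, size_t nbytes)
{
  if (((uintptr_t)inbuf & 3) || ((uintptr_t)outbuf & 3))
    {
      /* Unaligned input or output: byte-wise fallback. */
      while (nbytes >= 16)
        {
          int i;

          do_encrypt_block (ctx, iv, iv);
          for (i = 0; i < 16; i++)
            outbuf[i] = iv[i] ^= inbuf[i];
          outbuf += 16;
          inbuf  += 16;
          nbytes -= 16;
        }
    }
  else
    {
      /* Aligned: XOR four 32-bit words per block.  Copying through
         local word arrays avoids the aliasing violation of casting
         byte pointers to u32 pointers; a compiler will usually turn
         these memcpy calls into plain word loads and stores. */
      while (nbytes >= 16)
        {
          uint32_t w[4], v[4];
          int i;

          do_encrypt_block (ctx, iv, iv);
          memcpy (v, iv, 16);
          memcpy (w, inbuf, 16);
          for (i = 0; i < 4; i++)
            v[i] ^= w[i];
          memcpy (outbuf, v, 16);
          memcpy (iv, v, 16);   /* Ciphertext becomes the next IV. */
          outbuf += 16;
          inbuf  += 16;
          nbytes -= 16;
        }
    }
}

Whether the word path actually wins is target dependent; as the comment above records, a quick test with gcc 4.0 on ia32 showed no advantage, since the compiler can often optimize the byte loop just as well.
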