Diffstat (limited to 'scripts/conf-w32brg/aescrypt.asm')
-rw-r--r--  scripts/conf-w32brg/aescrypt.asm  434
1 file changed, 434 insertions, 0 deletions
diff --git a/scripts/conf-w32brg/aescrypt.asm b/scripts/conf-w32brg/aescrypt.asm
new file mode 100644
index 000000000..a7afacaa1
--- /dev/null
+++ b/scripts/conf-w32brg/aescrypt.asm
@@ -0,0 +1,434 @@
+
+; ---------------------------------------------------------------------------
+; Copyright (c) 2002, Dr Brian Gladman, Worcester, UK. All rights reserved.
+;
+; LICENSE TERMS
+;
+; The free distribution and use of this software in both source and binary
+; form is allowed (with or without changes) provided that:
+;
+; 1. distributions of this source code include the above copyright
+; notice, this list of conditions and the following disclaimer;
+;
+; 2. distributions in binary form include the above copyright
+; notice, this list of conditions and the following disclaimer
+; in the documentation and/or other associated materials;
+;
+; 3. the copyright holder's name is not used to endorse products
+; built using this software without specific written permission.
+;
+; ALTERNATIVELY, provided that this notice is retained in full, this product
+; may be distributed under the terms of the GNU General Public License (GPL),
+; in which case the provisions of the GPL apply INSTEAD OF those given above.
+;
+; DISCLAIMER
+;
+; This software is provided 'as is' with no explicit or implied warranties
+; in respect of its properties, including, but not limited to, correctness
+; and/or fitness for purpose.
+; ---------------------------------------------------------------------------
+; Issue 30/06/2004
+
+; An AES implementation for Pentium processors using the NASM assembler (see
+; <http://sourceforge.net/projects/nasm>). This version provides the standard
+; AES block length (128 bits, 16 bytes) with the same interface as that used
+; in my C implementation. The eax, ecx and edx registers and the arithmetic
+; status flags are not preserved. The ebx, esi, edi and ebp registers are
+; preserved across calls. Only encryption and decryption are provided here;
+; the key scheduling code is that in aeskey.c compiled with USE_ASM defined.
+; This code uses the VC++ register saving conventions; if it is used with
+; another compiler, that compiler's register usage and calling conventions
+; will need to be checked. The NASM command line for the VC++ custom build
+; step is:
+;
+; nasm -O2 -f win32 -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"
+
+ section .text ; use32
+
+; aes_rval aes_encrypt(const unsigned char in_blk[],
+; unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
+; aes_rval aes_decrypt(const unsigned char in_blk[],
+; unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
+
+; Comment in/out the following lines to obtain the desired subroutines. These
+; selections MUST match those in the C header file aes.h
+
+%define AES_128 ; define if AES with 128 bit keys is needed
+%define AES_192 ; define if AES with 192 bit keys is needed
+%define AES_256 ; define if AES with 256 bit keys is needed
+%define AES_VAR ; define if a variable key size is needed
+%define ENCRYPTION ; define if encryption is needed
+%define DECRYPTION ; define if decryption is needed
+%define AES_REV_DKS ; define if key decryption schedule is reversed
+
+; The DLL interface must use the _stdcall convention, in which the number
+; of bytes of parameter space is added after an @ to the subroutine's name.
+; We must also remove our parameters from the stack before returning (see
+; the do_ret macro). Define AES_DLL for the Dynamic Link Library version.
+
+;%define AES_DLL
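+
+; As an illustration (derived from the do_name and do_ret macros below, with
+; parms set to 12 for the three pointer arguments): building with AES_DLL
+; defined exports the routines as _aes_encrypt@12 and _aes_decrypt@12, and
+; each routine returns with 'ret 12' to pop its parameters from the stack.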
+
+; End of user defines
+
+%ifdef AES_VAR
+%define KS_LENGTH 60
+%elifdef AES_256
+%define KS_LENGTH 60
+%elifdef AES_192
+%define KS_LENGTH 52
+%else
+%define KS_LENGTH 44
+%endif
+
+%define xf(x) (-16*x)
+
+%ifdef AES_REV_DKS
+%define xi(x) (-16*x)
+%else
+%define xi(x) (16*x)
+%endif
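+
+; For example, xf(1) expands to -16: the encryption rounds index the key
+; schedule with negative offsets from a pointer to the last round key. xi()
+; gives the same negative offsets for decryption when AES_REV_DKS is defined
+; (decryption key schedule stored in reverse order); otherwise its offsets
+; are positive, measured from the start of the schedule.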
+
+tlen equ 1024 ; length of each of 4 'xor' arrays (256 32-bit words)
+
+; offsets to parameters with one register pushed onto stack
+
+in_blk equ 4 ; input byte array address parameter
+out_blk equ 8 ; output byte array address parameter
+
+ctx equ 12 ; AES context structure
+
+stk_spc equ 24 ; stack space
+
+; register mapping for encrypt and decrypt subroutines
+
+%define r0 eax
+%define r1 ebx
+%define r2 esi
+%define r3 edi
+%define r4 ecx
+%define r5 edx
+%define r6 ebp
+
+%define eaxl al
+%define eaxh ah
+%define ebxl bl
+%define ebxh bh
+%define ecxl cl
+%define ecxh ch
+%define edxl dl
+%define edxh dh
+
+; These macros take a 32-bit word representing a column and use each of
+; its 4 bytes to index one of four tables of 256 32-bit words; the words
+; selected are xored into the four output columns. The output values are
+; in the registers %1, %2, %3 and %4 and the column input is in %5 with
+; %6 as a scratch register.
+
+; Parameters:
+; %1 out_state[0]
+; %2 out_state[1]
+; %3 out_state[2]
+; %4 out_state[3]
+; %5 input column for the round (destroyed)
+; %6 scratch register for the round
+; %7 address of the table set used for the round
+; %8 key schedule address for the round (in form r6 + offset;
+;    do_fcol and do_icol only)
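+
+; As a sketch of the effect: with b0..b3 the four bytes of %5 (low byte
+; first) and t0..t3 the four 1 KB tables starting at %7, each invocation
+; computes %1 ^= t0[b0], %2 ^= t1[b1], %3 ^= t2[b2] and %4 ^= t3[b3];
+; do_fcol and do_icol first load the four round key words from %8.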
+
+%macro do_fcol 8 ; first column forward round
+
+ movzx %6,%5l
+ mov %1,[%8]
+ xor %1,[4*%6+%7]
+ movzx %6,%5h
+ shr %5,16
+ mov %2,[%8+12]
+ xor %2,[4*%6+%7+tlen]
+ movzx %6,%5l
+ mov %3,[%8+ 8]
+ xor %3,[4*%6+%7+2*tlen]
+ movzx %6,%5h
+ mov %5,%4 ; save an input register value
+ mov %4,[%8+ 4]
+ xor %4,[4*%6+%7+3*tlen]
+
+%endmacro
+
+%macro do_icol 8 ; first column for inverse round
+
+ movzx %6,%5l
+ mov %1,[%8]
+ xor %1,[4*%6+%7]
+ movzx %6,%5h
+ shr %5,16
+ mov %2,[%8+ 4]
+ xor %2,[4*%6+%7+tlen]
+ movzx %6,%5l
+ mov %3,[%8+ 8]
+ xor %3,[4*%6+%7+2*tlen]
+ movzx %6,%5h
+ mov %5,%4 ; save an input register value
+ mov %4,[%8+12]
+ xor %4,[4*%6+%7+3*tlen]
+
+%endmacro
+
+%macro do_col 7 ; other columns for forward and inverse rounds
+
+ movzx %6,%5l
+ xor %1,[4*%6+%7]
+ movzx %6,%5h
+ shr %5,16
+ xor %2,[4*%6+%7+tlen]
+ movzx %6,%5l
+ xor %3,[4*%6+%7+2*tlen]
+ movzx %6,%5h
+ xor %4,[4*%6+%7+3*tlen]
+
+%endmacro
+
+; These macros implement stack-based local variables
+
+%macro save 2
+ mov [esp+4*%1],%2
+%endmacro
+
+%macro restore 2
+ mov %1,[esp+4*%2]
+%endmacro
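+
+; For example, under the register mapping above 'save 0,r2' assembles to
+; 'mov [esp],esi' and 'restore r4,1' to 'mov ecx,[esp+4]'.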
+
+; This macro performs a forward encryption cycle. It is entered with the
+; previous round's column values in r0, r1, r2 and r3 and exits with this
+; round's values in the same registers.
+
+%macro fwd_rnd 1-2 _t_fn ; normal forward rounds
+
+ mov r4,r0
+ save 0,r2
+ save 1,r3
+
+; compute new column values
+
+ do_fcol r0,r3,r2,r1, r4,r5, %2, %1 ; r4 = input r0
+ do_col r1,r0,r3,r2, r4,r5, %2 ; r4 = input r1 (saved in do_fcol)
+ restore r4,0
+ do_col r2,r1,r0,r3, r4,r5, %2 ; r4 = input r2
+ restore r4,1
+ do_col r3,r2,r1,r0, r4,r5, %2 ; r4 = input r3
+
+%endmacro
+
+; This macro performs an inverse (decryption) cycle. It is entered with the
+; previous round's column values in r0, r1, r2 and r3 and exits with this
+; round's values in the same registers.
+
+%macro inv_rnd 1-2 _t_in ; normal inverse round
+
+ mov r4,r0
+ save 0,r1
+ save 1,r2
+
+; compute new column values
+
+ do_icol r0,r1,r2,r3, r4,r5, %2, %1 ; r4 = r0
+ do_col r3,r0,r1,r2, r4,r5, %2 ; r4 = r3 (saved in do_icol)
+ restore r4,1
+ do_col r2,r3,r0,r1, r4,r5, %2 ; r4 = r2
+ restore r4,0
+ do_col r1,r2,r3,r0, r4,r5, %2 ; r4 = r1
+
+%endmacro
+
+; The DLL has to implement the _stdcall calling interface, which means that
+; on return we must take our parameters (three 4-byte pointers) off the
+; stack ourselves.
+
+%define parms 12
+
+%macro do_ret 0-1 parms
+%ifdef AES_DLL
+ ret %1
+%else
+ ret
+%endif
+%endmacro
+
+%macro do_name 1-2 parms
+%ifndef AES_DLL
+ global %1
+%1:
+%else
+ global %1@%2
+ export %1@%2
+%1@%2:
+%endif
+%endmacro
+
+; AES Encryption Subroutine
+
+%ifdef ENCRYPTION
+
+ extern _t_fn
+ extern _t_fl
+
+ do_name _aes_encrypt
+
+ sub esp,stk_spc
+ mov [esp+20],ebp
+ mov [esp+16],ebx
+ mov [esp+12],esi
+ mov [esp+ 8],edi
+
+ mov r6,[esp+ctx+stk_spc] ; key pointer
+ movzx r0,byte [r6+4*KS_LENGTH]
+ add r6,r0
+ mov [r6+16],al ; r0 = eax
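+
+; r6 now points at the last round key; the byte loaded above (16 times the
+; number of rounds, as the comparisons further down imply) is stashed at
+; [r6+16] so it can be picked up again below to locate the first round key
+; and to select the number of rounds.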
+
+; input four columns and xor in first round key
+
+ mov r4,[esp+in_blk+stk_spc] ; input pointer
+ mov r0,[r4 ]
+ mov r1,[r4+ 4]
+ mov r2,[r4+ 8]
+ mov r3,[r4+12]
+
+ movzx r5,byte[r6+16]
+ lea r4,[r4+16]
+ neg r5
+
+ lea r4,[r5+r6]
+ xor r0,[r4 ]
+ xor r1,[r4+ 4]
+ xor r2,[r4+ 8]
+ xor r3,[r4+12]
+
+; determine the number of rounds
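+; r5 holds minus the byte offset of the last round key from the start of
+; the schedule, i.e. -16*rounds, so -10*16, -12*16 and -14*16 select the
+; 10, 12 and 14 round cases (128-, 192- and 256-bit keys); any other value
+; sets eax to -1 and returns.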
+
+ cmp r5,-10*16
+ je .3
+ cmp r5,-12*16
+ je .2
+ cmp r5,-14*16
+ je .1
+ mov eax,-1
+ jmp .5
+
+.1: fwd_rnd r6+xf(13) ; 14 rounds for 256-bit key
+ fwd_rnd r6+xf(12)
+.2: fwd_rnd r6+xf(11) ; 12 rounds for 192-bit key
+ fwd_rnd r6+xf(10)
+.3: fwd_rnd r6+xf( 9) ; 10 rounds for 128-bit key
+ fwd_rnd r6+xf( 8)
+ fwd_rnd r6+xf( 7)
+ fwd_rnd r6+xf( 6)
+ fwd_rnd r6+xf( 5)
+ fwd_rnd r6+xf( 4)
+ fwd_rnd r6+xf( 3)
+ fwd_rnd r6+xf( 2)
+ fwd_rnd r6+xf( 1)
+ fwd_rnd r6+xf( 0),_t_fl ; last round uses a different table
+
+; move final values to the output array
+
+ mov r4,[esp+out_blk+stk_spc]
+ mov [r4+12],r3
+ mov [r4+8],r2
+ mov [r4+4],r1
+ mov [r4],r0
+
+.5: mov ebp,[esp+20]
+ mov ebx,[esp+16]
+ mov esi,[esp+12]
+ mov edi,[esp+ 8]
+ lea esp,[esp+stk_spc]
+ do_ret
+
+%endif
+
+; AES Decryption Subroutine
+
+%ifdef DECRYPTION
+
+ extern _t_in
+ extern _t_il
+
+ do_name _aes_decrypt
+
+ sub esp,stk_spc
+ mov [esp+20],ebp
+ mov [esp+16],ebx
+ mov [esp+12],esi
+ mov [esp+ 8],edi
+
+ mov r6,[esp+ctx+stk_spc] ; key pointer
+%ifdef AES_REV_DKS
+ movzx r0,byte[r6+4*KS_LENGTH]
+ add r6,r0
+ mov [r6+16],al ; r0 = eax
+%endif
+
+; input four columns and xor in first round key
+
+ mov r4,[esp+in_blk+stk_spc] ; input pointer
+ mov r0,[r4 ]
+ mov r1,[r4+ 4]
+ mov r2,[r4+ 8]
+ mov r3,[r4+12]
+ lea r4,[r4+16]
+
+%ifdef AES_REV_DKS
+ movzx r5,byte[r6+16]
+ neg r5
+ lea r4,[r6+r5]
+%else
+ movzx r5,byte[r6+4*KS_LENGTH]
+ lea r4,[r6+r5]
+ neg r5
+%endif
+ xor r0,[r4 ]
+ xor r1,[r4+ 4]
+ xor r2,[r4+ 8]
+ xor r3,[r4+12]
+
+; determine the number of rounds
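+; as in aes_encrypt above, r5 is -16*rounds and selects the 10, 12 or 14
+; round path, or returns -1 in eax for an unrecognised key length.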
+
+ cmp r5,-10*16
+ je .3
+ cmp r5,-12*16
+ je .2
+ cmp r5,-14*16
+ je .1
+ mov eax,-1
+ jmp .5
+
+.1: inv_rnd r6+xi(13) ; 14 rounds for 256-bit key
+ inv_rnd r6+xi(12)
+.2: inv_rnd r6+xi(11) ; 12 rounds for 192-bit key
+ inv_rnd r6+xi(10)
+.3: inv_rnd r6+xi( 9) ; 10 rounds for 128-bit key
+ inv_rnd r6+xi( 8)
+ inv_rnd r6+xi( 7)
+ inv_rnd r6+xi( 6)
+ inv_rnd r6+xi( 5)
+ inv_rnd r6+xi( 4)
+ inv_rnd r6+xi( 3)
+ inv_rnd r6+xi( 2)
+ inv_rnd r6+xi( 1)
+ inv_rnd r6+xi( 0),_t_il ; last round uses a different table
+
+; move final values to the output array.
+
+ mov r4,[esp+out_blk+stk_spc]
+ mov [r4+12],r3
+ mov [r4+8],r2
+ mov [r4+4],r1
+ mov [r4],r0
+
+.5: mov ebp,[esp+20]
+ mov ebx,[esp+16]
+ mov esi,[esp+12]
+ mov edi,[esp+ 8]
+ lea esp,[esp+stk_spc]
+ do_ret
+
+%endif
+
+ end