diff options
author | Werner Koch <[email protected]> | 2000-07-17 14:32:21 +0000 |
---|---|---|
committer | Werner Koch <[email protected]> | 2000-07-17 14:32:21 +0000 |
commit | 0bf44b072ca648336bca9cf1ec24ea9d257cea9a (patch) | |
tree | 92df532ce40c43627283e16bb5a06f39bd3a466c /mpi/power | |
parent | See ChangeLog: Fri Jul 14 19:38:23 CEST 2000 Werner Koch (diff) | |
download | gnupg-0bf44b072ca648336bca9cf1ec24ea9d257cea9a.tar.gz gnupg-0bf44b072ca648336bca9cf1ec24ea9d257cea9a.zip |
See ChangeLog: Mon Jul 17 16:35:47 CEST 2000 Werner Koch
Diffstat (limited to '')
-rw-r--r-- | mpi/power/distfiles | 7 | ||||
-rw-r--r-- | mpi/power/mpih-add1.S | 86 | ||||
-rw-r--r-- | mpi/power/mpih-lshift.S | 64 | ||||
-rw-r--r-- | mpi/power/mpih-mul1.S | 115 | ||||
-rw-r--r-- | mpi/power/mpih-mul2.S | 130 | ||||
-rw-r--r-- | mpi/power/mpih-mul3.S | 135 | ||||
-rw-r--r-- | mpi/power/mpih-rshift.S | 64 | ||||
-rw-r--r-- | mpi/power/mpih-sub1.S | 87 |
8 files changed, 688 insertions, 0 deletions
diff --git a/mpi/power/distfiles b/mpi/power/distfiles index e69de29bb..e664c8db6 100644 --- a/mpi/power/distfiles +++ b/mpi/power/distfiles @@ -0,0 +1,7 @@ +mpih-add1.S +mpih-lshift.S +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S +mpih-rshift.S +mpih-sub1.S diff --git a/mpi/power/mpih-add1.S b/mpi/power/mpih-add1.S new file mode 100644 index 000000000..ad27f3d81 --- /dev/null +++ b/mpi/power/mpih-add1.S @@ -0,0 +1,86 @@ +/* IBM POWER add_n -- Add two limb vectors of equal, non-zero length. + * + * Copyright (C) 1992, 1994, 1995, 1996, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + +/* +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# s2_ptr r5 +# size r6 + */ + + .toc + .extern mpihelp_add_n[DS] + .extern .mpihelp_add_n +.csect [PR] + .align 2 + .globl mpihelp_add_n + .globl .mpihelp_add_n + .csect mpihelp_add_n[DS] +mpihelp_add_n: + .long .mpihelp_add_n, TOC[tc0], 0 + .csect [PR] +.mpihelp_add_n: + andil. 10,6,1 # odd or even number of limbs? + l 8,0(4) # load least significant s1 limb + l 0,0(5) # load least significant s2 limb + cal 3,-4(3) # offset res_ptr, it's updated before it's used + sri 10,6,1 # count for unrolled loop + a 7,0,8 # add least significant limbs, set cy + mtctr 10 # copy count into CTR + beq 0,Leven # branch if even # of limbs (# of limbs >= 2) + +# We have an odd # of limbs. Add the first limbs separately. + cmpi 1,10,0 # is count for unrolled loop zero? + bne 1,L1 # branch if not + st 7,4(3) + aze 3,10 # use the fact that r10 is zero... + br # return + +# We added least significant limbs. Now reload the next limbs to enter loop. +L1: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + stu 7,4(3) + ae 7,0,8 # add limbs, set cy +Leven: lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + bdz Lend # If done, skip loop + +Loop: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + ae 11,9,10 # add previous limbs with cy, set cy + stu 7,4(3) # + lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + ae 7,0,8 # add previous limbs with cy, set cy + stu 11,4(3) # + bdn Loop # decrement CTR and loop back + +Lend: ae 11,9,10 # add limbs with cy, set cy + st 7,4(3) # + st 11,8(3) # + lil 3,0 # load cy into ... + aze 3,3 # ... return value register + br + diff --git a/mpi/power/mpih-lshift.S b/mpi/power/mpih-lshift.S new file mode 100644 index 000000000..5c53a0ae6 --- /dev/null +++ b/mpi/power/mpih-lshift.S @@ -0,0 +1,64 @@ +/* IBM POWER lshift + * + * Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + +/* +# INPUT PARAMETERS +# res_ptr r3 +# s_ptr r4 +# size r5 +# cnt r6 + */ + + .toc + .extern mpihelp_lshift[DS] + .extern .mpihelp_lshift +.csect [PR] + .align 2 + .globl mpihelp_lshift + .globl .mpihelp_lshift + .csect mpihelp_lshift[DS] +mpihelp_lshift: + .long .mpihelp_lshift, TOC[tc0], 0 + .csect [PR] +.mpihelp_lshift: + sli 0,5,2 + cax 9,3,0 + cax 4,4,0 + sfi 8,6,32 + mtctr 5 # put limb count in CTR loop register + lu 0,-4(4) # read most significant limb + sre 3,0,8 # compute carry out limb, and init MQ register + bdz Lend2 # if just one limb, skip loop + lu 0,-4(4) # read 2:nd most significant limb + sreq 7,0,8 # compute most significant limb of result + bdz Lend # if just two limb, skip loop +Loop: lu 0,-4(4) # load next lower limb + stu 7,-4(9) # store previous result during read latency + sreq 7,0,8 # compute result limb + bdn Loop # loop back until CTR is zero +Lend: stu 7,-4(9) # store 2:nd least significant limb +Lend2: sle 7,0,6 # compute least significant limb + st 7,-4(9) # store it + br + diff --git a/mpi/power/mpih-mul1.S b/mpi/power/mpih-mul1.S new file mode 100644 index 000000000..3b71b5aa9 --- /dev/null +++ b/mpi/power/mpih-mul1.S @@ -0,0 +1,115 @@ +/* IBM POWER mul_1 -- Multiply a limb vector with a limb and store + * the result in a second limb vector. + * + * Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + +/* +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To +# obtain that operation, we have to use the 32x32->64 signed multiplication +# instruction, and add the appropriate compensation to the high limb of the +# result. We add the multiplicand if the multiplier has its most significant +# bit set, and we add the multiplier if the multiplicand has its most +# significant bit set. We need to preserve the carry flag between each +# iteration, so we have to compute the compensation carefully (the natural, +# srai+and doesn't work). Since the POWER architecture has a branch unit +# we can branch in zero cycles, so that's how we perform the additions. + */ + + .toc + .csect .mpihelp_mul_1[PR] + .align 2 + .globl mpihelp_mul_1 + .globl .mpihelp_mul_1 + .csect mpihelp_mul_1[DS] +mpihelp_mul_1: + .long .mpihelp_mul_1[PR], TOC[tc0], 0 + .csect .mpihelp_mul_1[PR] +.mpihelp_mul_1: + + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 8 + ai 0,0,0 # reset carry + cax 9,9,7 + blt Lneg +Lpos: bdz Lend +Lploop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 0 + ae 8,0,9 + bge Lp0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 8,0,10 + bge Lp1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Lp1: bdn Lploop + b Lend + +Lneg: cax 9,9,0 + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + cax 10,10,0 # adjust high limb for negative s2_limb + mfmq 0 + ae 8,0,9 + bge Ln0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + cax 9,9,0 # adjust high limb for negative s2_limb + mfmq 0 + ae 8,0,10 + bge Ln1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) +Lend: st 8,4(3) + aze 3,9 + br + diff --git a/mpi/power/mpih-mul2.S b/mpi/power/mpih-mul2.S new file mode 100644 index 000000000..19ddee86d --- /dev/null +++ b/mpi/power/mpih-mul2.S @@ -0,0 +1,130 @@ +/* IBM POWER addmul_1 -- Multiply a limb vector with a limb and add + * the result to a second limb vector. + * + * Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + + + +/* +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To +# obtain that operation, we have to use the 32x32->64 signed multiplication +# instruction, and add the appropriate compensation to the high limb of the +# result. We add the multiplicand if the multiplier has its most significant +# bit set, and we add the multiplier if the multiplicand has its most +# significant bit set. We need to preserve the carry flag between each +# iteration, so we have to compute the compensation carefully (the natural, +# srai+and doesn't work). Since the POWER architecture has a branch unit +# we can branch in zero cycles, so that's how we perform the additions. + */ + + .toc + .csect .mpihelp_addmul_1[PR] + .align 2 + .globl mpihelp_addmul_1 + .globl .mpihelp_addmul_1 + .csect mpihelp_addmul_1[DS] +mpihelp_addmul_1: + .long .mpihelp_addmul_1[PR], TOC[tc0], 0 + .csect .mpihelp_addmul_1[PR] +.mpihelp_addmul_1: + + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 8 + cax 9,9,7 + l 7,4(3) + a 8,8,7 # add res_limb + blt Lneg +Lpos: bdz Lend + +Lploop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 0 + ae 8,0,9 # low limb + old_cy_limb + old cy + l 7,4(3) + aze 10,10 # propagate cy to new cy_limb + a 8,8,7 # add res_limb + bge Lp0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 8,0,10 + l 7,4(3) + aze 9,9 + a 8,8,7 + bge Lp1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Lp1: bdn Lploop + + b Lend + +Lneg: cax 9,9,0 + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 7 + ae 8,7,9 + l 7,4(3) + ae 10,10,0 # propagate cy to new cy_limb + a 8,8,7 # add res_limb + bge Ln0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 7 + ae 8,7,10 + l 7,4(3) + ae 9,9,0 # propagate cy to new cy_limb + a 8,8,7 # add res_limb + bge Ln1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) +Lend: st 8,4(3) + aze 3,9 + br + diff --git a/mpi/power/mpih-mul3.S b/mpi/power/mpih-mul3.S new file mode 100644 index 000000000..e875e88ea --- /dev/null +++ b/mpi/power/mpih-mul3.S @@ -0,0 +1,135 @@ +/* IBM POWER submul_1 -- Multiply a limb vector with a limb and subtract + * the result from a second limb vector. + * + * Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + + +/* + +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# size r5 +# s2_limb r6 + +# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction. To +# obtain that operation, we have to use the 32x32->64 signed multiplication +# instruction, and add the appropriate compensation to the high limb of the +# result. We add the multiplicand if the multiplier has its most significant +# bit set, and we add the multiplier if the multiplicand has its most +# significant bit set. We need to preserve the carry flag between each +# iteration, so we have to compute the compensation carefully (the natural, +# srai+and doesn't work). Since the POWER architecture has a branch unit +# we can branch in zero cycles, so that's how we perform the additions. + */ + + .toc + .csect .mpihelp_submul_1[PR] + .align 2 + .globl mpihelp_submul_1 + .globl .mpihelp_submul_1 + .csect mpihelp_submul_1[DS] +mpihelp_submul_1: + .long .mpihelp_submul_1[PR], TOC[tc0], 0 + .csect .mpihelp_submul_1[PR] +.mpihelp_submul_1: + + cal 3,-4(3) + l 0,0(4) + cmpi 0,6,0 + mtctr 5 + mul 9,0,6 + srai 7,0,31 + and 7,7,6 + mfmq 11 + cax 9,9,7 + l 7,4(3) + sf 8,11,7 # add res_limb + a 11,8,11 # invert cy (r11 is junk) + blt Lneg +Lpos: bdz Lend + +Lploop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 0 + ae 11,0,9 # low limb + old_cy_limb + old cy + l 7,4(3) + aze 10,10 # propagate cy to new cy_limb + sf 8,11,7 # add res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Lp0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Lp0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 0 + ae 11,0,10 + l 7,4(3) + aze 9,9 + sf 8,11,7 + a 11,8,11 # invert cy (r11 is junk) + bge Lp1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Lp1: bdn Lploop + + b Lend + +Lneg: cax 9,9,0 + bdz Lend +Lnloop: lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 10,0,6 + mfmq 7 + ae 11,7,9 + l 7,4(3) + ae 10,10,0 # propagate cy to new cy_limb + sf 8,11,7 # add res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Ln0 + cax 10,10,6 # adjust high limb for negative limb from s1 +Ln0: bdz Lend0 + lu 0,4(4) + stu 8,4(3) + cmpi 0,0,0 + mul 9,0,6 + mfmq 7 + ae 11,7,10 + l 7,4(3) + ae 9,9,0 # propagate cy to new cy_limb + sf 8,11,7 # add res_limb + a 11,8,11 # invert cy (r11 is junk) + bge Ln1 + cax 9,9,6 # adjust high limb for negative limb from s1 +Ln1: bdn Lnloop + b Lend + +Lend0: cal 9,0(10) +Lend: st 8,4(3) + aze 3,9 + br + diff --git a/mpi/power/mpih-rshift.S b/mpi/power/mpih-rshift.S new file mode 100644 index 000000000..e29645072 --- /dev/null +++ b/mpi/power/mpih-rshift.S @@ -0,0 +1,64 @@ +/* IBM POWER rshift + * + * Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + + +/* +# INPUT PARAMETERS +# res_ptr r3 +# s_ptr r4 +# size r5 +# cnt r6 +*/ + + .toc + .extern mpihelp_rshift[DS] + .extern .mpihelp_rshift +.csect [PR] + .align 2 + .globl mpihelp_rshift + .globl .mpihelp_rshift + .csect mpihelp_rshift[DS] +mpihelp_rshift: + .long .mpihelp_rshift, TOC[tc0], 0 + .csect [PR] +.mpihelp_rshift: + sfi 8,6,32 + mtctr 5 # put limb count in CTR loop register + l 0,0(4) # read least significant limb + ai 9,3,-4 # adjust res_ptr since it's offset in the stu:s + sle 3,0,8 # compute carry limb, and init MQ register + bdz Lend2 # if just one limb, skip loop + lu 0,4(4) # read 2:nd least significant limb + sleq 7,0,8 # compute least significant limb of result + bdz Lend # if just two limb, skip loop +Loop: lu 0,4(4) # load next higher limb + stu 7,4(9) # store previous result during read latency + sleq 7,0,8 # compute result limb + bdn Loop # loop back until CTR is zero +Lend: stu 7,4(9) # store 2:nd most significant limb +Lend2: sre 7,0,6 # compute most significant limb + st 7,4(9) # store it + br + + diff --git a/mpi/power/mpih-sub1.S b/mpi/power/mpih-sub1.S new file mode 100644 index 000000000..a3605533e --- /dev/null +++ b/mpi/power/mpih-sub1.S @@ -0,0 +1,87 @@ +/* IBM POWER sub_n -- Subtract two limb vectors of equal, non-zero length. + * + * Copyright (C) 1992, 1994, 1995, 1996, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "sysdep.h" +#include "asm-syntax.h" + +/* +# INPUT PARAMETERS +# res_ptr r3 +# s1_ptr r4 +# s2_ptr r5 +# size r6 + */ + + .toc + .extern mpihelp_sub_n[DS] + .extern .mpihelp_sub_n +.csect [PR] + .align 2 + .globl mpihelp_sub_n + .globl .mpihelp_sub_n + .csect mpihelp_sub_n[DS] +mpihelp_sub_n: + .long .mpihelp_sub_n, TOC[tc0], 0 + .csect [PR] +.mpihelp_sub_n: + andil. 10,6,1 # odd or even number of limbs? + l 8,0(4) # load least significant s1 limb + l 0,0(5) # load least significant s2 limb + cal 3,-4(3) # offset res_ptr, it's updated before it's used + sri 10,6,1 # count for unrolled loop + sf 7,0,8 # subtract least significant limbs, set cy + mtctr 10 # copy count into CTR + beq 0,Leven # branch if even # of limbs (# of limbs >= 2) + +# We have an odd # of limbs. Add the first limbs separately. + cmpi 1,10,0 # is count for unrolled loop zero? + bne 1,L1 # branch if not + st 7,4(3) + sfe 3,0,0 # load !cy into ... + sfi 3,3,0 # ... return value register + br # return + +# We added least significant limbs. Now reload the next limbs to enter loop. +L1: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + stu 7,4(3) + sfe 7,0,8 # subtract limbs, set cy +Leven: lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + bdz Lend # If done, skip loop + +Loop: lu 8,4(4) # load s1 limb and update s1_ptr + lu 0,4(5) # load s2 limb and update s2_ptr + sfe 11,10,9 # subtract previous limbs with cy, set cy + stu 7,4(3) # + lu 9,4(4) # load s1 limb and update s1_ptr + lu 10,4(5) # load s2 limb and update s2_ptr + sfe 7,0,8 # subtract previous limbs with cy, set cy + stu 11,4(3) # + bdn Loop # decrement CTR and loop back + +Lend: sfe 11,10,9 # subtract limbs with cy, set cy + st 7,4(3) # + st 11,8(3) # + sfe 3,0,0 # load !cy into ... + sfi 3,3,0 # ... return value register + br + |