See ChangeLog: Mon Jul 17 16:35:47 CEST 2000 Werner Koch

author: Werner Koch <[email protected]> 2000-07-17 14:32:21 +0000
committer: Werner Koch <[email protected]> 2000-07-17 14:32:21 +0000
commit: 0bf44b072ca648336bca9cf1ec24ea9d257cea9a (patch)
tree: 92df532ce40c43627283e16bb5a06f39bd3a466c /mpi/power
parent: See ChangeLog: Fri Jul 14 19:38:23 CEST 2000 Werner Koch (diff)
download: gnupg-0bf44b072ca648336bca9cf1ec24ea9d257cea9a.tar.gz
gnupg-0bf44b072ca648336bca9cf1ec24ea9d257cea9a.zip
8 files changed, 688 insertions, 0 deletions
diff --git a/mpi/power/distfiles b/mpi/power/distfiles
index e69de29bb..e664c8db6 100644
--- a/mpi/power/distfiles
+++ b/mpi/power/distfiles
@@ -0,0 +1,7 @@
+mpih-add1.S
+mpih-lshift.S
+mpih-mul1.S
+mpih-mul2.S
+mpih-mul3.S
+mpih-rshift.S
+mpih-sub1.S
diff --git a/mpi/power/mpih-add1.S b/mpi/power/mpih-add1.S
new file mode 100644
index 000000000..ad27f3d81
--- /dev/null
+++ b/mpi/power/mpih-add1.S
@@ -0,0 +1,86 @@
+/* IBM POWER add_n -- Add two limb vectors of equal, non-zero length.
+ *
+ * Copyright (C) 1992, 1994, 1995, 1996, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+/*
+# INPUT PARAMETERS (limbs are 32-bit words; the carry-out is returned in r3)
+# res_ptr	r3	destination limb vector
+# s1_ptr	r4	first source limb vector
+# s2_ptr	r5	second source limb vector
+# size		r6	number of limbs, at least 1 (limb 0 is loaded unconditionally)
+ */
+
+	.toc
+	.extern mpihelp_add_n[DS]
+	.extern .mpihelp_add_n
+.csect [PR]
+	.align 2
+	.globl mpihelp_add_n
+	.globl .mpihelp_add_n
+	.csect mpihelp_add_n[DS]
+mpihelp_add_n:
+	.long .mpihelp_add_n, TOC[tc0], 0	# descriptor: code entry, TOC anchor, 0
+	.csect [PR]
+.mpihelp_add_n:
+	andil.	10,6,1		# odd or even number of limbs?
+	l	8,0(4)		# load least significant s1 limb
+	l	0,0(5)		# load least significant s2 limb
+	cal	3,-4(3) 	# offset res_ptr, it's updated before it's used
+	sri	10,6,1		# count for unrolled loop
+	a	7,0,8		# add least significant limbs, set cy
+	mtctr	10		# copy count into CTR
+	beq	0,Leven 	# branch if even # of limbs (# of limbs >= 2)
+
+# We have an odd # of limbs.  Add the first limbs separately.
+	cmpi	1,10,0		# is count for unrolled loop zero?
+	bne	1,L1		# branch if not
+	st	7,4(3)		# size == 1: store the only result limb
+	aze	3,10		# r3 = r10 + cy = cy, since r10 is zero here
+	br			# return
+
+# We added least significant limbs.  Now reload the next limbs to enter loop.
+L1:	lu	8,4(4)		# load s1 limb and update s1_ptr
+	lu	0,4(5)		# load s2 limb and update s2_ptr
+	stu	7,4(3)		# store least significant sum and update res_ptr
+	ae	7,0,8		# add limbs, set cy
+Leven:	lu	9,4(4)		# load s1 limb and update s1_ptr
+	lu	10,4(5) 	# load s2 limb and update s2_ptr
+	bdz	Lend		# If done, skip loop
+
+Loop:	lu	8,4(4)		# load s1 limb and update s1_ptr
+	lu	0,4(5)		# load s2 limb and update s2_ptr
+	ae	11,9,10 	# add previous limbs with cy, set cy
+	stu	7,4(3)		# store pending sum held in r7 and update res_ptr
+	lu	9,4(4)		# load s1 limb and update s1_ptr
+	lu	10,4(5) 	# load s2 limb and update s2_ptr
+	ae	7,0,8		# add previous limbs with cy, set cy
+	stu	11,4(3) 	# store pending sum held in r11 and update res_ptr
+	bdn	Loop		# decrement CTR and loop back
+
+Lend:	ae	11,9,10 	# add limbs with cy, set cy
+	st	7,4(3)		# store next-to-last result limb
+	st	11,8(3) 	# store last (most significant) result limb
+	lil	3,0		# load cy into ...
+	aze	3,3		# ... return value register
+	br			# return, r3 = carry out of the top limb
+
diff --git a/mpi/power/mpih-lshift.S b/mpi/power/mpih-lshift.S
new file mode 100644
index 000000000..5c53a0ae6
--- /dev/null
+++ b/mpi/power/mpih-lshift.S
@@ -0,0 +1,64 @@
+/* IBM POWER lshift
+ *
+ * Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+/*
+# INPUT PARAMETERS (limbs are 32-bit words; shifted-out bits are returned in r3)
+# res_ptr	r3	destination limb vector
+# s_ptr 	r4	source limb vector
+# size		r5	number of limbs, at least 1 (top limb is loaded unconditionally)
+# cnt		r6	left shift count in bits
+ */
+
+	.toc
+	.extern mpihelp_lshift[DS]
+	.extern .mpihelp_lshift
+.csect [PR]
+	.align 2
+	.globl mpihelp_lshift
+	.globl .mpihelp_lshift
+	.csect mpihelp_lshift[DS]
+mpihelp_lshift:
+	.long .mpihelp_lshift, TOC[tc0], 0	# descriptor: code entry, TOC anchor, 0
+	.csect [PR]
+.mpihelp_lshift:
+	sli	0,5,2		# r0 = size * 4, the vector length in bytes
+	cax	9,3,0		# r9 = res_ptr + size*4 (just past the top result limb)
+	cax	4,4,0		# r4 = s_ptr + size*4 (just past the top source limb)
+	sfi	8,6,32		# r8 = 32 - cnt
+	mtctr	5		# put limb count in CTR loop register
+	lu	0,-4(4) 	# read most significant limb
+	sre	3,0,8		# compute carry out limb, and init MQ register
+	bdz	Lend2		# if just one limb, skip loop
+	lu	0,-4(4) 	# read 2:nd most significant limb
+	sreq	7,0,8		# compute most significant limb of result
+	bdz	Lend		# if just two limb, skip loop
+Loop:	lu	0,-4(4) 	# load next lower limb
+	stu	7,-4(9) 	# store previous result during read latency
+	sreq	7,0,8		# compute result limb
+	bdn	Loop		# loop back until CTR is zero
+Lend:	stu	7,-4(9) 	# store 2:nd least significant limb
+Lend2:	sle	7,0,6		# compute least significant limb
+	st	7,-4(9) 	# store it
+	br			# return, r3 still holds the carry out limb
+
diff --git a/mpi/power/mpih-mul1.S b/mpi/power/mpih-mul1.S
new file mode 100644
index 000000000..3b71b5aa9
--- /dev/null
+++ b/mpi/power/mpih-mul1.S
@@ -0,0 +1,115 @@
+/* IBM POWER  mul_1 -- Multiply a limb vector with a limb and store
+ * the result in a second limb vector.
+ *
+ * Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+/*
+# INPUT PARAMETERS (limbs are 32-bit words; the final high limb is returned in r3)
+# res_ptr	r3	destination limb vector
+# s1_ptr	r4	source limb vector
+# size		r5	number of limbs, at least 1 (limb 0 is loaded unconditionally)
+# s2_limb	r6	the single limb every s1 limb is multiplied by
+
+# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction.  To
+# obtain that operation, we have to use the 32x32->64 signed multiplication
+# instruction, and add the appropriate compensation to the high limb of the
+# result.  We add the multiplicand if the multiplier has its most significant
+# bit set, and we add the multiplier if the multiplicand has its most
+# significant bit set.	We need to preserve the carry flag between each
+# iteration, so we have to compute the compensation carefully (the natural,
+# srai+and doesn't work).  Since the POWER architecture has a branch unit
+# we can branch in zero cycles, so that's how we perform the additions.
+ */
+
+	.toc
+	.csect .mpihelp_mul_1[PR]
+	.align 2
+	.globl mpihelp_mul_1
+	.globl .mpihelp_mul_1
+	.csect mpihelp_mul_1[DS]
+mpihelp_mul_1:
+	.long .mpihelp_mul_1[PR], TOC[tc0], 0	# descriptor: code entry, TOC anchor, 0
+	.csect .mpihelp_mul_1[PR]
+.mpihelp_mul_1:
+
+	cal	3,-4(3)		# bias res_ptr by -4; the stores below use offset 4
+	l	0,0(4)		# r0 = least significant s1 limb
+	cmpi	0,6,0		# cr0 = sign of s2_limb, tested by the blt below
+	mtctr	5		# CTR = size (limb count)
+	mul	9,0,6		# signed multiply: r9 = high limb, MQ = low limb
+	srai	7,0,31		# r7 = all ones if the s1 limb has its msb set, else 0
+	and	7,7,6		# r7 = s2_limb if the s1 limb has its msb set, else 0
+	mfmq	8		# r8 = low limb of the product
+	ai	0,0,0		# reset carry (r0 + 0 cannot carry; r0 is unchanged)
+	cax	9,9,7		# adjust high limb for negative limb from s1
+	blt	Lneg		# s2_limb has its msb set: use the Lneg loop
+Lpos:	bdz	Lend		# only one limb: go store it
+Lploop: lu	0,4(4)		# load next s1 limb and update s1_ptr
+	stu	8,4(3)		# store previous result limb and update res_ptr
+	cmpi	0,0,0		# cr0 = sign of the new s1 limb
+	mul	10,0,6		# r10 = high limb, MQ = low limb
+	mfmq	0		# r0 = low limb
+	ae	8,0,9		# low limb + previous high limb + cy, set cy
+	bge	Lp0		# s1 limb has msb clear: no adjustment needed
+	cax	10,10,6 	# adjust high limb for negative limb from s1
+Lp0:	bdz	Lend0		# done, pending high limb is in r10
+	lu	0,4(4)		# load next s1 limb and update s1_ptr
+	stu	8,4(3)		# store previous result limb and update res_ptr
+	cmpi	0,0,0		# cr0 = sign of the new s1 limb
+	mul	9,0,6		# r9 = high limb, MQ = low limb
+	mfmq	0		# r0 = low limb
+	ae	8,0,10		# low limb + previous high limb + cy, set cy
+	bge	Lp1		# s1 limb has msb clear: no adjustment needed
+	cax	9,9,6		# adjust high limb for negative limb from s1
+Lp1:	bdn	Lploop		# decrement CTR and loop back
+	b	Lend		# done, pending high limb is in r9
+
+Lneg:	cax	9,9,0		# adjust high limb for negative s2_limb (r0 = s1 limb)
+	bdz	Lend		# only one limb: go store it
+Lnloop: lu	0,4(4)		# load next s1 limb and update s1_ptr
+	stu	8,4(3)		# store previous result limb and update res_ptr
+	cmpi	0,0,0		# cr0 = sign of the new s1 limb
+	mul	10,0,6		# r10 = high limb, MQ = low limb
+	cax	10,10,0 	# adjust high limb for negative s2_limb
+	mfmq	0		# r0 = low limb
+	ae	8,0,9		# low limb + previous high limb + cy, set cy
+	bge	Ln0		# s1 limb has msb clear: no further adjustment
+	cax	10,10,6 	# adjust high limb for negative limb from s1
+Ln0:	bdz	Lend0		# done, pending high limb is in r10
+	lu	0,4(4)		# load next s1 limb and update s1_ptr
+	stu	8,4(3)		# store previous result limb and update res_ptr
+	cmpi	0,0,0		# cr0 = sign of the new s1 limb
+	mul	9,0,6		# r9 = high limb, MQ = low limb
+	cax	9,9,0		# adjust high limb for negative s2_limb
+	mfmq	0		# r0 = low limb
+	ae	8,0,10		# low limb + previous high limb + cy, set cy
+	bge	Ln1		# s1 limb has msb clear: no further adjustment
+	cax	9,9,6		# adjust high limb for negative limb from s1
+Ln1:	bdn	Lnloop		# decrement CTR and loop back
+	b	Lend		# done, pending high limb is in r9
+
+Lend0:	cal	9,0(10)		# r9 = r10, so Lend finds the high limb in r9
+Lend:	st	8,4(3)		# store the last low result limb
+	aze	3,9		# return value r3 = pending high limb + cy
+	br			# return
+
diff --git a/mpi/power/mpih-mul2.S b/mpi/power/mpih-mul2.S
new file mode 100644
index 000000000..19ddee86d
--- /dev/null
+++ b/mpi/power/mpih-mul2.S
@@ -0,0 +1,130 @@
+/* IBM POWER addmul_1 -- Multiply a limb vector with a limb and add
+ *			 the result to a second limb vector.
+ *
+ * Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+
+/*
+# INPUT PARAMETERS (limbs are 32-bit words; the final carry limb is returned in r3)
+# res_ptr	r3	limb vector that is read, added to and written back
+# s1_ptr	r4	source limb vector
+# size		r5	number of limbs, at least 1 (limb 0 is loaded unconditionally)
+# s2_limb	r6	the single limb every s1 limb is multiplied by
+
+# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction.  To
+# obtain that operation, we have to use the 32x32->64 signed multiplication
+# instruction, and add the appropriate compensation to the high limb of the
+# result.  We add the multiplicand if the multiplier has its most significant
+# bit set, and we add the multiplier if the multiplicand has its most
+# significant bit set.	We need to preserve the carry flag between each
+# iteration, so we have to compute the compensation carefully (the natural,
+# srai+and doesn't work).  Since the POWER architecture has a branch unit
+# we can branch in zero cycles, so that's how we perform the additions.
+ */
+
+	.toc
+	.csect .mpihelp_addmul_1[PR]
+	.align 2
+	.globl mpihelp_addmul_1
+	.globl .mpihelp_addmul_1
+	.csect mpihelp_addmul_1[DS]
+mpihelp_addmul_1:
+	.long .mpihelp_addmul_1[PR], TOC[tc0], 0	# descriptor: code entry, TOC anchor, 0
+	.csect .mpihelp_addmul_1[PR]
+.mpihelp_addmul_1:
+
+	cal	3,-4(3)		# bias res_ptr by -4; accesses below use offset 4
+	l	0,0(4)		# r0 = least significant s1 limb
+	cmpi	0,6,0		# cr0 = sign of s2_limb, tested by the blt below
+	mtctr	5		# CTR = size (limb count)
+	mul	9,0,6		# signed multiply: r9 = high limb, MQ = low limb
+	srai	7,0,31		# r7 = all ones if the s1 limb has its msb set, else 0
+	and	7,7,6		# r7 = s2_limb if the s1 limb has its msb set, else 0
+	mfmq	8		# r8 = low limb of the product
+	cax	9,9,7		# adjust high limb for negative limb from s1
+	l	7,4(3)		# r7 = least significant res limb
+	a	8,8,7		# add res_limb, set cy
+	blt	Lneg		# s2_limb has its msb set: use the Lneg loop
+Lpos:	bdz	Lend		# only one limb: go store it
+
+Lploop: lu	0,4(4)		# load next s1 limb and update s1_ptr
+	stu	8,4(3)		# store previous result limb and update res_ptr
+	cmpi	0,0,0		# cr0 = sign of the new s1 limb
+	mul	10,0,6		# r10 = high limb, MQ = low limb
+	mfmq	0		# r0 = low limb
+	ae	8,0,9		# low limb + old_cy_limb + old cy
+	l	7,4(3)		# r7 = res limb at the position now being computed
+	aze	10,10		# propagate cy to new cy_limb
+	a	8,8,7		# add res_limb
+	bge	Lp0		# s1 limb has msb clear: no adjustment needed
+	cax	10,10,6 	# adjust high limb for negative limb from s1
+Lp0:	bdz	Lend0		# done, pending cy_limb is in r10
+	lu	0,4(4)		# load next s1 limb and update s1_ptr
+	stu	8,4(3)		# store previous result limb and update res_ptr
+	cmpi	0,0,0		# cr0 = sign of the new s1 limb
+	mul	9,0,6		# r9 = high limb, MQ = low limb
+	mfmq	0		# r0 = low limb
+	ae	8,0,10		# low limb + old_cy_limb + old cy
+	l	7,4(3)		# r7 = res limb at the position now being computed
+	aze	9,9		# propagate cy to new cy_limb
+	a	8,8,7		# add res_limb
+	bge	Lp1		# s1 limb has msb clear: no adjustment needed
+	cax	9,9,6		# adjust high limb for negative limb from s1
+Lp1:	bdn	Lploop		# decrement CTR and loop back
+
+	b	Lend		# done, pending cy_limb is in r9
+
+Lneg:	cax	9,9,0		# adjust high limb for negative s2_limb (r0 = s1 limb)
+	bdz	Lend		# only one limb: go store it
+Lnloop: lu	0,4(4)		# load next s1 limb and update s1_ptr
+	stu	8,4(3)		# store previous result limb and update res_ptr
+	cmpi	0,0,0		# cr0 = sign of the new s1 limb
+	mul	10,0,6		# r10 = high limb, MQ = low limb
+	mfmq	7		# r7 = low limb (r0 keeps the s1 limb for the ae below)
+	ae	8,7,9		# low limb + old_cy_limb + old cy
+	l	7,4(3)		# r7 = res limb at the position now being computed
+	ae	10,10,0 	# propagate cy to new cy_limb, also adds the s1 limb
+	a	8,8,7		# add res_limb
+	bge	Ln0		# s1 limb has msb clear: no further adjustment
+	cax	10,10,6 	# adjust high limb for negative limb from s1
+Ln0:	bdz	Lend0		# done, pending cy_limb is in r10
+	lu	0,4(4)		# load next s1 limb and update s1_ptr
+	stu	8,4(3)		# store previous result limb and update res_ptr
+	cmpi	0,0,0		# cr0 = sign of the new s1 limb
+	mul	9,0,6		# r9 = high limb, MQ = low limb
+	mfmq	7		# r7 = low limb (r0 keeps the s1 limb for the ae below)
+	ae	8,7,10		# low limb + old_cy_limb + old cy
+	l	7,4(3)		# r7 = res limb at the position now being computed
+	ae	9,9,0		# propagate cy to new cy_limb, also adds the s1 limb
+	a	8,8,7		# add res_limb
+	bge	Ln1		# s1 limb has msb clear: no further adjustment
+	cax	9,9,6		# adjust high limb for negative limb from s1
+Ln1:	bdn	Lnloop		# decrement CTR and loop back
+	b	Lend		# done, pending cy_limb is in r9
+
+Lend0:	cal	9,0(10)		# r9 = r10, so Lend finds the cy_limb in r9
+Lend:	st	8,4(3)		# store the last result limb
+	aze	3,9		# return value r3 = pending cy_limb + cy
+	br			# return
+
diff --git a/mpi/power/mpih-mul3.S b/mpi/power/mpih-mul3.S
new file mode 100644
index 000000000..e875e88ea
--- /dev/null
+++ b/mpi/power/mpih-mul3.S
@@ -0,0 +1,135 @@
+/* IBM POWER submul_1 -- Multiply a limb vector with a limb and subtract
+ *			 the result from a second limb vector.
+ *
+ * Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*
+# Limbs are 32-bit words; the final borrow limb is returned in r3.
+# INPUT PARAMETERS
+# res_ptr	r3	limb vector that is read, subtracted from and written back
+# s1_ptr	r4	source limb vector
+# size		r5	number of limbs, at least 1 (limb 0 is loaded unconditionally)
+# s2_limb	r6	the single limb every s1 limb is multiplied by
+
+# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction.  To
+# obtain that operation, we have to use the 32x32->64 signed multiplication
+# instruction, and add the appropriate compensation to the high limb of the
+# result.  We add the multiplicand if the multiplier has its most significant
+# bit set, and we add the multiplier if the multiplicand has its most
+# significant bit set.	We need to preserve the carry flag between each
+# iteration, so we have to compute the compensation carefully (the natural,
+# srai+and doesn't work).  Since the POWER architecture has a branch unit
+# we can branch in zero cycles, so that's how we perform the additions.
+ */
+
+	.toc
+	.csect .mpihelp_submul_1[PR]
+	.align 2
+	.globl mpihelp_submul_1
+	.globl .mpihelp_submul_1
+	.csect mpihelp_submul_1[DS]
+mpihelp_submul_1:
+	.long .mpihelp_submul_1[PR], TOC[tc0], 0	# descriptor: code entry, TOC anchor, 0
+	.csect .mpihelp_submul_1[PR]
+.mpihelp_submul_1:
+
+	cal	3,-4(3)		# bias res_ptr by -4; accesses below use offset 4
+	l	0,0(4)		# r0 = least significant s1 limb
+	cmpi	0,6,0		# cr0 = sign of s2_limb, tested by the blt below
+	mtctr	5		# CTR = size (limb count)
+	mul	9,0,6		# signed multiply: r9 = high limb, MQ = low limb
+	srai	7,0,31		# r7 = all ones if the s1 limb has its msb set, else 0
+	and	7,7,6		# r7 = s2_limb if the s1 limb has its msb set, else 0
+	mfmq	11		# r11 = low limb of the product
+	cax	9,9,7		# adjust high limb for negative limb from s1
+	l	7,4(3)		# r7 = least significant res limb
+	sf	8,11,7		# r8 = res_limb - r11 (subtract from res_limb)
+	a	11,8,11 	# invert cy (r11 is junk): cy is now set on borrow
+	blt	Lneg		# s2_limb has its msb set: use the Lneg loop
+Lpos:	bdz	Lend		# only one limb: go store it
+
+Lploop: lu	0,4(4)		# load next s1 limb and update s1_ptr
+	stu	8,4(3)		# store previous result limb and update res_ptr
+	cmpi	0,0,0		# cr0 = sign of the new s1 limb
+	mul	10,0,6		# r10 = high limb, MQ = low limb
+	mfmq	0		# r0 = low limb
+	ae	11,0,9		# low limb + old_cy_limb + old cy
+	l	7,4(3)		# r7 = res limb at the position now being computed
+	aze	10,10		# propagate cy to new cy_limb
+	sf	8,11,7		# r8 = res_limb - r11 (subtract from res_limb)
+	a	11,8,11 	# invert cy (r11 is junk)
+	bge	Lp0		# s1 limb has msb clear: no adjustment needed
+	cax	10,10,6 	# adjust high limb for negative limb from s1
+Lp0:	bdz	Lend0		# done, pending cy_limb is in r10
+	lu	0,4(4)		# load next s1 limb and update s1_ptr
+	stu	8,4(3)		# store previous result limb and update res_ptr
+	cmpi	0,0,0		# cr0 = sign of the new s1 limb
+	mul	9,0,6		# r9 = high limb, MQ = low limb
+	mfmq	0		# r0 = low limb
+	ae	11,0,10		# low limb + old_cy_limb + old cy
+	l	7,4(3)		# r7 = res limb at the position now being computed
+	aze	9,9		# propagate cy to new cy_limb
+	sf	8,11,7		# r8 = res_limb - r11 (subtract from res_limb)
+	a	11,8,11 	# invert cy (r11 is junk)
+	bge	Lp1		# s1 limb has msb clear: no adjustment needed
+	cax	9,9,6		# adjust high limb for negative limb from s1
+Lp1:	bdn	Lploop		# decrement CTR and loop back
+
+	b	Lend		# done, pending cy_limb is in r9
+
+Lneg:	cax	9,9,0		# adjust high limb for negative s2_limb (r0 = s1 limb)
+	bdz	Lend		# only one limb: go store it
+Lnloop: lu	0,4(4)		# load next s1 limb and update s1_ptr
+	stu	8,4(3)		# store previous result limb and update res_ptr
+	cmpi	0,0,0		# cr0 = sign of the new s1 limb
+	mul	10,0,6		# r10 = high limb, MQ = low limb
+	mfmq	7		# r7 = low limb (r0 keeps the s1 limb for the ae below)
+	ae	11,7,9		# low limb + old_cy_limb + old cy
+	l	7,4(3)		# r7 = res limb at the position now being computed
+	ae	10,10,0 	# propagate cy to new cy_limb, also adds the s1 limb
+	sf	8,11,7		# r8 = res_limb - r11 (subtract from res_limb)
+	a	11,8,11 	# invert cy (r11 is junk)
+	bge	Ln0		# s1 limb has msb clear: no further adjustment
+	cax	10,10,6 	# adjust high limb for negative limb from s1
+Ln0:	bdz	Lend0		# done, pending cy_limb is in r10
+	lu	0,4(4)		# load next s1 limb and update s1_ptr
+	stu	8,4(3)		# store previous result limb and update res_ptr
+	cmpi	0,0,0		# cr0 = sign of the new s1 limb
+	mul	9,0,6		# r9 = high limb, MQ = low limb
+	mfmq	7		# r7 = low limb (r0 keeps the s1 limb for the ae below)
+	ae	11,7,10		# low limb + old_cy_limb + old cy
+	l	7,4(3)		# r7 = res limb at the position now being computed
+	ae	9,9,0		# propagate cy to new cy_limb, also adds the s1 limb
+	sf	8,11,7		# r8 = res_limb - r11 (subtract from res_limb)
+	a	11,8,11 	# invert cy (r11 is junk)
+	bge	Ln1		# s1 limb has msb clear: no further adjustment
+	cax	9,9,6		# adjust high limb for negative limb from s1
+Ln1:	bdn	Lnloop		# decrement CTR and loop back
+	b	Lend		# done, pending cy_limb is in r9
+
+Lend0:	cal	9,0(10)		# r9 = r10, so Lend finds the cy_limb in r9
+Lend:	st	8,4(3)		# store the last result limb
+	aze	3,9		# return value r3 = pending cy_limb + cy
+	br			# return
+
diff --git a/mpi/power/mpih-rshift.S b/mpi/power/mpih-rshift.S
new file mode 100644
index 000000000..e29645072
--- /dev/null
+++ b/mpi/power/mpih-rshift.S
@@ -0,0 +1,64 @@
+/* IBM POWER rshift
+ *
+ * Copyright (C) 1992, 1994, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*
+# INPUT PARAMETERS (limbs are 32-bit words; shifted-out bits are returned in r3)
+# res_ptr	r3	destination limb vector
+# s_ptr 	r4	source limb vector
+# size		r5	number of limbs, at least 1 (limb 0 is loaded unconditionally)
+# cnt		r6	right shift count in bits
+*/
+
+	.toc
+	.extern mpihelp_rshift[DS]
+	.extern .mpihelp_rshift
+.csect [PR]
+	.align 2
+	.globl mpihelp_rshift
+	.globl .mpihelp_rshift
+	.csect mpihelp_rshift[DS]
+mpihelp_rshift:
+	.long .mpihelp_rshift, TOC[tc0], 0	# descriptor: code entry, TOC anchor, 0
+	.csect [PR]
+.mpihelp_rshift:
+	sfi	8,6,32		# r8 = 32 - cnt
+	mtctr	5		# put limb count in CTR loop register
+	l	0,0(4)		# read least significant limb
+	ai	9,3,-4		# adjust res_ptr since it's offset in the stu:s
+	sle	3,0,8		# compute carry limb, and init MQ register
+	bdz	Lend2		# if just one limb, skip loop
+	lu	0,4(4)		# read 2:nd least significant limb
+	sleq	7,0,8		# compute least significant limb of result
+	bdz	Lend		# if just two limb, skip loop
+Loop:	lu	0,4(4)		# load next higher limb
+	stu	7,4(9)		# store previous result during read latency
+	sleq	7,0,8		# compute result limb
+	bdn	Loop		# loop back until CTR is zero
+Lend:	stu	7,4(9)		# store 2:nd most significant limb
+Lend2:	sre	7,0,6		# compute most significant limb
+	st	7,4(9)		# store it
+	br			# return, r3 still holds the carry limb
+
+
diff --git a/mpi/power/mpih-sub1.S b/mpi/power/mpih-sub1.S
new file mode 100644
index 000000000..a3605533e
--- /dev/null
+++ b/mpi/power/mpih-sub1.S
@@ -0,0 +1,87 @@
+/* IBM POWER sub_n -- Subtract two limb vectors of equal, non-zero length.
+ *
+ * Copyright (C) 1992, 1994, 1995, 1996, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+/*
+# INPUT PARAMETERS (limbs are 32-bit words; the borrow-out is returned in r3)
+# res_ptr	r3	destination limb vector, receives s1 - s2
+# s1_ptr	r4	minuend limb vector
+# s2_ptr	r5	subtrahend limb vector
+# size		r6	number of limbs, at least 1 (limb 0 is loaded unconditionally)
+ */
+
+	.toc
+	.extern mpihelp_sub_n[DS]
+	.extern .mpihelp_sub_n
+.csect [PR]
+	.align 2
+	.globl mpihelp_sub_n
+	.globl .mpihelp_sub_n
+	.csect mpihelp_sub_n[DS]
+mpihelp_sub_n:
+	.long .mpihelp_sub_n, TOC[tc0], 0	# descriptor: code entry, TOC anchor, 0
+	.csect [PR]
+.mpihelp_sub_n:
+	andil.	10,6,1		# odd or even number of limbs?
+	l	8,0(4)		# load least significant s1 limb
+	l	0,0(5)		# load least significant s2 limb
+	cal	3,-4(3) 	# offset res_ptr, it's updated before it's used
+	sri	10,6,1		# count for unrolled loop
+	sf	7,0,8		# subtract least significant limbs, set cy
+	mtctr	10		# copy count into CTR
+	beq	0,Leven 	# branch if even # of limbs (# of limbs >= 2)
+
+# We have an odd # of limbs.  Subtract the first limbs separately.
+	cmpi	1,10,0		# is count for unrolled loop zero?
+	bne	1,L1		# branch if not
+	st	7,4(3)		# size == 1: store the only result limb
+	sfe	3,0,0		# load !cy into ... (r3 = cy - 1: 0, or -1 on borrow)
+	sfi	3,3,0		# ... return value register (r3 = 0 - r3: 0 or 1)
+	br			# return
+
+# We subtracted least significant limbs.  Now reload the next limbs to enter loop.
+L1:	lu	8,4(4)		# load s1 limb and update s1_ptr
+	lu	0,4(5)		# load s2 limb and update s2_ptr
+	stu	7,4(3)		# store least significant difference, update res_ptr
+	sfe	7,0,8		# subtract limbs, set cy
+Leven:	lu	9,4(4)		# load s1 limb and update s1_ptr
+	lu	10,4(5) 	# load s2 limb and update s2_ptr
+	bdz	Lend		# If done, skip loop
+
+Loop:	lu	8,4(4)		# load s1 limb and update s1_ptr
+	lu	0,4(5)		# load s2 limb and update s2_ptr
+	sfe	11,10,9 	# subtract previous limbs with cy, set cy
+	stu	7,4(3)		# store pending difference held in r7, update res_ptr
+	lu	9,4(4)		# load s1 limb and update s1_ptr
+	lu	10,4(5) 	# load s2 limb and update s2_ptr
+	sfe	7,0,8		# subtract previous limbs with cy, set cy
+	stu	11,4(3) 	# store pending difference held in r11, update res_ptr
+	bdn	Loop		# decrement CTR and loop back
+
+Lend:	sfe	11,10,9 	# subtract limbs with cy, set cy
+	st	7,4(3)		# store next-to-last result limb
+	st	11,8(3) 	# store last (most significant) result limb
+	sfe	3,0,0		# load !cy into ... (r3 = cy - 1: 0, or -1 on borrow)
+	sfi	3,3,0		# ... return value register (r3 = 0 - r3: 0 or 1)
+	br			# return, r3 = 1 if the subtraction borrowed, else 0
+
author	Werner Koch <[email protected]>	2000-07-17 14:32:21 +0000
committer	Werner Koch <[email protected]>	2000-07-17 14:32:21 +0000
commit	0bf44b072ca648336bca9cf1ec24ea9d257cea9a (patch)
tree	92df532ce40c43627283e16bb5a06f39bd3a466c /mpi/power
parent	See ChangeLog: Fri Jul 14 19:38:23 CEST 2000 Werner Koch (diff)
download	gnupg-0bf44b072ca648336bca9cf1ec24ea9d257cea9a.tar.gz gnupg-0bf44b072ca648336bca9cf1ec24ea9d257cea9a.zip