diff options
Diffstat (limited to '')
-rw-r--r-- | mpi/hppa/README | 84 | ||||
-rw-r--r-- | mpi/hppa/distfiles | 7 | ||||
-rw-r--r-- | mpi/hppa/mpih-add1.S | 70 | ||||
-rw-r--r-- | mpi/hppa/mpih-lshift.S | 72 | ||||
-rw-r--r-- | mpi/hppa/mpih-rshift.S | 68 | ||||
-rw-r--r-- | mpi/hppa/mpih-sub1.S | 77 | ||||
-rw-r--r-- | mpi/hppa/udiv-qrnnd.S | 297 | ||||
-rw-r--r-- | mpi/hppa1.1/distfiles | 5 | ||||
-rw-r--r-- | mpi/hppa1.1/mpih-mul1.S | 112 | ||||
-rw-r--r-- | mpi/hppa1.1/mpih-mul2.S | 114 | ||||
-rw-r--r-- | mpi/hppa1.1/mpih-mul3.S | 124 | ||||
-rw-r--r-- | mpi/hppa1.1/udiv-qrnnd.S | 88 |
12 files changed, 0 insertions, 1118 deletions
diff --git a/mpi/hppa/README b/mpi/hppa/README deleted file mode 100644 index 5a2d5fd97..000000000 --- a/mpi/hppa/README +++ /dev/null @@ -1,84 +0,0 @@ -This directory contains mpn functions for various HP PA-RISC chips. Code -that runs faster on the PA7100 and later implementations, is in the pa7100 -directory. - -RELEVANT OPTIMIZATION ISSUES - - Load and Store timing - -On the PA7000 no memory instructions can issue the two cycles after a store. -For the PA7100, this is reduced to one cycle. - -The PA7100 has a lockup-free cache, so it helps to schedule loads and the -dependent instruction really far from each other. - -STATUS - -1. mpn_mul_1 could be improved to 6.5 cycles/limb on the PA7100, using the - instructions below (but some sw pipelining is needed to avoid the - xmpyu-fstds delay): - - fldds s1_ptr - - xmpyu - fstds N(%r30) - xmpyu - fstds N(%r30) - - ldws N(%r30) - ldws N(%r30) - ldws N(%r30) - ldws N(%r30) - - addc - stws res_ptr - addc - stws res_ptr - - addib Loop - -2. mpn_addmul_1 could be improved from the current 10 to 7.5 cycles/limb - (asymptotically) on the PA7100, using the instructions below. With proper - sw pipelining and the unrolling level below, the speed becomes 8 - cycles/limb.
- - fldds s1_ptr - fldds s1_ptr - - xmpyu - fstds N(%r30) - xmpyu - fstds N(%r30) - xmpyu - fstds N(%r30) - xmpyu - fstds N(%r30) - - ldws N(%r30) - ldws N(%r30) - ldws N(%r30) - ldws N(%r30) - ldws N(%r30) - ldws N(%r30) - ldws N(%r30) - ldws N(%r30) - addc - addc - addc - addc - addc %r0,%r0,cy-limb - - ldws res_ptr - ldws res_ptr - ldws res_ptr - ldws res_ptr - add - stws res_ptr - addc - stws res_ptr - addc - stws res_ptr - addc - stws res_ptr - - addib diff --git a/mpi/hppa/distfiles b/mpi/hppa/distfiles deleted file mode 100644 index 7f24205d3..000000000 --- a/mpi/hppa/distfiles +++ /dev/null @@ -1,7 +0,0 @@ -README -udiv-qrnnd.S -mpih-add1.S -mpih-sub1.S -mpih-lshift.S -mpih-rshift.S - diff --git a/mpi/hppa/mpih-add1.S b/mpi/hppa/mpih-add1.S deleted file mode 100644 index e01c2eed0..000000000 --- a/mpi/hppa/mpih-add1.S +++ /dev/null @@ -1,70 +0,0 @@ -/* hppa add_n -- Add two limb vectors of the same length > 0 and store - * sum in a third limb vector. - * - * Copyright (C) 1992, 1994 Free Software Foundation, Inc. - * Copyright (C) 1998 Free Software Foundation, Inc. - * - * This file is part of GNUPG. - * - * GNUPG is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GNUPG is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. 
- * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - * The GNU MP Library itself is published under the LGPL; - * however I decided to publish this code under the plain GPL. - */ - - - -/******************* - * mpi_limb_t - * mpihelp_add_n( mpi_ptr_t res_ptr, (gr26) - * mpi_ptr_t s1_ptr, (gr25) - * mpi_ptr_t s2_ptr, (gr24) - * mpi_size_t size) (gr23) - * - * One might want to unroll this as for other processors, but it turns - * out that the data cache contention after a store makes such - * unrolling useless. We can't come under 5 cycles/limb anyway. - */ - - .code - .export mpihelp_add_n -mpihelp_add_n - .proc - .callinfo frame=0,no_calls - .entry - - ldws,ma 4(0,%r25),%r20 - ldws,ma 4(0,%r24),%r19 - - addib,= -1,%r23,L$end ; check for (SIZE == 1) - add %r20,%r19,%r28 ; add first limbs ignoring cy - -L$loop ldws,ma 4(0,%r25),%r20 - ldws,ma 4(0,%r24),%r19 - stws,ma %r28,4(0,%r26) - addib,<> -1,%r23,L$loop - addc %r20,%r19,%r28 - -L$end stws %r28,0(0,%r26) - bv 0(%r2) - addc %r0,%r0,%r28 - - .exit - .procend diff --git a/mpi/hppa/mpih-lshift.S b/mpi/hppa/mpih-lshift.S deleted file mode 100644 index ada09f595..000000000 --- a/mpi/hppa/mpih-lshift.S +++ /dev/null @@ -1,72 +0,0 @@ -/* hppa lshift -* - * Copyright (C) 1992, 1994, 1998 Free Software Foundation, Inc. - * - * This file is part of GNUPG. - * - * GNUPG is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GNUPG is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - - - -/******************* - * mpi_limb_t - * mpihelp_lshift( mpi_ptr_t wp, (gr26) - * mpi_ptr_t up, (gr25) - * mpi_size_t usize, (gr24) - * unsigned cnt) (gr23) - */ - - .code - .export mpihelp_lshift -mpihelp_lshift - .proc - .callinfo frame=64,no_calls - .entry - - sh2add %r24,%r25,%r25 - sh2add %r24,%r26,%r26 - ldws,mb -4(0,%r25),%r22 - subi 32,%r23,%r1 - mtsar %r1 - addib,= -1,%r24,L$0004 - vshd %r0,%r22,%r28 ; compute carry out limb - ldws,mb -4(0,%r25),%r29 - addib,= -1,%r24,L$0002 - vshd %r22,%r29,%r20 - -L$loop ldws,mb -4(0,%r25),%r22 - stws,mb %r20,-4(0,%r26) - addib,= -1,%r24,L$0003 - vshd %r29,%r22,%r20 - ldws,mb -4(0,%r25),%r29 - stws,mb %r20,-4(0,%r26) - addib,<> -1,%r24,L$loop - vshd %r22,%r29,%r20 - -L$0002 stws,mb %r20,-4(0,%r26) - vshd %r29,%r0,%r20 - bv 0(%r2) - stw %r20,-4(0,%r26) -L$0003 stws,mb %r20,-4(0,%r26) -L$0004 vshd %r22,%r0,%r20 - bv 0(%r2) - stw %r20,-4(0,%r26) - - .exit - .procend - - - diff --git a/mpi/hppa/mpih-rshift.S b/mpi/hppa/mpih-rshift.S deleted file mode 100644 index 0299d2e27..000000000 --- a/mpi/hppa/mpih-rshift.S +++ /dev/null @@ -1,68 +0,0 @@ -/* hppa rshift -* - * Copyright (C) 1992, 1994, 1998 Free Software Foundation, Inc. - * - * This file is part of GNUPG. - * - * GNUPG is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GNUPG is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - - - - -/******************* - * mpi_limb_t - * mpihelp_rshift( mpi_ptr_t wp, (gr26) - * mpi_ptr_t up, (gr25) - * mpi_size_t usize, (gr24) - * unsigned cnt) (gr23) - */ - - .code - .export mpihelp_rshift -mpihelp_rshift - .proc - .callinfo frame=64,no_calls - .entry - - ldws,ma 4(0,%r25),%r22 - mtsar %r23 - addib,= -1,%r24,L$r004 - vshd %r22,%r0,%r28 ; compute carry out limb - ldws,ma 4(0,%r25),%r29 - addib,= -1,%r24,L$r002 - vshd %r29,%r22,%r20 - -L$roop ldws,ma 4(0,%r25),%r22 - stws,ma %r20,4(0,%r26) - addib,= -1,%r24,L$r003 - vshd %r22,%r29,%r20 - ldws,ma 4(0,%r25),%r29 - stws,ma %r20,4(0,%r26) - addib,<> -1,%r24,L$roop - vshd %r29,%r22,%r20 - -L$r002 stws,ma %r20,4(0,%r26) - vshd %r0,%r29,%r20 - bv 0(%r2) - stw %r20,0(0,%r26) -L$r003 stws,ma %r20,4(0,%r26) -L$r004 vshd %r0,%r22,%r20 - bv 0(%r2) - stw %r20,0(0,%r26) - - .exit - .procend - diff --git a/mpi/hppa/mpih-sub1.S b/mpi/hppa/mpih-sub1.S deleted file mode 100644 index 8672e3145..000000000 --- a/mpi/hppa/mpih-sub1.S +++ /dev/null @@ -1,77 +0,0 @@ -/* hppa sub_n -- Subtract two limb vectors of the same length > 0 and store - * difference in a third limb vector. - * Copyright (C) 1992, 1994 Free Software Foundation, Inc. - * Copyright (C) 1998 Free Software Foundation, Inc. - * - * This file is part of GNUPG. - * - * GNUPG is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GNUPG is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - * The GNU MP Library itself is published under the LGPL; - * however I decided to publish this code under the plain GPL. - */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * mpihelp_sub_n( mpi_ptr_t res_ptr, (gr26) - * mpi_ptr_t s1_ptr, (gr25) - * mpi_ptr_t s2_ptr, (gr24) - * mpi_size_t size) (gr23) - * - * One might want to unroll this as for other processors, but it turns - * out that the data cache contention after a store makes such - * unrolling useless. We can't come under 5 cycles/limb anyway. - */ - - - .code - .export mpihelp_sub_n -mpihelp_sub_n - .proc - .callinfo frame=0,no_calls - .entry - - ldws,ma 4(0,%r25),%r20 - ldws,ma 4(0,%r24),%r19 - - addib,= -1,%r23,L$end ; check for (SIZE == 1) - sub %r20,%r19,%r28 ; subtract first limbs ignoring cy - -L$loop ldws,ma 4(0,%r25),%r20 - ldws,ma 4(0,%r24),%r19 - stws,ma %r28,4(0,%r26) - addib,<> -1,%r23,L$loop - subb %r20,%r19,%r28 - -L$end stws %r28,0(0,%r26) - addc %r0,%r0,%r28 - bv 0(%r2) - subi 1,%r28,%r28 - - .exit - .procend - - - diff --git a/mpi/hppa/udiv-qrnnd.S b/mpi/hppa/udiv-qrnnd.S deleted file mode 100644 index 849238349..000000000 --- a/mpi/hppa/udiv-qrnnd.S +++ /dev/null @@ -1,297 +0,0 @@ -/* HP-PA __udiv_qrnnd division support, used from longlong.h. - * This version runs fast on pre-PA7000 CPUs. - * - * Copyright (C) 1993, 1994 Free Software Foundation, Inc. 
- * Copyright (c) 1997 by Werner Koch (dd9jn) - * - * This file is part of G10. - * - * G10 is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * G10 is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - * The GNU MP Library itself is published under the LGPL; - * however I decided to publish this code under the plain GPL. - */ - - - -/* INPUT PARAMETERS - * rem_ptr gr26 - * n1 gr25 - * n0 gr24 - * d gr23 - * - * The code size is a bit excessive. We could merge the last two ds;addc - * sequences by simply moving the "bb,< Odd" instruction down. The only - * trouble is the FFFFFFFF code that would need some hacking. 
- */ - - .code - .export __udiv_qrnnd -__udiv_qrnnd - .proc - .callinfo frame=0,no_calls - .entry - - comb,< %r23,0,L$largedivisor - sub %r0,%r23,%r1 ; clear cy as side-effect - ds %r0,%r1,%r0 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r24 - ds %r25,%r23,%r25 - addc %r24,%r24,%r28 - ds %r25,%r23,%r25 - comclr,>= %r25,%r0,%r0 - addl %r25,%r23,%r25 - stws %r25,0(0,%r26) - bv 0(%r2) - addc %r28,%r28,%r28 - -L$largedivisor - extru %r24,31,1,%r19 ; r19 = n0 & 1 - bb,< %r23,31,L$odd - extru %r23,30,31,%r22 ; r22 = d >> 1 - shd %r25,%r24,1,%r24 ; r24 = new n0 - extru %r25,30,31,%r25 ; r25 = new n1 - sub %r0,%r22,%r21 - ds %r0,%r21,%r0 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - 
addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - comclr,>= %r25,%r0,%r0 - addl %r25,%r22,%r25 - sh1addl %r25,%r19,%r25 - stws %r25,0(0,%r26) - bv 0(%r2) - addc %r24,%r24,%r28 - -L$odd addib,sv,n 1,%r22,L$FF.. 
; r22 = (d / 2 + 1) - shd %r25,%r24,1,%r24 ; r24 = new n0 - extru %r25,30,31,%r25 ; r25 = new n1 - sub %r0,%r22,%r21 - ds %r0,%r21,%r0 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r24 - ds %r25,%r22,%r25 - addc %r24,%r24,%r28 - comclr,>= %r25,%r0,%r0 - addl %r25,%r22,%r25 - sh1addl %r25,%r19,%r25 -; We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25 - add,nuv %r28,%r25,%r25 - addl %r25,%r1,%r25 - addc %r0,%r28,%r28 - sub,<< %r25,%r23,%r0 - addl %r25,%r1,%r25 - stws %r25,0(0,%r26) - bv 0(%r2) - addc %r0,%r28,%r28 - -; This is just a special case of the code above. -; We come here when d == 0xFFFFFFFF -L$FF.. 
add,uv %r25,%r24,%r24 - sub,<< %r24,%r23,%r0 - ldo 1(%r24),%r24 - stws %r24,0(0,%r26) - bv 0(%r2) - addc %r0,%r25,%r28 - - .exit - .procend diff --git a/mpi/hppa1.1/distfiles b/mpi/hppa1.1/distfiles deleted file mode 100644 index d68227ac7..000000000 --- a/mpi/hppa1.1/distfiles +++ /dev/null @@ -1,5 +0,0 @@ -udiv-qrnnd.S -mpih-mul1.S -mpih-mul2.S -mpih-mul3.S - diff --git a/mpi/hppa1.1/mpih-mul1.S b/mpi/hppa1.1/mpih-mul1.S deleted file mode 100644 index 1f7377473..000000000 --- a/mpi/hppa1.1/mpih-mul1.S +++ /dev/null @@ -1,112 +0,0 @@ -/* hppa1.1 mul_1 -- Multiply a limb vector with a limb and store - * the result in a second limb vector. - * Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. - * Copyright (C) 1998 Free Software Foundation, Inc. - * - * This file is part of GNUPG. - * - * GNUPG is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GNUPG is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - * The GNU MP Library itself is published under the LGPL; - * however I decided to publish this code under the plain GPL. 
- */ - - -/******************* - * mpi_limb_t - * mpihelp_mul_1( mpi_ptr_t res_ptr, (r26) - * mpi_ptr_t s1_ptr, (r25) - * mpi_size_t s1_size, (r24) - * mpi_limb_t s2_limb) (r23) - * - * - * - * This runs at 9 cycles/limb on a PA7000. With the used instructions, it can - * not become faster due to data cache contention after a store. On the - * PA7100 it runs at 7 cycles/limb, and that can not be improved either, since - * only the xmpyu does not need the integer pipeline, so the only dual-issue - * we will get are addc+xmpyu. Unrolling would not help either CPU. - * - * We could use fldds to read two limbs at a time from the S1 array, and that - * could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and - * PA7100, respectively. We don't do that since it does not seem worth the - * (alignment) troubles... - * - * At least the PA7100 is rumored to be able to deal with cache-misses - * without stalling instruction issue. If this is true, and the cache is - * actually also lockup-free, we should use a deeper software pipeline, and - * load from S1 very early! (The loads and stores to -12(sp) will surely be - * in the cache.) - */ - - .code - .export mpihelp_mul_1 -mpihelp_mul_1 - .proc - .callinfo frame=64,no_calls - .entry - - ldo 64(%r30),%r30 - fldws,ma 4(%r25),%fr5 - stw %r23,-16(%r30) ; move s2_limb ... - addib,= -1,%r24,L$just_one_limb - fldws -16(%r30),%fr4 ; ... 
into fr4 - add %r0,%r0,%r0 ; clear carry - xmpyu %fr4,%fr5,%fr6 - fldws,ma 4(%r25),%fr7 - fstds %fr6,-16(%r30) - xmpyu %fr4,%fr7,%fr8 - ldw -12(%r30),%r19 ; least significant limb in product - ldw -16(%r30),%r28 - - fstds %fr8,-16(%r30) - addib,= -1,%r24,L$end - ldw -12(%r30),%r1 - -; Main loop -L$loop fldws,ma 4(%r25),%fr5 - stws,ma %r19,4(%r26) - addc %r28,%r1,%r19 - xmpyu %fr4,%fr5,%fr6 - ldw -16(%r30),%r28 - fstds %fr6,-16(%r30) - addib,<> -1,%r24,L$loop - ldw -12(%r30),%r1 - -L$end stws,ma %r19,4(%r26) - addc %r28,%r1,%r19 - ldw -16(%r30),%r28 - stws,ma %r19,4(%r26) - addc %r0,%r28,%r28 - bv 0(%r2) - ldo -64(%r30),%r30 - -L$just_one_limb - xmpyu %fr4,%fr5,%fr6 - fstds %fr6,-16(%r30) - ldw -16(%r30),%r28 - ldo -64(%r30),%r30 - bv 0(%r2) - fstws %fr6R,0(%r26) - - .exit - .procend - - diff --git a/mpi/hppa1.1/mpih-mul2.S b/mpi/hppa1.1/mpih-mul2.S deleted file mode 100644 index 89c025952..000000000 --- a/mpi/hppa1.1/mpih-mul2.S +++ /dev/null @@ -1,114 +0,0 @@ -/* hppa1.1 addmul_1 -- Multiply a limb vector with a limb and add - * the result to a second limb vector. - * Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. - * Copyright (C) 1998 Free Software Foundation, Inc. - * - * This file is part of GNUPG. - * - * GNUPG is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GNUPG is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - * The GNU MP Library itself is published under the LGPL; - * however I decided to publish this code under the plain GPL. - */ - - -/******************* - * mpi_limb_t - * mpihelp_addmul_1( mpi_ptr_t res_ptr, (r26) - * mpi_ptr_t s1_ptr, (r25) - * mpi_size_t s1_size, (r24) - * mpi_limb_t s2_limb) (r23) - * - * This runs at 11 cycles/limb on a PA7000. With the used instructions, it - * can not become faster due to data cache contention after a store. On the - * PA7100 it runs at 10 cycles/limb, and that can not be improved either, - * since only the xmpyu does not need the integer pipeline, so the only - * dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb - * on the PA7100. - * - * There are some ideas described in mpih-mul1.S that apply to this code too. - */ - - .code - .export mpihelp_addmul_1 -mpihelp_addmul_1 - .proc - .callinfo frame=64,no_calls - .entry - - ldo 64(%r30),%r30 - fldws,ma 4(%r25),%fr5 - stw %r23,-16(%r30) ; move s2_limb ... - addib,= -1,%r24,L$just_one_limb - fldws -16(%r30),%fr4 ; ...
into fr4 - add %r0,%r0,%r0 ; clear carry - xmpyu %fr4,%fr5,%fr6 - fldws,ma 4(%r25),%fr7 - fstds %fr6,-16(%r30) - xmpyu %fr4,%fr7,%fr8 - ldw -12(%r30),%r19 ; least significant limb in product - ldw -16(%r30),%r28 - - fstds %fr8,-16(%r30) - addib,= -1,%r24,L$end - ldw -12(%r30),%r1 - -; Main loop -L$loop ldws 0(%r26),%r29 - fldws,ma 4(%r25),%fr5 - add %r29,%r19,%r19 - stws,ma %r19,4(%r26) - addc %r28,%r1,%r19 - xmpyu %fr4,%fr5,%fr6 - ldw -16(%r30),%r28 - fstds %fr6,-16(%r30) - addc %r0,%r28,%r28 - addib,<> -1,%r24,L$loop - ldw -12(%r30),%r1 - -L$end ldw 0(%r26),%r29 - add %r29,%r19,%r19 - stws,ma %r19,4(%r26) - addc %r28,%r1,%r19 - ldw -16(%r30),%r28 - ldws 0(%r26),%r29 - addc %r0,%r28,%r28 - add %r29,%r19,%r19 - stws,ma %r19,4(%r26) - addc %r0,%r28,%r28 - bv 0(%r2) - ldo -64(%r30),%r30 - -L$just_one_limb - xmpyu %fr4,%fr5,%fr6 - ldw 0(%r26),%r29 - fstds %fr6,-16(%r30) - ldw -12(%r30),%r1 - ldw -16(%r30),%r28 - add %r29,%r1,%r19 - stw %r19,0(%r26) - addc %r0,%r28,%r28 - bv 0(%r2) - ldo -64(%r30),%r30 - - .exit - .procend - - diff --git a/mpi/hppa1.1/mpih-mul3.S b/mpi/hppa1.1/mpih-mul3.S deleted file mode 100644 index 80868a3ef..000000000 --- a/mpi/hppa1.1/mpih-mul3.S +++ /dev/null @@ -1,124 +0,0 @@ -/* hppa1.1 submul_1 -- Multiply a limb vector with a limb and subtract - * the result from a second limb vector. - * Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. - * Copyright (C) 1998 Free Software Foundation, Inc. - * - * This file is part of GNUPG. - * - * GNUPG is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GNUPG is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details.
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - * The GNU MP Library itself is published under the LGPL; - * however I decided to publish this code under the plain GPL. - */ - - -/******************* - * mpi_limb_t - * mpihelp_submul_1( mpi_ptr_t res_ptr, (r26) - * mpi_ptr_t s1_ptr, (r25) - * mpi_size_t s1_size, (r24) - * mpi_limb_t s2_limb) (r23) - * - * - * This runs at 12 cycles/limb on a PA7000. With the used instructions, it - * can not become faster due to data cache contention after a store. On the - * PA7100 it runs at 11 cycles/limb, and that can not be improved either, - * since only the xmpyu does not need the integer pipeline, so the only - * dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb - * on the PA7100. - * - * There are some ideas described in mpih-mul1.S that apply to this code too. - * - * It seems possible to make this run as fast as addmul_1, if we use - * sub,>>= %r29,%r19,%r22 - * addi 1,%r28,%r28 - * but that requires reworking the hairy software pipeline... - */ - - - .code - .export mpihelp_submul_1 -mpihelp_submul_1 - .proc - .callinfo frame=64,no_calls - .entry - - ldo 64(%r30),%r30 - fldws,ma 4(%r25),%fr5 - stw %r23,-16(%r30) ; move s2_limb ... - addib,= -1,%r24,L$just_one_limb - fldws -16(%r30),%fr4 ; ...
into fr4 - add %r0,%r0,%r0 ; clear carry - xmpyu %fr4,%fr5,%fr6 - fldws,ma 4(%r25),%fr7 - fstds %fr6,-16(%r30) - xmpyu %fr4,%fr7,%fr8 - ldw -12(%r30),%r19 ; least significant limb in product - ldw -16(%r30),%r28 - - fstds %fr8,-16(%r30) - addib,= -1,%r24,L$end - ldw -12(%r30),%r1 - -; Main loop -L$loop ldws 0(%r26),%r29 - fldws,ma 4(%r25),%fr5 - sub %r29,%r19,%r22 - add %r22,%r19,%r0 - stws,ma %r22,4(%r26) - addc %r28,%r1,%r19 - xmpyu %fr4,%fr5,%fr6 - ldw -16(%r30),%r28 - fstds %fr6,-16(%r30) - addc %r0,%r28,%r28 - addib,<> -1,%r24,L$loop - ldw -12(%r30),%r1 - -L$end ldw 0(%r26),%r29 - sub %r29,%r19,%r22 - add %r22,%r19,%r0 - stws,ma %r22,4(%r26) - addc %r28,%r1,%r19 - ldw -16(%r30),%r28 - ldws 0(%r26),%r29 - addc %r0,%r28,%r28 - sub %r29,%r19,%r22 - add %r22,%r19,%r0 - stws,ma %r22,4(%r26) - addc %r0,%r28,%r28 - bv 0(%r2) - ldo -64(%r30),%r30 - -L$just_one_limb - xmpyu %fr4,%fr5,%fr6 - ldw 0(%r26),%r29 - fstds %fr6,-16(%r30) - ldw -12(%r30),%r1 - ldw -16(%r30),%r28 - sub %r29,%r1,%r22 - add %r22,%r1,%r0 - stw %r22,0(%r26) - addc %r0,%r28,%r28 - bv 0(%r2) - ldo -64(%r30),%r30 - - .exit - .procend - diff --git a/mpi/hppa1.1/udiv-qrnnd.S b/mpi/hppa1.1/udiv-qrnnd.S deleted file mode 100644 index b48eee49a..000000000 --- a/mpi/hppa1.1/udiv-qrnnd.S +++ /dev/null @@ -1,88 +0,0 @@ -/* HP-PA __udiv_qrnnd division support, used from longlong.h. - * This version runs fast on PA 7000 and later. - * - * Copyright (C) 1993, 1994 Free Software Foundation, Inc. - * Copyright (C) 1998 Free Software Foundation, Inc. - * - * This file is part of GNUPG. - * - * GNUPG is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * GNUPG is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - * The GNU MP Library itself is published under the LGPL; - * however I decided to publish this code under the plain GPL. - */ - - - -/* INPUT PARAMETERS - * rem_ptr gr26 - * n1 gr25 - * n0 gr24 - * d gr23 - */ - - .code -L$0000 .word 0x43f00000 - .word 0x0 - .export __udiv_qrnnd -__udiv_qrnnd - .proc - .callinfo frame=64,no_calls - .entry - ldo 64(%r30),%r30 - - stws %r25,-16(0,%r30) ; n_hi - stws %r24,-12(0,%r30) ; n_lo - ldil L'L$0000,%r19 ; ' - ldo R'L$0000(%r19),%r19 ; ' - fldds -16(0,%r30),%fr5 - stws %r23,-12(0,%r30) - comib,<= 0,%r25,L$1 - fcnvxf,dbl,dbl %fr5,%fr5 - fldds 0(0,%r19),%fr4 - fadd,dbl %fr4,%fr5,%fr5 -L$1 - fcpy,sgl %fr0,%fr6L - fldws -12(0,%r30),%fr6R - fcnvxf,dbl,dbl %fr6,%fr4 - - fdiv,dbl %fr5,%fr4,%fr5 - - fcnvfx,dbl,dbl %fr5,%fr4 - fstws %fr4R,-16(%r30) - xmpyu %fr4R,%fr6R,%fr6 - ldws -16(%r30),%r28 - fstds %fr6,-16(0,%r30) - ldws -12(0,%r30),%r21 - ldws -16(0,%r30),%r20 - sub %r24,%r21,%r22 - subb %r25,%r20,%r19 - comib,= 0,%r19,L$2 - ldo -64(%r30),%r30 - - add %r22,%r23,%r22 - ldo -1(%r28),%r28 -L$2 bv 0(%r2) - stws %r22,0(0,%r26) - - .exit - .procend - - |