pqc/external/flint-2.4.3/doc/longlong.txt

/*=============================================================================

    longlong.h -- 64 bit arithmetic

    This file is free software; you can redistribute it and/or modify it 
    under the terms of the GNU Lesser General Public License as published 
    by the Free Software Foundation; either version 2.1 of the License, or 
    (at your option) any later version.

    This file is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
    License for more details.

    You should have received a copy of the GNU Lesser General Public 
    License along with this file; see the file COPYING.LIB.  If not, write 
    to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 
    Boston, MA 02110-1301, USA

=============================================================================*/
/******************************************************************************

    Copyright (C) 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 
                  2002, 2003, 2004, 2005 Free Software Foundation, Inc.
    Copyright (C) 2009 William Hart
    Copyright (C) 2011 Fredrik Johansson

******************************************************************************/

*******************************************************************************

    Auxiliary asm macros

*******************************************************************************

umul_ppmm(high_prod, low_prod, multiplier, multiplicand) 

    Multiplies two single limb integers \code{MULTIPLIER} and 
    \code{MULTIPLICAND}, and generates a two limb product in 
    \code{HIGH_PROD} and \code{LOW_PROD}.

smul_ppmm(high_prod, low_prod, multiplier, multiplicand) 

    As for \code{umul_ppmm()} but the numbers are signed.

udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, denominator) 

    Divides an unsigned integer, composed by the limb integers 
    \code{HIGH_NUMERATOR} and\\ \code{LOW_NUMERATOR}, by \code{DENOMINATOR} 
    and places the quotient in \code{QUOTIENT} and the remainder in 
    \code{REMAINDER}.  \code{HIGH_NUMERATOR} must be less than 
    \code{DENOMINATOR} for correct operation. 

sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, denominator)  

    As for \code{udiv_qrnnd()} but the numbers are signed.  The quotient is 
    rounded towards $0$. Note that as the quotient is signed it must lie in 
    the range $[-2^{63}, 2^{63})$.

count_leading_zeros(count, x) 

    Counts the number of zero-bits from the msb to the first non-zero bit 
    in the limb \code{x}.  This is the number of steps \code{x} needs to 
    be shifted left to set the msb. If \code{x} is $0$ then count is 
    undefined.

count_trailing_zeros(count, x) 

    As for \code{count_leading_zeros()}, but counts from the least 
    significant end. If \code{x} is zero then count is undefined.

add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, 
                                               high_addend_2, low_addend_2) 

    Adds two two-limb integers, composed of \code{HIGH_ADDEND_1} and 
    \code{LOW_ADDEND_1}, and\\ \code{HIGH_ADDEND_2} and \code{LOW_ADDEND_2}, 
    respectively.  The result is placed in \code{HIGH_SUM} and 
    \code{LOW_SUM}.  Overflow, i.e.\ carry out, is not stored anywhere, 
    and is lost.

add_sssaaaaaa(high_sum, mid_sum, low_sum, high_addend_1, mid_addend_1,
            low_addend_1, high_addend_2, mid_addend_2, low_addend_2)

    Adds two three limb integers. Carry out is lost.

sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
                                            high_subtrahend, low_subtrahend) 

    Subtracts two two-limb integers, composed of \code{HIGH_MINUEND} and 
    \code{LOW_MINUEND}, and \code{HIGH_SUBTRAHEND} and 
    \code{LOW_SUBTRAHEND}, respectively.  The result is placed in\\ 
    \code{HIGH_DIFFERENCE} and \code{LOW_DIFFERENCE}.  Overflow, i.e.\ 
    carry out, is not stored anywhere, and is lost.

invert_limb(invxl, xl)

    Computes an approximate inverse \code{invxl} of the limb \code{xl}, 
    with an implicit leading~$1$. More formally, writing $x$ for \code{xl} 
    and $B$ for the limb base (so that every limb is less than $B$), it 
    computes
    \begin{lstlisting}[language=c]
    invxl = (B^2 - B*x - 1)/x = (B^2 - 1)/x - B
    \end{lstlisting}

    Note that $x$ must be normalised, i.e.\ with msb set. This inverse 
    makes use of the following theorem of Torbjorn Granlund and Peter 
    Montgomery~\citep[Lemma~8.1]{GraMon1994}:

    Let $d$ be normalised, $d < B$, i.e.\ it fits in a word, and suppose 
    that $m d < B^2 \leq (m+1) d$. Let $0 \leq n \leq B d - 1$.  Write 
    $n = n_2 B + n_1 B/2 + n_0$ with $n_1 = 0$ or $1$ and $n_0 < B/2$. 
    Suppose $q_1 B + q_0 = n_2 B + (n_2 + n_1) (m - B) + n_1 (d-B/2) + n_0$
    and $0 \leq q_0 < B$. Then $0 \leq q_1 < B$ and $0 \leq n - q_1 d < 2 d$.

    In the theorem, $m$ is the inverse of $d$. If we let 
    \code{m = invxl + B} and $d = x$ we have $m d \leq B^2 - 1 < B^2$ and 
    $(m+1) d > B^2 - 1$, i.e.\ $(m+1) d \geq B^2$, because the division 
    defining \code{invxl} rounds down.

    The theorem is often applied as follows: note that $n_0$ and $n_1 (d-B/2)$ 
    are both less than $B/2$. Also note that $n_1 (m-B) < B$. Thus the sum of 
    all these terms contributes at most $1$ to $q_1$. We are left with 
    $n_2 B + n_2 (m-B)$. But note that $(m-B)$ is precisely our precomputed 
    inverse \code{invxl}. If we write $q_1 B + q_0 = n_2 B + n_2 (m-B)$, 
    then from the theorem, we have $0 \leq n - q_1 d < 3 d$, i.e.\ the 
    quotient is out by at most $2$ and is always either correct or too small.

udiv_qrnnd_preinv(q, r, nh, nl, d, di)

    As for \code{udiv_qrnnd()} but takes a precomputed inverse \code{di} as 
    computed by \code{invert_limb()}. The algorithm, in terms of the theorem 
    above, is:

    \begin{lstlisting}[language=c]
    nadj = n1*(d-B/2) + n0
    xh, xl = (n2+n1)*(m-B)
    xh, xl += nadj + n2*B ( xh, xl = n2*B + (n2+n1)*(m-B) + n1*(d-B/2) + n0 )
    _q1 = B - xh - 1
    xh, xl = _q1*d + nh, nl - B*d = nh, nl - q1*d - d so that xh = 0 or -1
    r = xl + xh*d where xh is 0 if q1 is off by 1, otherwise -1
    q = xh - _q1 = xh + 1 + n2
    \end{lstlisting}
ALL: Add flint 2014-05-18 22:03:37 +00:00			`/*=============================================================================`

			`longlong.h -- 64 bit arithmetic`

			`This file is free software; you can redistribute it and/or modify it`
			`under the terms of the GNU Lesser General Public License as published`
			`by the Free Software Foundation; either version 2.1 of the License, or`
			`(at your option) any later version.`

			`This file is distributed in the hope that it will be useful, but`
			`WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY`
			`or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public`
			`License for more details.`

			`You should have received a copy of the GNU Lesser General Public`
			`License along with this file; see the file COPYING.LIB. If not, write`
			`to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,`
			`Boston, MA 02110-1301, USA`

			`=============================================================================*/`
			`/******************************************************************************`

			`Copyright (C) 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001,`
			`2002, 2003, 2004, 2005 Free Software Foundation, Inc.`
			`Copyright (C) 2009 William Hart`
			`Copyright (C) 2011 Fredrik Johansson`

			`******************************************************************************/`

			`*******************************************************************************`

			`Auxiliary asm macros`

			`*******************************************************************************`

			`umul_ppmm(high_prod, low_prod, multiplier, multiplicand)`

			`Multiplies two single limb integers \code{MULTIPLIER} and`
			`\code{MULTIPLICAND}, and generates a two limb product in`
			`\code{HIGH_PROD} and \code{LOW_PROD}.`

			`smul_ppmm(high_prod, low_prod, multiplier, multiplicand)`

			`As for \code{umul_ppmm()} but the numbers are signed.`

			`udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, denominator)`

			`Divides an unsigned integer, composed by the limb integers`
			`\code{HIGH_NUMERATOR} and\\ \code{LOW_NUMERATOR}, by \code{DENOMINATOR}`
			`and places the quotient in \code{QUOTIENT} and the remainder in`
			`\code{REMAINDER}. \code{HIGH_NUMERATOR} must be less than`
			`\code{DENOMINATOR} for correct operation.`

			`sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, denominator)`

			`As for \code{udiv_qrnnd()} but the numbers are signed. The quotient is`
			`rounded towards $0$. Note that as the quotient is signed it must lie in`
			`the range $[-2^{63}, 2^{63})$.`

			`count_leading_zeros(count, x)`

			`Counts the number of zero-bits from the msb to the first non-zero bit`
			`in the limb \code{x}. This is the number of steps \code{x} needs to`
			`be shifted left to set the msb. If \code{x} is $0$ then count is`
			`undefined.`

			`count_trailing_zeros(count, x)`

			`As for \code{count_leading_zeros()}, but counts from the least`
			`significant end. If \code{x} is zero then count is undefined.`

			`add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,`
			`high_addend_2, low_addend_2)`

			`Adds two limb integers, composed by \code{HIGH_ADDEND_1} and`
			`\code{LOW_ADDEND_1}, and\\ \code{HIGH_ADDEND_2} and \code{LOW_ADDEND_2},`
			`respectively. The result is placed in \code{HIGH_SUM} and`
			`\code{LOW_SUM}. Overflow, i.e.\ carry out, is not stored anywhere,`
			`and is lost.`

			`add_sssaaaaaa(high_sum, mid_sum, low_sum, high_addend_1, mid_addend_1,`
			`low_addend_1, high_addend_2, mid_addend_2, low_addend_2)`

			`Adds two three limb integers. Carry out is lost.`

			`sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,`
			`high_subtrahend, low_subtrahend)`

			`Subtracts two two-limb integers, composed of \code{HIGH_MINUEND} and`
			`\code{LOW_MINUEND}, and \code{HIGH_SUBTRAHEND} and`
			`\code{LOW_SUBTRAHEND}, respectively. The result is placed in\\`
			`\code{HIGH_DIFFERENCE} and \code{LOW_DIFFERENCE}. Overflow, i.e.\`
			`carry out, is not stored anywhere, and is lost.`

			`invert_limb(invxl, xl)`

			`Computes an approximate inverse \code{invxl} of the limb \code{xl},`
			`with an implicit leading~$1$. More formally it computes`
			`\begin{lstlisting}[language=c]`
			`invxl = (B^2 - B*x - 1)/x = (B^2 - 1)/x - B`
			`\end{lstlisting}`

			`Note that $x$ must be normalised, i.e.\ with msb set. This inverse`
			`makes use of the following theorem of Torbjorn Granlund and Peter`
			`Montgomery~\citep[Lemma~8.1]{GraMon1994}:`

			`Let $d$ be normalised, $d < B$, i.e.\ it fits in a word, and suppose`
			`that $m d < B^2 \leq (m+1) d$. Let $0 \leq n \leq B d - 1$. Write`
			`$n = n_2 B + n_1 B/2 + n_0$ with $n_1 = 0$ or $1$ and $n_0 < B/2$.`
			`Suppose $q_1 B + q_0 = n_2 B + (n_2 + n_1) (m - B) + n_1 (d-B/2) + n_0$`
			`and $0 \leq q_0 < B$. Then $0 \leq q_1 < B$ and $0 \leq n - q_1 d < 2 d$.`

			`In the theorem, $m$ is the inverse of $d$. If we let`
			`\code{m = invxl + B} and $d = x$ we have $m d = B^2 - 1 < B^2$ and`
			`$(m+1) x = B^2 + d - 1 \geq B^2$.`

			`The theorem is often applied as follows: note that $n_0$ and $n_1 (d-B/2)$`
			`are both less than $B/2$. Also note that $n_1 (m-B) < B$. Thus the sum of`
			`all these terms contributes at most $1$ to $q_1$. We are left with`
			`$n_2 B + n_2 (m-B)$. But note that $(m-B)$ is precisely our precomputed`
			`inverse \code{invxl}. If we write $q_1 B + q_0 = n_2 B + n_2 (m-B)$,`
			`then from the theorem, we have $0 \leq n - q_1 d < 3 d$, i.e.\ the`
			`quotient is out by at most $2$ and is always either correct or too small.`

			`udiv_qrnnd_preinv(q, r, nh, nl, d, di)`

			`As for \code{udiv_qrnnd()} but takes a precomputed inverse \code{di} as`
			`computed by \code{invert_limb()}. The algorithm, in terms of the theorem`
			`above, is:`

			`\begin{lstlisting}[language=c]`
			`nadj = n1*(d-B/2) + n0`
			`xh, xl = (n2+n1)*(m-B)`
			`xh, xl += nadj + n2*B ( xh, xl = n2*B + (n2+n1)*(m-B) + n1*(d-B/2) + n0 )`
			`_q1 = B - xh - 1`
			`xh, xl = _q1*d + nh, nl - B*d = nh, nl - q1*d - d so that xh = 0 or -1`
			`r = xl + xh*d where xh is 0 if q1 is off by 1, otherwise -1`
			`q = xh - _q1 = xh + 1 + n2`
			`\end{lstlisting}`