pqc/external/flint-2.4.3/doc/longlong.txt

/*=============================================================================

    longlong.h -- 64 bit arithmetic

    This file is free software; you can redistribute it and/or modify it
    under the terms of the GNU Lesser General Public License as published
    by the Free Software Foundation; either version 2.1 of the License, or
    (at your option) any later version.

    This file is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
    License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this file; see the file COPYING.LIB.  If not, write
    to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA

=============================================================================*/
/******************************************************************************

    Copyright (C) 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001,
                  2002, 2003, 2004, 2005 Free Software Foundation, Inc.
    Copyright (C) 2009 William Hart
    Copyright (C) 2011 Fredrik Johansson

******************************************************************************/

*******************************************************************************

    Auxiliary asm macros

*******************************************************************************

umul_ppmm(high_prod, low_prod, multiplier, multiplicand)

    Multiplies two single limb integers \code{MULTIPLIER} and
    \code{MULTIPLICAND}, and generates a two limb product in
    \code{HIGH_PROD} and \code{LOW_PROD}.

smul_ppmm(high_prod, low_prod, multiplier, multiplicand)

    As for \code{umul_ppmm()} but the numbers are signed.

udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, denominator)

    Divides an unsigned two-limb integer, composed of the limbs
    \code{HIGH_NUMERATOR} and\\ \code{LOW_NUMERATOR}, by \code{DENOMINATOR}
    and places the quotient in \code{QUOTIENT} and the remainder in
    \code{REMAINDER}.  \code{HIGH_NUMERATOR} must be less than
    \code{DENOMINATOR} for correct operation.

sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, denominator)

    As for \code{udiv_qrnnd()} but the numbers are signed.  The quotient is
    rounded towards $0$. Note that as the quotient is signed it must lie in
    the range $[-2^{63}, 2^{63})$.

count_leading_zeros(count, x)

    Counts the number of zero-bits from the msb to the first non-zero bit
    in the limb \code{x}.  This is the number of steps \code{x} needs to
    be shifted left to set the msb. If \code{x} is $0$ then count is
    undefined.

count_trailing_zeros(count, x)

    As for \code{count_leading_zeros()}, but counts from the least
    significant end. If \code{x} is zero then count is undefined.

add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
                                               high_addend_2, low_addend_2)

    Adds two two-limb integers, composed of \code{HIGH_ADDEND_1} and
    \code{LOW_ADDEND_1}, and\\ \code{HIGH_ADDEND_2} and \code{LOW_ADDEND_2},
    respectively.  The result is placed in \code{HIGH_SUM} and
    \code{LOW_SUM}.  Overflow, i.e.\ carry out, is not stored anywhere,
    and is lost.

add_sssaaaaaa(high_sum, mid_sum, low_sum, high_addend_1, mid_addend_1,
            low_addend_1, high_addend_2, mid_addend_2, low_addend_2)

    Adds two three-limb integers. Carry out is lost.

sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
                                            high_subtrahend, low_subtrahend)

    Subtracts two two-limb integers, composed of \code{HIGH_MINUEND} and
    \code{LOW_MINUEND}, and \code{HIGH_SUBTRAHEND} and
    \code{LOW_SUBTRAHEND}, respectively.  The result is placed in\\
    \code{HIGH_DIFFERENCE} and \code{LOW_DIFFERENCE}.  Overflow, i.e.\
    borrow out, is not stored anywhere, and is lost.

invert_limb(invxl, xl)

    Computes an approximate inverse \code{invxl} of the limb \code{xl}
    (written $x$ below), with an implicit leading~$1$. More formally it
    computes
    \begin{lstlisting}[language=c]
    invxl = (B^2 - B*x - 1)/x = (B^2 - 1)/x - B
    \end{lstlisting}

    Note that $x$ must be normalised, i.e.\ with msb set. This inverse
    makes use of the following theorem of Torbjorn Granlund and Peter
    Montgomery~\citep[Lemma~8.1]{GraMon1994}:

    Let $d$ be normalised, $d < B$, i.e.\ it fits in a word, and suppose
    that $m d < B^2 \leq (m+1) d$. Let $0 \leq n \leq B d - 1$.  Write
    $n = n_2 B + n_1 B/2 + n_0$ with $n_1 = 0$ or $1$ and $n_0 < B/2$.
    Suppose $q_1 B + q_0 = n_2 B + (n_2 + n_1) (m - B) + n_1 (d-B/2) + n_0$
    and $0 \leq q_0 < B$. Then $0 \leq q_1 < B$ and $0 \leq n - q_1 d < 2 d$.

    In the theorem, $m$ is the inverse of $d$. If we let
    \code{m = invxl + B} and $d = x$, then $m = \lfloor (B^2 - 1)/d \rfloor$,
    so $m d \leq B^2 - 1 < B^2$ and $(m+1) d > B^2 - 1$, i.e.\
    $(m+1) d \geq B^2$.

    The theorem is often applied as follows: note that $n_0$ and $n_1 (d-B/2)$
    are both less than $B/2$. Also note that $n_1 (m-B) < B$. Thus the sum of
    all these terms contributes at most $1$ to $q_1$. We are left with
    $n_2 B + n_2 (m-B)$. But note that $(m-B)$ is precisely our precomputed
    inverse \code{invxl}. If we write $q_1 B + q_0 = n_2 B + n_2 (m-B)$,
    then from the theorem, we have $0 \leq n - q_1 d < 3 d$, i.e.\ the
    quotient is out by at most $2$ and is always either correct or too small.

udiv_qrnnd_preinv(q, r, nh, nl, d, di)

    As for \code{udiv_qrnnd()} but takes a precomputed inverse \code{di} as
    computed by \code{invert_limb()}. The algorithm, in terms of the theorem
    above, is:

    \begin{lstlisting}[language=c]
    nadj = n1*(d-B/2) + n0
    xh, xl = (n2+n1)*(m-B)
    xh, xl += nadj + n2*B ( xh, xl = n2*B + (n2+n1)*(m-B) + n1*(d-B/2) + n0 )
    _q1 = B - xh - 1
    xh, xl = _q1*d + nh, nl - B*d = nh, nl - q1*d - d so that xh = 0 or -1
    r = xl + (xh & d) where xh is 0 if q1 is off by 1, otherwise -1
    q = xh - _q1 = xh + 1 + q1
    \end{lstlisting}