/*============================================================================= This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA =============================================================================*/ /****************************************************************************** Copyright (C) 2010 William Hart ******************************************************************************/ #ifndef NMOD_VEC_H #define NMOD_VEC_H #undef ulong #define ulong ulongxx /* interferes with system includes */ #include #undef ulong #include #define ulong mp_limb_t #include "longlong.h" #include "ulong_extras.h" #include "flint.h" #ifdef __cplusplus extern "C" { #endif typedef struct { mp_limb_t n; mp_limb_t ninv; mp_bitcnt_t norm; } nmod_t; #define NMOD_VEC_NORM(vec, i) \ do { \ while ((i) && vec[(i) - 1] == UWORD(0)) \ (i)--; \ } while (0) #define NMOD_RED2(r, a_hi, a_lo, mod) \ do { \ mp_limb_t q0, q1, r1; \ const mp_limb_t u1 = ((a_hi)<<(mod).norm) + r_shift((a_lo), FLINT_BITS - (mod).norm); \ const mp_limb_t u0 = ((a_lo)<<(mod).norm); \ const mp_limb_t nxx = ((mod).n<<(mod).norm); \ umul_ppmm(q1, q0, (mod).ninv, u1); \ add_ssaaaa(q1, q0, q1, q0, u1, u0); \ r1 = (u0 - (q1 + 1)*nxx); \ if (r1 >= q0) r1 += nxx; \ if (r1 < nxx) r = (r1>>(mod).norm); \ else r = ((r1 - nxx)>>(mod).norm); \ } while (0) #define NMOD_RED(r, a, mod) \ do { \ NMOD_RED2(r, 0, a, mod); \ } while (0) #define NMOD2_RED2(r, a_hi, a_lo, mod) \ do { \ mp_limb_t v_hi; \ NMOD_RED(v_hi, a_hi, mod); \ NMOD_RED2(r, v_hi, a_lo, mod); \ } while (0) #define NMOD_RED3(r, a_hi, a_me, a_lo, mod) \ do { \ mp_limb_t v_hi; \ NMOD_RED2(v_hi, a_hi, a_me, mod); \ NMOD_RED2(r, v_hi, a_lo, mod); \ } while (0) #define NMOD_ADDMUL(r, a, b, mod) \ do { \ mp_limb_t a_hi, a_lo; \ umul_ppmm(a_hi, a_lo, a, b); \ add_ssaaaa(a_hi, a_lo, a_hi, a_lo, (mp_limb_t) 0, r); \ NMOD_RED2(r, a_hi, a_lo, mod); \ } while (0) static __inline__ mp_limb_t _nmod_add(mp_limb_t a, mp_limb_t b, nmod_t mod) { const mp_limb_t sum = a + b; return sum - mod.n + ((((mp_limb_signed_t)(sum - mod.n))>>(FLINT_BITS - 1)) & mod.n); } static __inline__ mp_limb_t _nmod_sub(mp_limb_t a, mp_limb_t b, nmod_t mod) { const mp_limb_t diff = a - b; return ((((mp_limb_signed_t)diff)>>(FLINT_BITS - 1)) & mod.n) + diff; } static __inline__ mp_limb_t nmod_add(mp_limb_t a, mp_limb_t b, nmod_t mod) { const mp_limb_t neg = mod.n - a; if (neg > b) return a + b; else return b - neg; } static __inline__ mp_limb_t nmod_sub(mp_limb_t a, mp_limb_t b, nmod_t mod) { const mp_limb_t diff = a - b; if (a < b) return mod.n + diff; else return diff; } static __inline__ mp_limb_t nmod_neg(mp_limb_t a, nmod_t mod) { if (a) return mod.n - a; else return 0; } static __inline__ mp_limb_t nmod_mul(mp_limb_t a, mp_limb_t b, nmod_t mod) { return n_mulmod2_preinv(a, b, mod.n, mod.ninv); } static __inline__ mp_limb_t nmod_inv(mp_limb_t a, nmod_t mod) { return n_invmod(a, mod.n); } static __inline__ mp_limb_t nmod_div(mp_limb_t a, mp_limb_t b, nmod_t mod) { b = n_invmod(b, mod.n); return n_mulmod2_preinv(a, b, mod.n, mod.ninv); } static __inline__ mp_limb_t nmod_pow_ui(mp_limb_t a, ulong exp, nmod_t mod) { return n_powmod2_ui_preinv(a, exp, mod.n, mod.ninv); } static __inline__ void nmod_init(nmod_t * mod, mp_limb_t n) { mod->n = n; mod->ninv = n_preinvert_limb(n); count_leading_zeros(mod->norm, n); } static __inline__ mp_ptr _nmod_vec_init(slong len) { return (mp_ptr) flint_malloc(len * sizeof(mp_limb_t)); } static __inline__ void _nmod_vec_clear(mp_ptr vec) { flint_free(vec); } void _nmod_vec_randtest(mp_ptr vec, flint_rand_t state, slong len, nmod_t mod); static __inline__ void _nmod_vec_zero(mp_ptr vec, slong len) { flint_mpn_zero(vec, len); } mp_bitcnt_t _nmod_vec_max_bits(mp_srcptr vec, slong len); static __inline__ void _nmod_vec_set(mp_ptr res, mp_srcptr vec, slong len) { flint_mpn_copyi(res, vec, len); } static __inline__ void _nmod_vec_swap(mp_ptr a, mp_ptr b, slong length) { slong i; for (i = 0; i < length; i++) { mp_limb_t t = a[i]; a[i] = b[i]; b[i] = t; } } static __inline__ int _nmod_vec_equal(mp_srcptr vec, mp_srcptr vec2, slong len) { slong i; for (i = 0; i < len; i++) if (vec[i] != vec2[i]) return 0; return 1; } static __inline__ int _nmod_vec_is_zero(mp_srcptr vec, slong len) { slong i; for (i = 0; i < len; i++) if (vec[i] != 0) return 0; return 1; } void _nmod_vec_reduce(mp_ptr res, mp_srcptr vec, slong len, nmod_t mod); void _nmod_vec_add(mp_ptr res, mp_srcptr vec1, mp_srcptr vec2, slong len, nmod_t mod); void _nmod_vec_sub(mp_ptr res, mp_srcptr vec1, mp_srcptr vec2, slong len, nmod_t mod); void _nmod_vec_neg(mp_ptr res, mp_srcptr vec, slong len, nmod_t mod); void _nmod_vec_scalar_mul_nmod(mp_ptr res, mp_srcptr vec, slong len, mp_limb_t c, nmod_t mod); void _nmod_vec_scalar_addmul_nmod(mp_ptr res, mp_srcptr vec, slong len, mp_limb_t c, nmod_t mod); int _nmod_vec_dot_bound_limbs(slong len, nmod_t mod); #define NMOD_VEC_DOT(res, i, len, expr1, expr2, mod, nlimbs) \ do \ { \ mp_limb_t s0, s1, s2, t0, t1; \ s0 = s1 = s2 = UWORD(0); \ switch (nlimbs) \ { \ case 1: \ for (i = 0; i < len; i++) \ { \ s0 += (expr1) * (expr2); \ } \ NMOD_RED(s0, s0, mod); \ break; \ case 2: \ if (mod.n <= (UWORD(1) << (FLINT_BITS / 2))) \ { \ for (i = 0; i < len; i++) \ { \ t0 = (expr1) * (expr2); \ add_ssaaaa(s1, s0, s1, s0, 0, t0); \ } \ } \ else \ { \ for (i = 0; i < len; i++) \ { \ umul_ppmm(t1, t0, (expr1), (expr2)); \ add_ssaaaa(s1, s0, s1, s0, t1, t0); \ } \ } \ NMOD2_RED2(s0, s1, s0, mod); \ break; \ default: \ for (i = 0; i < len; i++) \ { \ umul_ppmm(t1, t0, (expr1), (expr2)); \ add_sssaaaaaa(s2, s1, s0, s2, s1, s0, 0, t1, t0); \ } \ NMOD_RED(s2, s2, mod); \ NMOD_RED3(s0, s2, s1, s0, mod); \ break; \ } \ res = s0; \ } while (0); mp_limb_t _nmod_vec_dot(mp_srcptr vec1, mp_srcptr vec2, slong len, nmod_t mod, int nlimbs); mp_limb_t _nmod_vec_dot_ptr(mp_srcptr vec1, const mp_ptr * vec2, slong offset, slong len, nmod_t mod, int nlimbs); #ifdef __cplusplus } #endif #endif