/*============================================================================= This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA =============================================================================*/ /****************************************************************************** Copyright (C) 2011 Fredrik Johansson ******************************************************************************/ #include "fmpz_mat.h" static mp_limb_t find_good_prime_and_invert(nmod_mat_t Ainv, const fmpz_mat_t A, const fmpz_t det_bound) { mp_limb_t p; fmpz_t tested; p = UWORD(1) << NMOD_MAT_OPTIMAL_MODULUS_BITS; fmpz_init(tested); fmpz_one(tested); while (1) { p = n_nextprime(p, 0); _nmod_mat_set_mod(Ainv, p); fmpz_mat_get_nmod_mat(Ainv, A); if (nmod_mat_inv(Ainv, Ainv)) break; fmpz_mul_ui(tested, tested, p); if (fmpz_cmp(tested, det_bound) > 0) { p = 0; break; } } fmpz_clear(tested); return p; } /* We need to perform several matrix-vector products Ay, and speed them up by using modular multiplication (this is only faster if we precompute the modular matrices). Note: we assume that all primes are >= p. This allows reusing y_mod as the right-hand side without reducing it. */ #define USE_SLOW_MULTIPLICATION 0 mp_limb_t * get_crt_primes(slong * num_primes, const fmpz_mat_t A, mp_limb_t p) { fmpz_t bound, prod; mp_limb_t * primes; slong i, j; fmpz_init(bound); fmpz_init(prod); for (i = 0; i < A->r; i++) for (j = 0; j < A->c; j++) if (fmpz_cmpabs(bound, fmpz_mat_entry(A, i, j)) < 0) fmpz_abs(bound, fmpz_mat_entry(A, i, j)); fmpz_mul_ui(bound, bound, p - UWORD(1)); fmpz_mul_ui(bound, bound, A->r); fmpz_mul_ui(bound, bound, UWORD(2)); /* signs */ primes = flint_malloc(sizeof(mp_limb_t) * (fmpz_bits(bound) / (FLINT_BIT_COUNT(p) - 1) + 2)); primes[0] = p; fmpz_set_ui(prod, p); *num_primes = 1; while (fmpz_cmp(prod, bound) <= 0) { primes[*num_primes] = p = n_nextprime(p, 0); *num_primes += 1; fmpz_mul_ui(prod, prod, p); } fmpz_clear(bound); fmpz_clear(prod); return primes; } static void _fmpz_mat_solve_dixon(fmpz_mat_t X, fmpz_t mod, const fmpz_mat_t A, const fmpz_mat_t B, const nmod_mat_t Ainv, mp_limb_t p, const fmpz_t N, const fmpz_t D) { fmpz_t bound, ppow; fmpz_mat_t x, d, y, Ay; fmpz_t prod; mp_limb_t * crt_primes; nmod_mat_t * A_mod; nmod_mat_t Ay_mod, d_mod, y_mod; slong i, n, cols, num_primes; n = A->r; cols = B->c; fmpz_init(bound); fmpz_init(ppow); fmpz_init(prod); fmpz_mat_init(x, n, cols); fmpz_mat_init(y, n, cols); fmpz_mat_init(Ay, n, cols); fmpz_mat_init_set(d, B); /* Compute bound for the needed modulus. TODO: if one of N and D is much smaller than the other, we could use a tighter bound (i.e. 2ND). This would require the ability to forward N and D to the rational reconstruction routine. */ if (fmpz_cmpabs(N, D) < 0) fmpz_mul(bound, D, D); else fmpz_mul(bound, N, N); fmpz_mul_ui(bound, bound, UWORD(2)); /* signs */ crt_primes = get_crt_primes(&num_primes, A, p); A_mod = flint_malloc(sizeof(nmod_mat_t) * num_primes); for (i = 0; i < num_primes; i++) { nmod_mat_init(A_mod[i], n, n, crt_primes[i]); fmpz_mat_get_nmod_mat(A_mod[i], A); } nmod_mat_init(Ay_mod, n, cols, UWORD(1)); nmod_mat_init(d_mod, n, cols, p); nmod_mat_init(y_mod, n, cols, p); fmpz_one(ppow); while (fmpz_cmp(ppow, bound) <= 0) { /* y = A^(-1) * d (mod p) */ fmpz_mat_get_nmod_mat(d_mod, d); nmod_mat_mul(y_mod, Ainv, d_mod); /* x = x + y * p^i [= A^(-1) * b mod p^(i+1)] */ fmpz_mat_scalar_addmul_nmod_mat_fmpz(x, y_mod, ppow); /* ppow = p^(i+1) */ fmpz_mul_ui(ppow, ppow, p); if (fmpz_cmp(ppow, bound) > 0) break; /* d = (d - Ay) / p */ #if USE_SLOW_MULTIPLICATION fmpz_mat_set_nmod_mat_unsigned(y, y_mod); fmpz_mat_mul(Ay, A, y); #else for (i = 0; i < num_primes; i++) { _nmod_mat_set_mod(y_mod, crt_primes[i]); _nmod_mat_set_mod(Ay_mod, crt_primes[i]); nmod_mat_mul(Ay_mod, A_mod[i], y_mod); if (i == 0) { fmpz_mat_set_nmod_mat(Ay, Ay_mod); fmpz_set_ui(prod, crt_primes[0]); } else { fmpz_mat_CRT_ui(Ay, Ay, prod, Ay_mod, 1); fmpz_mul_ui(prod, prod, crt_primes[i]); } } #endif _nmod_mat_set_mod(y_mod, p); fmpz_mat_sub(d, d, Ay); fmpz_mat_scalar_divexact_ui(d, d, p); } fmpz_set(mod, ppow); fmpz_mat_set(X, x); nmod_mat_clear(y_mod); nmod_mat_clear(d_mod); nmod_mat_clear(Ay_mod); for (i = 0; i < num_primes; i++) nmod_mat_clear(A_mod[i]); flint_free(A_mod); flint_free(crt_primes); fmpz_clear(bound); fmpz_clear(ppow); fmpz_clear(prod); fmpz_mat_clear(x); fmpz_mat_clear(y); fmpz_mat_clear(d); fmpz_mat_clear(Ay); } int fmpz_mat_solve_dixon(fmpz_mat_t X, fmpz_t mod, const fmpz_mat_t A, const fmpz_mat_t B) { nmod_mat_t Ainv; fmpz_t N, D; mp_limb_t p; if (!fmpz_mat_is_square(A)) { flint_printf("Exception (fmpz_mat_solve_dixon). Non-square system matrix.\n"); abort(); } if (fmpz_mat_is_empty(A) || fmpz_mat_is_empty(B)) return 1; fmpz_init(N); fmpz_init(D); fmpz_mat_solve_bound(N, D, A, B); nmod_mat_init(Ainv, A->r, A->r, 1); p = find_good_prime_and_invert(Ainv, A, D); if (p != 0) _fmpz_mat_solve_dixon(X, mod, A, B, Ainv, p, N, D); nmod_mat_clear(Ainv); fmpz_clear(N); fmpz_clear(D); return p != 0; }